From 2690948fb7eeef1b01a4b5bef6604a71ecdff58b Mon Sep 17 00:00:00 2001 From: Gaurav Jain <gjn@google.com> Date: Tue, 5 Nov 2019 02:09:58 -0800 Subject: [PATCH] Make device listing APIs non-experimental We make listing logical & physical devices non-experimental, along with setting the device visibility and logical device configuration. The existing experimental symbols are left exposed to avoid the need to update user code. However, we remove tf.config.experimental_list_devices in favor of tf.config.list_logical_devices. PiperOrigin-RevId: 278576674 Change-Id: I86106308457e8ff8faf77b1f5ed6cb4433d8bc41 --- .../cluster_resolver/cluster_resolver.py | 17 +- .../cluster_resolver/cluster_resolver_test.py | 73 ++++---- .../tfconfig_cluster_resolver_test.py | 29 ++-- .../tpu_cluster_resolver_test.py | 31 ++-- .../distribute/mirrored_variable_test.py | 6 +- .../multi_worker_continuous_run_test.py | 4 +- .../distribute/strategy_combinations.py | 11 +- .../distribute/strategy_combinations_test.py | 4 +- .../benchmarks/resnet50/resnet50_test_util.py | 2 +- tensorflow/python/eager/context.py | 60 +++---- tensorflow/python/eager/core_test.py | 6 +- .../python/eager/function_gradients_test.py | 10 +- tensorflow/python/eager/function_test.py | 10 +- .../python/eager/remote_cloud_tpu_test.py | 12 +- tensorflow/python/framework/config.py | 162 +++++++++++------- tensorflow/python/framework/config_test.py | 64 +++---- tensorflow/python/framework/test_util.py | 4 +- tensorflow/python/keras/backend.py | 3 +- .../keras/utils/multi_gpu_utils_test.py | 11 +- .../python/ops/collective_ops_gpu_test.py | 6 +- tensorflow/python/ops/collective_ops_test.py | 6 +- tensorflow/python/tpu/tpu_system_metadata.py | 16 +- ...config.-logical-device-configuration.pbtxt | 19 ++ .../tensorflow.config.-logical-device.pbtxt | 23 +++ .../tensorflow.config.-physical-device.pbtxt | 23 +++ ...mental.-virtual-device-configuration.pbtxt | 4 +- .../v1/tensorflow.config.experimental.pbtxt | 2 +- 
.../api/golden/v1/tensorflow.config.pbtxt | 40 ++++- ...config.-logical-device-configuration.pbtxt | 19 ++ .../tensorflow.config.-logical-device.pbtxt | 23 +++ .../tensorflow.config.-physical-device.pbtxt | 23 +++ ...mental.-virtual-device-configuration.pbtxt | 4 +- .../v2/tensorflow.config.experimental.pbtxt | 2 +- .../api/golden/v2/tensorflow.config.pbtxt | 40 ++++- 34 files changed, 490 insertions(+), 279 deletions(-) create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.config.-logical-device-configuration.pbtxt create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.config.-logical-device.pbtxt create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.config.-physical-device.pbtxt create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.config.-logical-device-configuration.pbtxt create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.config.-logical-device.pbtxt create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.config.-physical-device.pbtxt diff --git a/tensorflow/python/distribute/cluster_resolver/cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/cluster_resolver.py index 5b61f847801..1cafbcee93a 100644 --- a/tensorflow/python/distribute/cluster_resolver/cluster_resolver.py +++ b/tensorflow/python/distribute/cluster_resolver/cluster_resolver.py @@ -21,19 +21,16 @@ from __future__ import print_function import abc import collections -import re import six from tensorflow.python.client import session from tensorflow.python.eager import context +from tensorflow.python.framework import config from tensorflow.python.framework import ops from tensorflow.python.training.server_lib import ClusterSpec from tensorflow.python.util.tf_export import tf_export -DEVICE_TYPE_REGEX = re.compile('.*device:([^:]+).*') - - def format_master_url(master, rpc_layer=None): if rpc_layer: return '%s://%s' % (rpc_layer, master) @@ -44,16 +41,12 @@ def format_master_url(master, rpc_layer=None): def get_accelerator_devices(master, 
config_proto): """Returns accelerator devices given a master and a configuration.""" if context.executing_eagerly(): - device_names = context.list_devices() # list_devices returns list(string) + logical_devices = config.list_logical_devices() devices = [] - for name in device_names: - device_type = 'GPU' # default device type is GPU - device_match = DEVICE_TYPE_REGEX.match(name) - if device_match: - device_type = device_match.group(1) - if device_type == 'CPU' or device_type == 'XLA_CPU': # Filter CPUs + for d in logical_devices: + if d.device_type == 'CPU' or d.device_type == 'XLA_CPU': # Filter CPUs continue - devices.append(session._DeviceAttributes(name, device_type, 0, 0)) # pylint: disable=protected-access + devices.append(session._DeviceAttributes(d.name, d.device_type, 0, 0)) # pylint: disable=protected-access return devices else: with ops.Graph().as_default(): diff --git a/tensorflow/python/distribute/cluster_resolver/cluster_resolver_test.py b/tensorflow/python/distribute/cluster_resolver/cluster_resolver_test.py index c9aebbb4685..d4ebd2c8e14 100644 --- a/tensorflow/python/distribute/cluster_resolver/cluster_resolver_test.py +++ b/tensorflow/python/distribute/cluster_resolver/cluster_resolver_test.py @@ -18,11 +18,12 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python import eager +from tensorflow.python import framework from tensorflow.python.client import session from tensorflow.python.distribute.cluster_resolver import ClusterResolver from tensorflow.python.distribute.cluster_resolver import SimpleClusterResolver from tensorflow.python.distribute.cluster_resolver import UnionClusterResolver +from tensorflow.python.eager.context import LogicalDevice from tensorflow.python.framework import test_util from tensorflow.python.platform import test from tensorflow.python.training import server_lib @@ -45,69 +46,69 @@ class MockBaseClusterResolver(ClusterResolver): 
@test_util.run_all_in_graph_and_eager_modes class BaseClusterResolverTest(test.TestCase): - @mock.patch.object(eager.context, "list_devices") + @mock.patch.object(framework.config, "list_logical_devices") @mock.patch.object(session.BaseSession, "list_devices") def testNumAcceleratorsSuccess(self, mock_list_devices, mock_eager_list_devices): - device_names = [ - "/job:worker/task:0/device:GPU:0", - "/job:worker/task:0/device:GPU:1", - "/job:worker/task:0/device:GPU:2", - "/job:worker/task:0/device:GPU:3", + devices = [ + LogicalDevice("/job:worker/task:0/device:GPU:0", "GPU"), + LogicalDevice("/job:worker/task:0/device:GPU:1", "GPU"), + LogicalDevice("/job:worker/task:0/device:GPU:2", "GPU"), + LogicalDevice("/job:worker/task:0/device:GPU:3", "GPU"), ] device_list = [ - session._DeviceAttributes(name, "GPU", 1024, 0) - for name in device_names + session._DeviceAttributes(d.name, d.device_type, 1024, 0) + for d in devices ] - mock_eager_list_devices.return_value = device_names + mock_eager_list_devices.return_value = devices mock_list_devices.return_value = device_list resolver = MockBaseClusterResolver() self.assertEqual(resolver.num_accelerators(), {"GPU": 4}) - @mock.patch.object(eager.context, "list_devices") + @mock.patch.object(framework.config, "list_logical_devices") @mock.patch.object(session.BaseSession, "list_devices") def testNumAcceleratorsMultiDeviceSuccess(self, mock_list_devices, mock_eager_list_devices): - device_names = [ - "/job:worker/task:0/device:TPU:0", - "/job:worker/task:0/device:TPU:1", - "/job:worker/task:0/device:TPU:2", - "/job:worker/task:0/device:TPU:3", - "/job:worker/task:0/device:GPU:0", - "/job:worker/task:0/device:GPU:1", - "/job:worker/task:0/device:GPU:2", - "/job:worker/task:0/device:GPU:3", + devices = [ + LogicalDevice("/job:worker/task:0/device:TPU:0", "TPU"), + LogicalDevice("/job:worker/task:0/device:TPU:1", "TPU"), + LogicalDevice("/job:worker/task:0/device:TPU:2", "TPU"), + LogicalDevice("/job:worker/task:0/device:TPU:3", 
"TPU"), + LogicalDevice("/job:worker/task:0/device:GPU:0", "GPU"), + LogicalDevice("/job:worker/task:0/device:GPU:1", "GPU"), + LogicalDevice("/job:worker/task:0/device:GPU:2", "GPU"), + LogicalDevice("/job:worker/task:0/device:GPU:3", "GPU"), ] device_list = [ - session._DeviceAttributes(name, name[26:29], 1024, 0) - for name in device_names + session._DeviceAttributes(d.name, d.device_type, 1024, 0) + for d in devices ] - mock_eager_list_devices.return_value = device_names + mock_eager_list_devices.return_value = devices mock_list_devices.return_value = device_list resolver = MockBaseClusterResolver() self.assertEqual(resolver.num_accelerators(), {"TPU": 4, "GPU": 4}) - @mock.patch.object(eager.context, "list_devices") + @mock.patch.object(framework.config, "list_logical_devices") @mock.patch.object(session.BaseSession, "list_devices") def testNumAcceleratorsFilterTasks(self, mock_list_devices, mock_eager_list_devices): - device_names = [ - "/job:worker1/task:0/device:TPU:0", - "/job:worker1/task:0/device:TPU:1", - "/job:worker1/task:0/device:GPU:0", - "/job:worker1/task:0/device:GPU:1", - "/job:worker2/task:1/device:TPU:2", - "/job:worker2/task:2/device:TPU:3", - "/job:worker2/task:3/device:GPU:2", - "/job:worker2/task:4/device:GPU:3", + devices = [ + LogicalDevice("/job:worker1/task:0/device:TPU:0", "TPU"), + LogicalDevice("/job:worker1/task:0/device:TPU:1", "TPU"), + LogicalDevice("/job:worker1/task:0/device:GPU:0", "GPU"), + LogicalDevice("/job:worker1/task:0/device:GPU:1", "GPU"), + LogicalDevice("/job:worker2/task:1/device:TPU:2", "TPU"), + LogicalDevice("/job:worker2/task:2/device:TPU:3", "TPU"), + LogicalDevice("/job:worker2/task:3/device:GPU:2", "GPU"), + LogicalDevice("/job:worker2/task:4/device:GPU:3", "GPU"), ] device_list = [ - session._DeviceAttributes(name, name[27:30], 1024, 0) - for name in device_names + session._DeviceAttributes(d.name, d.device_type, 1024, 0) + for d in devices ] - mock_eager_list_devices.return_value = device_names + 
mock_eager_list_devices.return_value = devices mock_list_devices.return_value = device_list resolver = MockBaseClusterResolver() diff --git a/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver_test.py b/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver_test.py index b68d8bcd0ef..c239d60a224 100644 --- a/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver_test.py +++ b/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver_test.py @@ -20,9 +20,10 @@ from __future__ import print_function import os -from tensorflow.python import eager +from tensorflow.python import framework from tensorflow.python.client import session from tensorflow.python.distribute.cluster_resolver import TFConfigClusterResolver +from tensorflow.python.eager.context import LogicalDevice from tensorflow.python.framework import test_util from tensorflow.python.platform import test from tensorflow.python.training import server_lib @@ -243,7 +244,7 @@ class TFConfigClusterResolverTest(test.TestCase): cluster_resolver = TFConfigClusterResolver() self.assertEqual('', cluster_resolver.master()) - @mock.patch.object(eager.context, 'list_devices') + @mock.patch.object(framework.config, 'list_logical_devices') @mock.patch.object(session.BaseSession, 'list_devices') def testNumAcceleratorsFilterTasksByEnvVar(self, mock_list_devices, mock_eager_list_devices): @@ -261,21 +262,21 @@ class TFConfigClusterResolverTest(test.TestCase): } """ - device_names = [ - '/job:worker1/task:0/device:TPU:0', - '/job:worker1/task:0/device:TPU:1', - '/job:worker1/task:0/device:GPU:0', - '/job:worker1/task:0/device:GPU:1', - '/job:worker2/task:1/device:TPU:2', - '/job:worker2/task:2/device:TPU:3', - '/job:worker2/task:3/device:GPU:2', - '/job:worker2/task:4/device:GPU:3', + devices = [ + LogicalDevice('/job:worker1/task:0/device:TPU:0', 'TPU'), + LogicalDevice('/job:worker1/task:0/device:TPU:1', 'TPU'), + 
LogicalDevice('/job:worker1/task:0/device:GPU:0', 'GPU'), + LogicalDevice('/job:worker1/task:0/device:GPU:1', 'GPU'), + LogicalDevice('/job:worker2/task:1/device:TPU:2', 'TPU'), + LogicalDevice('/job:worker2/task:2/device:TPU:3', 'TPU'), + LogicalDevice('/job:worker2/task:3/device:GPU:2', 'GPU'), + LogicalDevice('/job:worker2/task:4/device:GPU:3', 'GPU'), ] device_list = [ - session._DeviceAttributes(name, name[27:30], 1024, 0) - for name in device_names + session._DeviceAttributes(d.name, d.device_type, 1024, 0) + for d in devices ] - mock_eager_list_devices.return_value = device_names + mock_eager_list_devices.return_value = devices mock_list_devices.return_value = device_list resolver = TFConfigClusterResolver() diff --git a/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver_test.py b/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver_test.py index 83ded5c18b6..2cf6301eefe 100644 --- a/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver_test.py +++ b/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver_test.py @@ -23,9 +23,10 @@ import os import six from six.moves.urllib.error import URLError -from tensorflow.python import eager +from tensorflow.python import framework from tensorflow.python.client import session from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver as resolver +from tensorflow.python.eager.context import LogicalDevice from tensorflow.python.framework import errors from tensorflow.python.framework import test_util from tensorflow.python.platform import test @@ -636,33 +637,33 @@ class TPUClusterResolverTest(test.TestCase): 1: [1, 2] }) - @mock.patch.object(eager.context, 'list_devices') + @mock.patch.object(framework.config, 'list_logical_devices') @mock.patch.object(session.BaseSession, 'list_devices') @mock.patch.object(resolver, 'is_running_in_gce', mock_is_not_running_in_gce) def testNumAcceleratorsSuccess(self, mock_list_devices, mock_eager_list_devices): 
- device_names = [ - '/job:tpu_worker/task:0/device:TPU:0', - '/job:tpu_worker/task:1/device:TPU:1', - '/job:tpu_worker/task:2/device:TPU:0', - '/job:tpu_worker/task:3/device:TPU:1', - '/job:tpu_worker/task:0/device:TPU:4', - '/job:tpu_worker/task:1/device:TPU:5', - '/job:tpu_worker/task:2/device:TPU:4', - '/job:tpu_worker/task:3/device:TPU:5', + devices = [ + LogicalDevice('/job:tpu_worker/task:0/device:TPU:0', 'TPU'), + LogicalDevice('/job:tpu_worker/task:1/device:TPU:1', 'TPU'), + LogicalDevice('/job:tpu_worker/task:2/device:TPU:0', 'TPU'), + LogicalDevice('/job:tpu_worker/task:3/device:TPU:1', 'TPU'), + LogicalDevice('/job:tpu_worker/task:0/device:TPU:4', 'TPU'), + LogicalDevice('/job:tpu_worker/task:1/device:TPU:5', 'TPU'), + LogicalDevice('/job:tpu_worker/task:2/device:TPU:4', 'TPU'), + LogicalDevice('/job:tpu_worker/task:3/device:TPU:5', 'TPU'), ] device_list = [ - session._DeviceAttributes( - name, 'TPU', 1024, 0) for name in device_names + session._DeviceAttributes(d.name, d.device_type, 1024, 0) + for d in devices ] - mock_eager_list_devices.return_value = device_names + mock_eager_list_devices.return_value = devices mock_list_devices.return_value = device_list cluster_resolver = resolver.TPUClusterResolver(tpu='') self.assertEqual(cluster_resolver.num_accelerators(), {'TPU': 2}) - @mock.patch.object(eager.context, 'list_devices') + @mock.patch.object(framework.config, 'list_logical_devices') @mock.patch.object(session.BaseSession, 'list_devices') @mock.patch.object(resolver, 'is_running_in_gce', mock_is_not_running_in_gce) diff --git a/tensorflow/python/distribute/mirrored_variable_test.py b/tensorflow/python/distribute/mirrored_variable_test.py index a5e682f09c3..f237ee19205 100644 --- a/tensorflow/python/distribute/mirrored_variable_test.py +++ b/tensorflow/python/distribute/mirrored_variable_test.py @@ -52,9 +52,9 @@ def _replica_id(): def _mimic_two_cpus(): cpus = config.list_physical_devices("CPU") - config.set_virtual_device_configuration(cpus[0], 
[ - context.VirtualDeviceConfiguration(), - context.VirtualDeviceConfiguration(), + config.set_logical_device_configuration(cpus[0], [ + context.LogicalDeviceConfiguration(), + context.LogicalDeviceConfiguration(), ]) diff --git a/tensorflow/python/distribute/multi_worker_continuous_run_test.py b/tensorflow/python/distribute/multi_worker_continuous_run_test.py index f5b98377fc2..8785b56d1b9 100644 --- a/tensorflow/python/distribute/multi_worker_continuous_run_test.py +++ b/tensorflow/python/distribute/multi_worker_continuous_run_test.py @@ -70,8 +70,8 @@ class MultiWorkerContinuousRunTest(test.TestCase, parameterized.TestCase): if gpus: # Set virtual GPU with memory limit of 64MB so that multiple worker # processes can share the physical GPU - config.set_virtual_device_configuration( - gpus[0], [context.VirtualDeviceConfiguration(64)]) + config.set_logical_device_configuration( + gpus[0], [context.LogicalDeviceConfiguration(64)]) for _ in range(100): worker_step_fn() diff --git a/tensorflow/python/distribute/strategy_combinations.py b/tensorflow/python/distribute/strategy_combinations.py index 193a84bb09a..6f34c7c71ab 100644 --- a/tensorflow/python/distribute/strategy_combinations.py +++ b/tensorflow/python/distribute/strategy_combinations.py @@ -173,12 +173,13 @@ def set_virtual_cpus_to_at_least(num_virtual_cpus): physical_devices = config.list_physical_devices("CPU") if not physical_devices: raise RuntimeError("No CPUs found") - configs = config.get_virtual_device_configuration(physical_devices[0]) + configs = config.get_logical_device_configuration(physical_devices[0]) if configs is None: - virtual_devices = [context.VirtualDeviceConfiguration() - for _ in range(num_virtual_cpus)] - config.set_virtual_device_configuration( - physical_devices[0], virtual_devices) + logical_devices = [ + context.LogicalDeviceConfiguration() for _ in range(num_virtual_cpus) + ] + config.set_logical_device_configuration(physical_devices[0], + logical_devices) else: if len(configs) < 
num_virtual_cpus: raise RuntimeError("Already configured with %d < %d virtual CPUs" % diff --git a/tensorflow/python/distribute/strategy_combinations_test.py b/tensorflow/python/distribute/strategy_combinations_test.py index 082707c82d5..b41599af5b8 100644 --- a/tensorflow/python/distribute/strategy_combinations_test.py +++ b/tensorflow/python/distribute/strategy_combinations_test.py @@ -38,12 +38,12 @@ class StrategyCombinationsTest(test.TestCase, parameterized.TestCase): def test3VirtualCPUs(self): cpu_device = config.list_physical_devices("CPU")[0] - self.assertLen(config.get_virtual_device_configuration(cpu_device), 3) + self.assertLen(config.get_logical_device_configuration(cpu_device), 3) def testSetVirtualCPUsAgain(self): strategy_combinations.set_virtual_cpus_to_at_least(2) cpu_device = config.list_physical_devices("CPU")[0] - self.assertLen(config.get_virtual_device_configuration(cpu_device), 3) + self.assertLen(config.get_logical_device_configuration(cpu_device), 3) def testSetVirtualCPUsErrors(self): with self.assertRaises(ValueError): diff --git a/tensorflow/python/eager/benchmarks/resnet50/resnet50_test_util.py b/tensorflow/python/eager/benchmarks/resnet50/resnet50_test_util.py index 50121917d12..dacc6fb6a85 100644 --- a/tensorflow/python/eager/benchmarks/resnet50/resnet50_test_util.py +++ b/tensorflow/python/eager/benchmarks/resnet50/resnet50_test_util.py @@ -24,7 +24,7 @@ import tensorflow as tf def device_and_data_format(): - if tf.config.experimental.list_physical_devices('GPU'): + if tf.config.list_physical_devices('GPU'): return ('/gpu:0', 'channels_first') return ('/cpu:0', 'channels_last') diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index 99f2e218ef5..45788354d3a 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -234,14 +234,15 @@ class _ContextSwitchStack(threading.local): self.stack.pop() +@tf_export("config.LogicalDevice") class LogicalDevice( 
collections.namedtuple("LogicalDevice", ["name", "device_type"])): - """Abstraction for a device initialized by the runtime. + """Abstraction for a logical device initialized by the runtime. - A LogicalDevice corresponds to a initialized instance on a PhysicalDevice or a - remote device available in the cluster. Tensors and operations can be placed - on a specific LogicalDevice by calling `tf.device()` with the `name` of the - LogicalDevice. + A `tf.config.LogicalDevice` corresponds to an initialized logical device on a + `tf.config.PhysicalDevice` or a remote device visible to the cluster. Tensors + and operations can be placed on a specific logical device by calling + `tf.device` with a specified `tf.config.LogicalDevice`. Fields: name: The fully qualified name of the device. Can be used for Op or function @@ -251,16 +252,18 @@ class LogicalDevice( pass -@tf_export("config.experimental.VirtualDeviceConfiguration") -class VirtualDeviceConfiguration( - collections.namedtuple("VirtualDeviceConfiguration", ["memory_limit"])): - """Configuration class for a `LogicalDevice`. +@tf_export("config.LogicalDeviceConfiguration", + "config.experimental.VirtualDeviceConfiguration") +class LogicalDeviceConfiguration( + collections.namedtuple("LogicalDeviceConfiguration", ["memory_limit"])): + """Configuration class for logical devices. - The class specifies the parameters for a `LogicalDevice` used during runtime + The class specifies the parameters to configure a `tf.config.PhysicalDevice` + as it is initialized to a `tf.config.LogicalDevice` during runtime initialization. Not all fields are valid for all device types. - See `tf.config.experimental.get_virtual_device_configuration` and - `tf.config.experimental.set_virtual_device_configuration` for usage examples. + See `tf.config.get_logical_device_configuration` and + `tf.config.set_logical_device_configuration` for usage examples. 
Fields: memory_limit: (optional) Maximum memory (in MB) to allocate on the virtual @@ -268,9 +271,10 @@ class VirtualDeviceConfiguration( """ def __new__(cls, memory_limit=None): - return super(VirtualDeviceConfiguration, cls).__new__(cls, memory_limit) + return super(LogicalDeviceConfiguration, cls).__new__(cls, memory_limit) +@tf_export("config.PhysicalDevice") class PhysicalDevice( collections.namedtuple("PhysicalDevice", ["name", "device_type"])): """Abstraction for a locally visible physical device. @@ -280,10 +284,13 @@ class PhysicalDevice( customize certain properties of the device such as it's visibility or memory configuration. - Once a PhysicalDevice is initialized one or many LogicalDevice objects are - created. Use tf.config.set_virtual_device_configuration() to create multiple - LogicalDevice objects for a PhysicalDevice. This is useful when separation - between models is needed. + Once a visible `tf.config.PhysicalDevice` is initialized one or more + `tf.config.LogicalDevice` objects are created. Use + `tf.config.set_visible_devices` to configure the visibility of a physical + device and `tf.config.set_logical_device_configuration` to configure multiple + `tf.config.LogicalDevice` objects for a `tf.config.PhysicalDevice`. This is + useful when separation between models is needed or to simulate a multi-device + environment. Fields: name: Unique identifier for device. 
@@ -1114,8 +1121,8 @@ class Context(object): if num_cpus == 0: self.set_visible_devices([], "CPU") elif num_cpus > 1: - self.set_virtual_device_configuration( - cpus[0], [VirtualDeviceConfiguration() for _ in range(num_cpus)]) + self.set_logical_device_configuration( + cpus[0], [LogicalDeviceConfiguration() for _ in range(num_cpus)]) # Parse GPU options gpus = [d for d in self._physical_devices if d.device_type == "GPU"] @@ -1224,7 +1231,7 @@ class Context(object): self._memory_growth_map[dev] = enable - def get_virtual_device_configuration(self, dev): + def get_logical_device_configuration(self, dev): """Get the virtual device configuration for a PhysicalDevice.""" self._initialize_physical_devices() @@ -1233,7 +1240,7 @@ class Context(object): return self._virtual_device_map.get(dev) - def set_virtual_device_configuration(self, dev, virtual_devices): + def set_logical_device_configuration(self, dev, virtual_devices): """Set the virtual device configuration for a PhysicalDevice.""" self._initialize_physical_devices() @@ -1801,17 +1808,6 @@ def device(name): return context().device(name) -@tf_export("config.experimental_list_devices") -def list_devices(): - """List the names of the available devices. - - Returns: - Names of the available devices, as a `list`. - """ - ensure_initialized() - return context().devices() - - @tf_export("debugging.get_log_device_placement") def get_log_device_placement(): """Get if device placements are logged. 
diff --git a/tensorflow/python/eager/core_test.py b/tensorflow/python/eager/core_test.py index 1fa95cc193f..5039108d4d4 100644 --- a/tensorflow/python/eager/core_test.py +++ b/tensorflow/python/eager/core_test.py @@ -69,9 +69,9 @@ def current_device(): def configure_virtual_cpus(): cpus = config.list_physical_devices('CPU') # Set 2 virtual CPUs - config.set_virtual_device_configuration(cpus[0], [ - context.VirtualDeviceConfiguration(), - context.VirtualDeviceConfiguration() + config.set_logical_device_configuration(cpus[0], [ + context.LogicalDeviceConfiguration(), + context.LogicalDeviceConfiguration() ]) diff --git a/tensorflow/python/eager/function_gradients_test.py b/tensorflow/python/eager/function_gradients_test.py index 1b052ad4f45..ffd84fc56af 100644 --- a/tensorflow/python/eager/function_gradients_test.py +++ b/tensorflow/python/eager/function_gradients_test.py @@ -55,11 +55,11 @@ class FunctionGradientsTest(test.TestCase, parameterized.TestCase): super(FunctionGradientsTest, self).setUp() cpus = config.list_physical_devices('CPU') # Set 4 virtual CPUs - config.set_virtual_device_configuration(cpus[0], [ - context.VirtualDeviceConfiguration(), - context.VirtualDeviceConfiguration(), - context.VirtualDeviceConfiguration(), - context.VirtualDeviceConfiguration() + config.set_logical_device_configuration(cpus[0], [ + context.LogicalDeviceConfiguration(), + context.LogicalDeviceConfiguration(), + context.LogicalDeviceConfiguration(), + context.LogicalDeviceConfiguration() ]) def testGraphModeWithGradients(self): diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index ca9fb7b68da..ca41f833625 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -133,11 +133,11 @@ class FunctionTest(test.TestCase, parameterized.TestCase): super(FunctionTest, self).setUp() cpus = config.list_physical_devices('CPU') # Set 4 virtual CPUs - 
config.set_virtual_device_configuration(cpus[0], [ - context.VirtualDeviceConfiguration(), - context.VirtualDeviceConfiguration(), - context.VirtualDeviceConfiguration(), - context.VirtualDeviceConfiguration() + config.set_logical_device_configuration(cpus[0], [ + context.LogicalDeviceConfiguration(), + context.LogicalDeviceConfiguration(), + context.LogicalDeviceConfiguration(), + context.LogicalDeviceConfiguration() ]) def testBasic(self): diff --git a/tensorflow/python/eager/remote_cloud_tpu_test.py b/tensorflow/python/eager/remote_cloud_tpu_test.py index be8d50256f2..1d2e05d764d 100644 --- a/tensorflow/python/eager/remote_cloud_tpu_test.py +++ b/tensorflow/python/eager/remote_cloud_tpu_test.py @@ -22,8 +22,8 @@ from absl import flags from absl.testing import absltest from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver -from tensorflow.python.eager import context from tensorflow.python.eager import remote +from tensorflow.python.framework import config from tensorflow.python.tpu import tpu_strategy_util FLAGS = flags.FLAGS @@ -56,18 +56,16 @@ class RemoteCloudTPUTest(absltest.TestCase): """Test that we can connect to a real Cloud TPU.""" def test_connect(self): - self.assertCountEqual( - EXPECTED_DEVICES_PRE_CONNECT, - context.list_devices()) + self.assertCountEqual(EXPECTED_DEVICES_PRE_CONNECT, + config.list_logical_devices()) resolver = tpu_cluster_resolver.TPUClusterResolver( tpu=FLAGS.tpu, zone=FLAGS.zone, project=FLAGS.project ) remote.connect_to_cluster(resolver) - self.assertCountEqual( - EXPECTED_DEVICES_AFTER_CONNECT, - context.list_devices()) + self.assertCountEqual(EXPECTED_DEVICES_AFTER_CONNECT, + config.list_logical_devices()) tpu_strategy_util.initialize_tpu_system(resolver) diff --git a/tensorflow/python/framework/config.py b/tensorflow/python/framework/config.py index 2f17c1f3fbe..c24b4e696e0 100644 --- a/tensorflow/python/framework/config.py +++ b/tensorflow/python/framework/config.py @@ -19,6 +19,7 @@ from __future__ 
import division from __future__ import print_function from tensorflow.python.eager import context +from tensorflow.python.util import deprecation from tensorflow.python.util.tf_export import tf_export @@ -293,42 +294,60 @@ def set_synchronous_execution(enable): context.context().execution_mode = context.ASYNC -@tf_export('config.experimental.list_physical_devices') +@tf_export('config.list_physical_devices', + 'config.experimental.list_physical_devices') +@deprecation.deprecated_endpoints( + 'config.experimental.list_physical_devices') def list_physical_devices(device_type=None): """Return a list of physical devices visible to the host runtime. Physical devices are hardware devices present on the host machine. By default - all discovered CPU and GPU devices are considered visible. The - `tf.config.experimental.list_physical_devices` API allows querying the - hardware prior to runtime initialization. + all discovered CPU and GPU devices are considered visible. + + This API allows querying the physical hardware resources prior to runtime + initialization. Thus, giving an opportunity to call any additional + configuration APIs. This is in contrast to `tf.config.list_logical_devices`, + which triggers runtime initialization in order to list the configured devices. The following example lists the number of visible GPUs on the host. - >>> physical_devices = tf.config.experimental.list_physical_devices('GPU') + >>> physical_devices = tf.config.list_physical_devices('GPU') >>> print("Num GPUs:", len(physical_devices)) Num GPUs: ... + However, the number of GPUs available to the runtime may change during runtime + initialization due to marking certain devices as not visible or configuring + multiple logical devices. + Args: device_type: (optional string) Only include devices matching this device type. For example "CPU" or "GPU". 
Returns: - List of discovered `PhysicalDevice`s + List of discovered `tf.config.PhysicalDevice` objects """ return context.context().list_physical_devices(device_type) -@tf_export('config.experimental.list_logical_devices') +@tf_export('config.list_logical_devices', + 'config.experimental.list_logical_devices') +@deprecation.deprecated_endpoints( + 'config.experimental.list_logical_devices') def list_logical_devices(device_type=None): """Return a list of logical devices created by runtime. Logical devices may correspond to physical devices or remote devices in the cluster. Operations and tensors may be placed on these devices by using the - `name` of the `LogicalDevice`. + `name` of the `tf.config.LogicalDevice`. + + Calling `tf.config.list_logical_devices` triggers the runtime to configure any + `tf.config.PhysicalDevice` visible to the runtime, thereby preventing + further configuration. To avoid runtime initialization, call + `tf.config.list_physical_devices` instead. For example: - >>> logical_devices = tf.config.experimental.list_logical_devices('GPU') + >>> logical_devices = tf.config.list_logical_devices('GPU') >>> if len(logical_devices) > 0: ... # Allocate on GPU:0 ... with tf.device(logical_devices[0].name): @@ -347,7 +366,10 @@ def list_logical_devices(device_type=None): return context.context().list_logical_devices(device_type=device_type) -@tf_export('config.experimental.get_visible_devices') +@tf_export('config.get_visible_devices', + 'config.experimental.get_visible_devices') +@deprecation.deprecated_endpoints( + 'config.experimental.get_visible_devices') def get_visible_devices(device_type=None): """Get the list of visible physical devices. @@ -357,11 +379,11 @@ def get_visible_devices(device_type=None): The following example verifies all visible GPUs have been disabled: - >>> physical_devices = tf.config.experimental.list_physical_devices('GPU') + >>> physical_devices = tf.config.list_physical_devices('GPU') >>> try: ... # Disable all GPUS - ... 
tf.config.experimental.set_visible_devices([], 'GPU') - ... visible_devices = tf.config.experimental.get_visible_devices() + ... tf.config.set_visible_devices([], 'GPU') + ... visible_devices = tf.config.get_visible_devices() ... for device in visible_devices: ... assert device.device_type != 'GPU' ... except: @@ -378,7 +400,10 @@ def get_visible_devices(device_type=None): return context.context().get_visible_devices(device_type) -@tf_export('config.experimental.set_visible_devices') +@tf_export('config.set_visible_devices', + 'config.experimental.set_visible_devices') +@deprecation.deprecated_endpoints( + 'config.experimental.set_visible_devices') def set_visible_devices(devices, device_type=None): """Set the list of visible devices. @@ -389,11 +414,11 @@ def set_visible_devices(devices, device_type=None): The following example demonstrates disabling the first GPU on the machine. - >>> physical_devices = tf.config.experimental.list_physical_devices('GPU') + >>> physical_devices = tf.config.list_physical_devices('GPU') >>> try: ... # Disable first GPU - ... tf.config.experimental.set_visible_devices(physical_devices[1:], 'GPU') - ... logical_devices = tf.config.experimental.list_logical_devices('GPU') + ... tf.config.set_visible_devices(physical_devices[1:], 'GPU') + ... logical_devices = tf.config.list_logical_devices('GPU') ... # Logical device was not created for first GPU ... assert len(logical_devices) == len(physical_devices) - 1 ... except: @@ -421,7 +446,7 @@ def get_memory_growth(device): For example: - >>> physical_devices = tf.config.experimental.list_physical_devices('GPU') + >>> physical_devices = tf.config.list_physical_devices('GPU') >>> try: ... tf.config.experimental.set_memory_growth(physical_devices[0], True) ... 
assert tf.config.experimental.get_memory_growth(physical_devices[0]) @@ -451,7 +476,7 @@ def set_memory_growth(device, enable): For example: - >>> physical_devices = tf.config.experimental.list_physical_devices('GPU') + >>> physical_devices = tf.config.list_physical_devices('GPU') >>> try: ... tf.config.experimental.set_memory_growth(physical_devices[0], True) ... except: @@ -469,27 +494,30 @@ def set_memory_growth(device, enable): context.context().set_memory_growth(device, enable) -@tf_export('config.experimental.get_virtual_device_configuration') -def get_virtual_device_configuration(device): - """Get the virtual device configuration for a `PhysicalDevice`. +@tf_export('config.get_logical_device_configuration', + 'config.experimental.get_virtual_device_configuration') +@deprecation.deprecated_endpoints( + 'config.experimental.get_virtual_device_configuration') +def get_logical_device_configuration(device): + """Get the logical device configuration for a `tf.config.PhysicalDevice`. - Returns the list of `tf.config.experimental.VirtualDeviceConfiguration` + Returns the list of `tf.config.LogicalDeviceConfiguration` objects previously configured by a call to - `tf.config.experimental.set_virtual_device_configuration()`. + `tf.config.set_logical_device_configuration`. For example: - >>> physical_devices = tf.config.experimental.list_physical_devices('CPU') + >>> physical_devices = tf.config.list_physical_devices('CPU') >>> assert len(physical_devices) == 1, "No CPUs found" - >>> configs = tf.config.experimental.get_virtual_device_configuration( + >>> configs = tf.config.get_logical_device_configuration( ... physical_devices[0]) >>> try: ... assert configs is None - ... tf.config.experimental.set_virtual_device_configuration( + ... tf.config.set_logical_device_configuration( ... physical_devices[0], - ... [tf.config.experimental.VirtualDeviceConfiguration(), - ... tf.config.experimental.VirtualDeviceConfiguration()]) - ...
configs = tf.config.experimental.get_virtual_device_configuration( + ... [tf.config.LogicalDeviceConfiguration(), + ... tf.config.LogicalDeviceConfiguration()]) + ... configs = tf.config.get_logical_device_configuration( ... physical_devices[0]) ... assert len(configs) == 2 ... except: @@ -500,77 +528,79 @@ def get_virtual_device_configuration(device): device: `PhysicalDevice` to query Returns: - List of `tf.config.experimental.VirtualDeviceConfiguration` objects or + List of `tf.config.LogicalDeviceConfiguration` objects or `None` if no virtual device configuration has been set for this physical device. """ - return context.context().get_virtual_device_configuration(device) + return context.context().get_logical_device_configuration(device) -@tf_export('config.experimental.set_virtual_device_configuration') -def set_virtual_device_configuration(device, virtual_devices): - """Set the virtual device configuration for a `PhysicalDevice`. +@tf_export('config.set_logical_device_configuration', + 'config.experimental.set_virtual_device_configuration') +@deprecation.deprecated_endpoints( + 'config.experimental.set_virtual_device_configuration') +def set_logical_device_configuration(device, logical_devices): + """Set the logical device configuration for a `tf.config.PhysicalDevice`. - A visible `PhysicalDevice` will by default have a single `LogicalDevice` - associated with it once the runtime is initialized. Specifying a list of - `tf.config.experimental.VirtualDeviceConfiguration`s allows multiple - devices to be on the same `PhysicalDevice`. + A visible `tf.config.PhysicalDevice` will by default have a single + `tf.config.LogicalDevice` associated with it once the runtime is initialized. + Specifying a list of `tf.config.LogicalDeviceConfiguration` objects allows + multiple devices to be created on the same `tf.config.PhysicalDevice`. 
- The following example splits the CPU into 2 virtual devices: + The following example splits the CPU into 2 logical devices: - >>> physical_devices = tf.config.experimental.list_physical_devices('CPU') + >>> physical_devices = tf.config.list_physical_devices('CPU') >>> assert len(physical_devices) == 1, "No CPUs found" >>> # Specify 2 virtual CPUs. Note currently memory limit is not supported. >>> try: - ... tf.config.experimental.set_virtual_device_configuration( + ... tf.config.set_logical_device_configuration( ... physical_devices[0], - ... [tf.config.experimental.VirtualDeviceConfiguration(), - ... tf.config.experimental.VirtualDeviceConfiguration()]) - ... logical_devices = tf.config.experimental.list_logical_devices('CPU') + ... [tf.config.LogicalDeviceConfiguration(), + ... tf.config.LogicalDeviceConfiguration()]) + ... logical_devices = tf.config.list_logical_devices('CPU') ... assert len(logical_devices) == 2 ... - ... tf.config.experimental.set_virtual_device_configuration( + ... tf.config.set_logical_device_configuration( ... physical_devices[0], - ... [tf.config.experimental.VirtualDeviceConfiguration(), - ... tf.config.experimental.VirtualDeviceConfiguration(), - ... tf.config.experimental.VirtualDeviceConfiguration(), - ... tf.config.experimental.VirtualDeviceConfiguration()]) + ... [tf.config.LogicalDeviceConfiguration(), + ... tf.config.LogicalDeviceConfiguration(), + ... tf.config.LogicalDeviceConfiguration(), + ... tf.config.LogicalDeviceConfiguration()]) ... except: - ... # Cannot modify virtual devices once initialized. + ... # Cannot modify logical devices once initialized. ... pass - The following example splits the GPU into 2 virtual devices with 100 MB each: + The following example splits the GPU into 2 logical devices with 100 MB each: - >>> physical_devices = tf.config.experimental.list_physical_devices('GPU') + >>> physical_devices = tf.config.list_physical_devices('GPU') >>> try: - ... 
tf.config.experimental.set_virtual_device_configuration( + ... tf.config.set_logical_device_configuration( ... physical_devices[0], - ... [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=100), - ... tf.config.experimental.VirtualDeviceConfiguration(memory_limit=100)]) + ... [tf.config.LogicalDeviceConfiguration(memory_limit=100), + ... tf.config.LogicalDeviceConfiguration(memory_limit=100)]) ... - ... logical_devices = tf.config.experimental.list_logical_devices('GPU') + ... logical_devices = tf.config.list_logical_devices('GPU') ... assert len(logical_devices) == len(physical_devices) + 1 ... - ... tf.config.experimental.set_virtual_device_configuration( + ... tf.config.set_logical_device_configuration( ... physical_devices[0], - ... [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=10), - ... tf.config.experimental.VirtualDeviceConfiguration(memory_limit=10)]) + ... [tf.config.LogicalDeviceConfiguration(memory_limit=10), + ... tf.config.LogicalDeviceConfiguration(memory_limit=10)]) ... except: - ... # Invalid device or cannot modify virtual devices once initialized. + ... # Invalid device or cannot modify logical devices once initialized. ... pass Args: device: The `PhysicalDevice` to configure. - virtual_devices: (optional) List of - `tf.config.experimental.VirtualDeviceConfiguration` objects to allocate - for the specified `PhysicalDevice`. If None, the default configuration - will be used. + logical_devices: (optional) List of `tf.config.LogicalDeviceConfiguration` + objects to allocate for the specified `PhysicalDevice`. If None, the + default configuration will be used. Raises: ValueError: If argument validation fails. RuntimeError: Runtime is already initialized. 
""" - context.context().set_virtual_device_configuration(device, virtual_devices) + context.context().set_logical_device_configuration(device, logical_devices) @tf_export('config.experimental.enable_mlir_bridge') diff --git a/tensorflow/python/framework/config_test.py b/tensorflow/python/framework/config_test.py index 24250f90100..5f0c9b7e0bc 100644 --- a/tensorflow/python/framework/config_test.py +++ b/tensorflow/python/framework/config_test.py @@ -364,9 +364,9 @@ class DeviceTest(test.TestCase): cpus = config.list_physical_devices('CPU') self.assertEqual(len(cpus), 1) - config.set_virtual_device_configuration(cpus[0], [ - context.VirtualDeviceConfiguration(), - context.VirtualDeviceConfiguration() + config.set_logical_device_configuration(cpus[0], [ + context.LogicalDeviceConfiguration(), + context.LogicalDeviceConfiguration() ]) context.ensure_initialized() @@ -393,16 +393,16 @@ class DeviceTest(test.TestCase): # Modifying the CPU configuration is not supported with self.assertRaisesRegexp(RuntimeError, 'cannot be modified'): - config.set_virtual_device_configuration(cpus[0], [ - context.VirtualDeviceConfiguration(), - context.VirtualDeviceConfiguration(), - context.VirtualDeviceConfiguration() + config.set_logical_device_configuration(cpus[0], [ + context.LogicalDeviceConfiguration(), + context.LogicalDeviceConfiguration(), + context.LogicalDeviceConfiguration() ]) # Setting the same CPU configuration is fine - config.set_virtual_device_configuration(cpus[0], [ - context.VirtualDeviceConfiguration(), - context.VirtualDeviceConfiguration() + config.set_logical_device_configuration(cpus[0], [ + context.LogicalDeviceConfiguration(), + context.LogicalDeviceConfiguration() ]) @test_util.run_gpu_only @@ -456,12 +456,12 @@ class DeviceTest(test.TestCase): gpus = config.list_physical_devices('GPU') self.assertNotEqual(len(gpus), 0) - self.assertIsNone(config.get_virtual_device_configuration(gpus[-1])) - config.set_virtual_device_configuration(gpus[-1], [ - 
context.VirtualDeviceConfiguration(memory_limit=10), - context.VirtualDeviceConfiguration(memory_limit=10) + self.assertIsNone(config.get_logical_device_configuration(gpus[-1])) + config.set_logical_device_configuration(gpus[-1], [ + context.LogicalDeviceConfiguration(memory_limit=10), + context.LogicalDeviceConfiguration(memory_limit=10) ]) - self.assertEqual(len(config.get_virtual_device_configuration(gpus[-1])), 2) + self.assertEqual(len(config.get_logical_device_configuration(gpus[-1])), 2) logical_gpus = config.list_logical_devices('GPU') self.assertTrue(len(logical_gpus), len(gpus) + 1) @@ -477,22 +477,22 @@ class DeviceTest(test.TestCase): # Modifying the GPU configuration is not supported with self.assertRaisesRegexp(RuntimeError, 'cannot be modified'): - config.set_virtual_device_configuration(gpus[-1], [ - context.VirtualDeviceConfiguration(memory_limit=20), - context.VirtualDeviceConfiguration(memory_limit=20) + config.set_logical_device_configuration(gpus[-1], [ + context.LogicalDeviceConfiguration(memory_limit=20), + context.LogicalDeviceConfiguration(memory_limit=20) ]) with self.assertRaisesRegexp(RuntimeError, 'cannot be modified'): - config.set_virtual_device_configuration(gpus[-1], [ - context.VirtualDeviceConfiguration(memory_limit=10), - context.VirtualDeviceConfiguration(memory_limit=10), - context.VirtualDeviceConfiguration(memory_limit=10) + config.set_logical_device_configuration(gpus[-1], [ + context.LogicalDeviceConfiguration(memory_limit=10), + context.LogicalDeviceConfiguration(memory_limit=10), + context.LogicalDeviceConfiguration(memory_limit=10) ]) # Setting the same GPU configuration is fine - config.set_virtual_device_configuration(gpus[-1], [ - context.VirtualDeviceConfiguration(memory_limit=10), - context.VirtualDeviceConfiguration(memory_limit=10) + config.set_logical_device_configuration(gpus[-1], [ + context.LogicalDeviceConfiguration(memory_limit=10), + context.LogicalDeviceConfiguration(memory_limit=10) ]) 
@test_util.run_gpu_only @@ -554,15 +554,15 @@ class DeviceTest(test.TestCase): self.assertTrue(c.gpu_options.allow_growth) with self.assertRaisesRegexp(ValueError, 'memory limit'): - config.set_virtual_device_configuration(gpus[-1], [ - context.VirtualDeviceConfiguration(), - context.VirtualDeviceConfiguration() + config.set_logical_device_configuration(gpus[-1], [ + context.LogicalDeviceConfiguration(), + context.LogicalDeviceConfiguration() ]) - self.assertIsNone(config.get_virtual_device_configuration(gpus[-1])) - config.set_virtual_device_configuration(gpus[-1], [ - context.VirtualDeviceConfiguration(memory_limit=10), - context.VirtualDeviceConfiguration(memory_limit=10) + self.assertIsNone(config.get_logical_device_configuration(gpus[-1])) + config.set_logical_device_configuration(gpus[-1], [ + context.LogicalDeviceConfiguration(memory_limit=10), + context.LogicalDeviceConfiguration(memory_limit=10) ]) c = context.context().config diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index bcfd4713538..895d7d80248 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -1459,8 +1459,8 @@ def with_forward_compatibility_horizons(*horizons): return decorator -@deprecation.deprecated( - None, "Use `tf.config.experimental.list_physical_devices('GPU')` instead.") +@deprecation.deprecated(None, + "Use `tf.config.list_physical_devices('GPU')` instead.") @tf_export("test.is_gpu_available") def is_gpu_available(cuda_only=False, min_cuda_compute_capability=None): """Returns whether TensorFlow can access a GPU. 
diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py index 0a6a3f24d69..411dccac202 100644 --- a/tensorflow/python/keras/backend.py +++ b/tensorflow/python/keras/backend.py @@ -41,6 +41,7 @@ from tensorflow.python.eager import context from tensorflow.python.eager import function as eager_function from tensorflow.python.eager import lift_to_graph from tensorflow.python.framework import composite_tensor +from tensorflow.python.framework import config from tensorflow.python.framework import constant_op from tensorflow.python.framework import device as tfdev from tensorflow.python.framework import dtypes as dtypes_module @@ -633,7 +634,7 @@ def _get_available_gpus(): """ if ops.executing_eagerly_outside_functions(): # Returns names of devices directly. - return [name for name in context.list_devices() if 'GPU' in name] + return [d.name for d in config.list_logical_devices('GPU')] global _LOCAL_DEVICES if _LOCAL_DEVICES is None: diff --git a/tensorflow/python/keras/utils/multi_gpu_utils_test.py b/tensorflow/python/keras/utils/multi_gpu_utils_test.py index f101e04230a..7e9ec9358b3 100644 --- a/tensorflow/python/keras/utils/multi_gpu_utils_test.py +++ b/tensorflow/python/keras/utils/multi_gpu_utils_test.py @@ -45,15 +45,14 @@ class TestMultiGPUModel(test.TestCase): super(TestMultiGPUModel, self).__init__(methodName) gpu_devices = config.list_physical_devices('GPU') xla_gpu_devices = config.list_physical_devices('XLA_GPU') - # NOTE: XLA devices don't support the set_virtual_device_configuration + # NOTE: XLA devices don't support the set_logical_device_configuration # codepaths. if len(gpu_devices) == 1 and not xla_gpu_devices: # A GPU is available, simulate 2 instead. 
- config.set_virtual_device_configuration( - gpu_devices[0], [ - context.VirtualDeviceConfiguration(500), - context.VirtualDeviceConfiguration(500) - ]) + config.set_logical_device_configuration(gpu_devices[0], [ + context.LogicalDeviceConfiguration(500), + context.LogicalDeviceConfiguration(500) + ]) def test_multi_gpu_test_simple_model(self): gpus = 2 diff --git a/tensorflow/python/ops/collective_ops_gpu_test.py b/tensorflow/python/ops/collective_ops_gpu_test.py index b8f5a49266b..fb769752575 100644 --- a/tensorflow/python/ops/collective_ops_gpu_test.py +++ b/tensorflow/python/ops/collective_ops_gpu_test.py @@ -59,9 +59,9 @@ class CollectiveOpGPUTest(test.TestCase): if len(gpus) < 1: self.skipTest('Expected at least 1 GPU but found {} GPUs'.format( len(gpus))) - config.set_virtual_device_configuration(gpus[0], [ - context.VirtualDeviceConfiguration(1024), - context.VirtualDeviceConfiguration(1024) + config.set_logical_device_configuration(gpus[0], [ + context.LogicalDeviceConfiguration(1024), + context.LogicalDeviceConfiguration(1024) ]) context.ensure_initialized() diff --git a/tensorflow/python/ops/collective_ops_test.py b/tensorflow/python/ops/collective_ops_test.py index d6f7a5ce6c8..0e9b5498a31 100644 --- a/tensorflow/python/ops/collective_ops_test.py +++ b/tensorflow/python/ops/collective_ops_test.py @@ -350,9 +350,9 @@ class CollectiveOpTest(test.TestCase): def testCollectiveGroupSizeMismatch(self): cpus = config.list_physical_devices('CPU') self.assertEqual(len(cpus), 1) - config.set_virtual_device_configuration(cpus[0], [ - context.VirtualDeviceConfiguration(), - context.VirtualDeviceConfiguration() + config.set_logical_device_configuration(cpus[0], [ + context.LogicalDeviceConfiguration(), + context.LogicalDeviceConfiguration() ]) context.ensure_initialized() diff --git a/tensorflow/python/tpu/tpu_system_metadata.py b/tensorflow/python/tpu/tpu_system_metadata.py index 887ac43dc7b..8628feee418 100644 --- a/tensorflow/python/tpu/tpu_system_metadata.py +++ 
b/tensorflow/python/tpu/tpu_system_metadata.py @@ -24,6 +24,7 @@ import re from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session as session_lib from tensorflow.python.eager import context +from tensorflow.python.framework import config from tensorflow.python.framework import device as tf_device from tensorflow.python.framework import errors from tensorflow.python.framework import ops @@ -35,7 +36,6 @@ _RETRY_TIMES = 12 * 24 # 1 day _INITIAL_TPU_SYSTEM_TIMEOUT_IN_MS = 300 * 1000 # 5 mins _TPU_DEVICE_REG = re.compile(r'.*task:(\d+)/.*device:TPU:(\d+)$') -_DEVICE_TYPE_REGEX = re.compile('.*device:([^:]+).*') _DEFAULT_JOB_NAME = 'tpu_worker' _DEFAULT_COORDINATOR_JOB_NAME = 'coordinator' @@ -60,16 +60,12 @@ def _query_tpu_system_metadata(master_address, cluster_def=None, device_dict = collections.defaultdict(list) if context.executing_eagerly(): - device_names = context.list_devices() + logical_devices = config.list_logical_devices() devices = [] # We want the output type to match in both eager and session mode - for name in device_names: - device_match = _DEVICE_TYPE_REGEX.match(name) - device_type = 'CPU' - if device_match: - device_type = device_match.group(1) - devices.append(session_lib._DeviceAttributes(name, device_type, 0, 0)) # pylint: disable=protected-access + for d in logical_devices: + devices.append(session_lib._DeviceAttributes(d.name, d.device_type, 0, 0)) # pylint: disable=protected-access else: # TODO(b/120564445): Replace with standard library for retries. 
retry_count = 1 @@ -179,9 +175,9 @@ def _obtain_topology(master_address, cluster_def): def get_session_config_with_timeout(timeout_in_secs, cluster_def): """Returns a session given a timeout and a cluster configuration.""" - config = config_pb2.ConfigProto( + config_proto = config_pb2.ConfigProto( operation_timeout_in_ms=timeout_in_secs, cluster_def=cluster_def) - return config + return config_proto def master_job(master, cluster_def): diff --git a/tensorflow/tools/api/golden/v1/tensorflow.config.-logical-device-configuration.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.config.-logical-device-configuration.pbtxt new file mode 100644 index 00000000000..3f6c6e636a1 --- /dev/null +++ b/tensorflow/tools/api/golden/v1/tensorflow.config.-logical-device-configuration.pbtxt @@ -0,0 +1,19 @@ +path: "tensorflow.config.LogicalDeviceConfiguration" +tf_class { + is_instance: "<class \'tensorflow.python.eager.context.LogicalDeviceConfiguration\'>" + is_instance: "<class \'tensorflow.python.eager.context.LogicalDeviceConfiguration\'>" + is_instance: "<type \'tuple\'>" + member { + name: "memory_limit" + mtype: "<type \'property\'>" + } + member_method { + name: "__init__" + } + member_method { + name: "count" + } + member_method { + name: "index" + } +} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.config.-logical-device.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.config.-logical-device.pbtxt new file mode 100644 index 00000000000..0e8c9c50c12 --- /dev/null +++ b/tensorflow/tools/api/golden/v1/tensorflow.config.-logical-device.pbtxt @@ -0,0 +1,23 @@ +path: "tensorflow.config.LogicalDevice" +tf_class { + is_instance: "<class \'tensorflow.python.eager.context.LogicalDevice\'>" + is_instance: "<class \'tensorflow.python.eager.context.LogicalDevice\'>" + is_instance: "<type \'tuple\'>" + member { + name: "device_type" + mtype: "<type \'property\'>" + } + member { + name: "name" + mtype: "<type \'property\'>" + } + member_method { + name: "__init__" + } + 
member_method { + name: "count" + } + member_method { + name: "index" + } +} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.config.-physical-device.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.config.-physical-device.pbtxt new file mode 100644 index 00000000000..5d323c8807f --- /dev/null +++ b/tensorflow/tools/api/golden/v1/tensorflow.config.-physical-device.pbtxt @@ -0,0 +1,23 @@ +path: "tensorflow.config.PhysicalDevice" +tf_class { + is_instance: "<class \'tensorflow.python.eager.context.PhysicalDevice\'>" + is_instance: "<class \'tensorflow.python.eager.context.PhysicalDevice\'>" + is_instance: "<type \'tuple\'>" + member { + name: "device_type" + mtype: "<type \'property\'>" + } + member { + name: "name" + mtype: "<type \'property\'>" + } + member_method { + name: "__init__" + } + member_method { + name: "count" + } + member_method { + name: "index" + } +} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.config.experimental.-virtual-device-configuration.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.config.experimental.-virtual-device-configuration.pbtxt index 9ff31b1a532..25b6b6e216e 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.config.experimental.-virtual-device-configuration.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.config.experimental.-virtual-device-configuration.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.config.experimental.VirtualDeviceConfiguration" tf_class { - is_instance: "<class \'tensorflow.python.eager.context.VirtualDeviceConfiguration\'>" - is_instance: "<class \'tensorflow.python.eager.context.VirtualDeviceConfiguration\'>" + is_instance: "<class \'tensorflow.python.eager.context.LogicalDeviceConfiguration\'>" + is_instance: "<class \'tensorflow.python.eager.context.LogicalDeviceConfiguration\'>" is_instance: "<type \'tuple\'>" member { name: "memory_limit" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.config.experimental.pbtxt 
b/tensorflow/tools/api/golden/v1/tensorflow.config.experimental.pbtxt index b5cfaadcccc..f4b8bd63b0a 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.config.experimental.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.config.experimental.pbtxt @@ -54,7 +54,7 @@ tf_module { } member_method { name: "set_virtual_device_configuration" - argspec: "args=[\'device\', \'virtual_devices\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'device\', \'logical_devices\'], varargs=None, keywords=None, defaults=None" } member_method { name: "set_visible_devices" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.config.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.config.pbtxt index 4b9089c85ef..b9d1004803f 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.config.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.config.pbtxt @@ -1,5 +1,17 @@ path: "tensorflow.config" tf_module { + member { + name: "LogicalDevice" + mtype: "<type \'type\'>" + } + member { + name: "LogicalDeviceConfiguration" + mtype: "<type \'type\'>" + } + member { + name: "PhysicalDevice" + mtype: "<type \'type\'>" + } member { name: "experimental" mtype: "<type \'module\'>" @@ -24,20 +36,40 @@ tf_module { name: "experimental_functions_run_eagerly" argspec: "args=[], varargs=None, keywords=None, defaults=None" } - member_method { - name: "experimental_list_devices" - argspec: "args=[], varargs=None, keywords=None, defaults=None" - } member_method { name: "experimental_run_functions_eagerly" argspec: "args=[\'run_eagerly\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_logical_device_configuration" + argspec: "args=[\'device\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_soft_device_placement" argspec: "args=[], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_visible_devices" + argspec: "args=[\'device_type\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + 
member_method { + name: "list_logical_devices" + argspec: "args=[\'device_type\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "list_physical_devices" + argspec: "args=[\'device_type\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "set_logical_device_configuration" + argspec: "args=[\'device\', \'logical_devices\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_soft_device_placement" argspec: "args=[\'enabled\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "set_visible_devices" + argspec: "args=[\'devices\', \'device_type\'], varargs=None, keywords=None, defaults=[\'None\'], " + } } diff --git a/tensorflow/tools/api/golden/v2/tensorflow.config.-logical-device-configuration.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.config.-logical-device-configuration.pbtxt new file mode 100644 index 00000000000..3f6c6e636a1 --- /dev/null +++ b/tensorflow/tools/api/golden/v2/tensorflow.config.-logical-device-configuration.pbtxt @@ -0,0 +1,19 @@ +path: "tensorflow.config.LogicalDeviceConfiguration" +tf_class { + is_instance: "<class \'tensorflow.python.eager.context.LogicalDeviceConfiguration\'>" + is_instance: "<class \'tensorflow.python.eager.context.LogicalDeviceConfiguration\'>" + is_instance: "<type \'tuple\'>" + member { + name: "memory_limit" + mtype: "<type \'property\'>" + } + member_method { + name: "__init__" + } + member_method { + name: "count" + } + member_method { + name: "index" + } +} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.config.-logical-device.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.config.-logical-device.pbtxt new file mode 100644 index 00000000000..0e8c9c50c12 --- /dev/null +++ b/tensorflow/tools/api/golden/v2/tensorflow.config.-logical-device.pbtxt @@ -0,0 +1,23 @@ +path: "tensorflow.config.LogicalDevice" +tf_class { + is_instance: "<class \'tensorflow.python.eager.context.LogicalDevice\'>" + 
is_instance: "<class \'tensorflow.python.eager.context.LogicalDevice\'>" + is_instance: "<type \'tuple\'>" + member { + name: "device_type" + mtype: "<type \'property\'>" + } + member { + name: "name" + mtype: "<type \'property\'>" + } + member_method { + name: "__init__" + } + member_method { + name: "count" + } + member_method { + name: "index" + } +} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.config.-physical-device.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.config.-physical-device.pbtxt new file mode 100644 index 00000000000..5d323c8807f --- /dev/null +++ b/tensorflow/tools/api/golden/v2/tensorflow.config.-physical-device.pbtxt @@ -0,0 +1,23 @@ +path: "tensorflow.config.PhysicalDevice" +tf_class { + is_instance: "<class \'tensorflow.python.eager.context.PhysicalDevice\'>" + is_instance: "<class \'tensorflow.python.eager.context.PhysicalDevice\'>" + is_instance: "<type \'tuple\'>" + member { + name: "device_type" + mtype: "<type \'property\'>" + } + member { + name: "name" + mtype: "<type \'property\'>" + } + member_method { + name: "__init__" + } + member_method { + name: "count" + } + member_method { + name: "index" + } +} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.config.experimental.-virtual-device-configuration.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.config.experimental.-virtual-device-configuration.pbtxt index 9ff31b1a532..25b6b6e216e 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.config.experimental.-virtual-device-configuration.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.config.experimental.-virtual-device-configuration.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.config.experimental.VirtualDeviceConfiguration" tf_class { - is_instance: "<class \'tensorflow.python.eager.context.VirtualDeviceConfiguration\'>" - is_instance: "<class \'tensorflow.python.eager.context.VirtualDeviceConfiguration\'>" + is_instance: "<class \'tensorflow.python.eager.context.LogicalDeviceConfiguration\'>" + is_instance: 
"<class \'tensorflow.python.eager.context.LogicalDeviceConfiguration\'>" is_instance: "<type \'tuple\'>" member { name: "memory_limit" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.config.experimental.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.config.experimental.pbtxt index b5cfaadcccc..f4b8bd63b0a 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.config.experimental.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.config.experimental.pbtxt @@ -54,7 +54,7 @@ tf_module { } member_method { name: "set_virtual_device_configuration" - argspec: "args=[\'device\', \'virtual_devices\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'device\', \'logical_devices\'], varargs=None, keywords=None, defaults=None" } member_method { name: "set_visible_devices" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.config.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.config.pbtxt index 4b9089c85ef..b9d1004803f 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.config.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.config.pbtxt @@ -1,5 +1,17 @@ path: "tensorflow.config" tf_module { + member { + name: "LogicalDevice" + mtype: "<type \'type\'>" + } + member { + name: "LogicalDeviceConfiguration" + mtype: "<type \'type\'>" + } + member { + name: "PhysicalDevice" + mtype: "<type \'type\'>" + } member { name: "experimental" mtype: "<type \'module\'>" @@ -24,20 +36,40 @@ tf_module { name: "experimental_functions_run_eagerly" argspec: "args=[], varargs=None, keywords=None, defaults=None" } - member_method { - name: "experimental_list_devices" - argspec: "args=[], varargs=None, keywords=None, defaults=None" - } member_method { name: "experimental_run_functions_eagerly" argspec: "args=[\'run_eagerly\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_logical_device_configuration" + argspec: "args=[\'device\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "get_soft_device_placement" 
argspec: "args=[], varargs=None, keywords=None, defaults=None" } + member_method { + name: "get_visible_devices" + argspec: "args=[\'device_type\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "list_logical_devices" + argspec: "args=[\'device_type\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "list_physical_devices" + argspec: "args=[\'device_type\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "set_logical_device_configuration" + argspec: "args=[\'device\', \'logical_devices\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "set_soft_device_placement" argspec: "args=[\'enabled\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "set_visible_devices" + argspec: "args=[\'devices\', \'device_type\'], varargs=None, keywords=None, defaults=[\'None\'], " + } }