Make device listing APIs non-experimental
We make listing logical & physical devices non-experimental, along with setting the device visibility and logical device configuration. The existing experimental symbols are left exposed to avoid the need to update user code. However, we remove tf.config.experimental_list_devices in favor of tf.config.list_logical_devices. PiperOrigin-RevId: 278576674 Change-Id: I86106308457e8ff8faf77b1f5ed6cb4433d8bc41
This commit is contained in:
parent
f63cc98935
commit
2690948fb7
@ -21,19 +21,16 @@ from __future__ import print_function
|
||||
import abc
|
||||
|
||||
import collections
|
||||
import re
|
||||
import six
|
||||
|
||||
from tensorflow.python.client import session
|
||||
from tensorflow.python.eager import context
|
||||
from tensorflow.python.framework import config
|
||||
from tensorflow.python.framework import ops
|
||||
from tensorflow.python.training.server_lib import ClusterSpec
|
||||
from tensorflow.python.util.tf_export import tf_export
|
||||
|
||||
|
||||
DEVICE_TYPE_REGEX = re.compile('.*device:([^:]+).*')
|
||||
|
||||
|
||||
def format_master_url(master, rpc_layer=None):
|
||||
if rpc_layer:
|
||||
return '%s://%s' % (rpc_layer, master)
|
||||
@ -44,16 +41,12 @@ def format_master_url(master, rpc_layer=None):
|
||||
def get_accelerator_devices(master, config_proto):
|
||||
"""Returns accelerator devices given a master and a configuration."""
|
||||
if context.executing_eagerly():
|
||||
device_names = context.list_devices() # list_devices returns list(string)
|
||||
logical_devices = config.list_logical_devices()
|
||||
devices = []
|
||||
for name in device_names:
|
||||
device_type = 'GPU' # default device type is GPU
|
||||
device_match = DEVICE_TYPE_REGEX.match(name)
|
||||
if device_match:
|
||||
device_type = device_match.group(1)
|
||||
if device_type == 'CPU' or device_type == 'XLA_CPU': # Filter CPUs
|
||||
for d in logical_devices:
|
||||
if d.device_type == 'CPU' or d.device_type == 'XLA_CPU': # Filter CPUs
|
||||
continue
|
||||
devices.append(session._DeviceAttributes(name, device_type, 0, 0)) # pylint: disable=protected-access
|
||||
devices.append(session._DeviceAttributes(d.name, d.device_type, 0, 0)) # pylint: disable=protected-access
|
||||
return devices
|
||||
else:
|
||||
with ops.Graph().as_default():
|
||||
|
@ -18,11 +18,12 @@ from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from tensorflow.python import eager
|
||||
from tensorflow.python import framework
|
||||
from tensorflow.python.client import session
|
||||
from tensorflow.python.distribute.cluster_resolver import ClusterResolver
|
||||
from tensorflow.python.distribute.cluster_resolver import SimpleClusterResolver
|
||||
from tensorflow.python.distribute.cluster_resolver import UnionClusterResolver
|
||||
from tensorflow.python.eager.context import LogicalDevice
|
||||
from tensorflow.python.framework import test_util
|
||||
from tensorflow.python.platform import test
|
||||
from tensorflow.python.training import server_lib
|
||||
@ -45,69 +46,69 @@ class MockBaseClusterResolver(ClusterResolver):
|
||||
@test_util.run_all_in_graph_and_eager_modes
|
||||
class BaseClusterResolverTest(test.TestCase):
|
||||
|
||||
@mock.patch.object(eager.context, "list_devices")
|
||||
@mock.patch.object(framework.config, "list_logical_devices")
|
||||
@mock.patch.object(session.BaseSession, "list_devices")
|
||||
def testNumAcceleratorsSuccess(self, mock_list_devices,
|
||||
mock_eager_list_devices):
|
||||
device_names = [
|
||||
"/job:worker/task:0/device:GPU:0",
|
||||
"/job:worker/task:0/device:GPU:1",
|
||||
"/job:worker/task:0/device:GPU:2",
|
||||
"/job:worker/task:0/device:GPU:3",
|
||||
devices = [
|
||||
LogicalDevice("/job:worker/task:0/device:GPU:0", "GPU"),
|
||||
LogicalDevice("/job:worker/task:0/device:GPU:1", "GPU"),
|
||||
LogicalDevice("/job:worker/task:0/device:GPU:2", "GPU"),
|
||||
LogicalDevice("/job:worker/task:0/device:GPU:3", "GPU"),
|
||||
]
|
||||
device_list = [
|
||||
session._DeviceAttributes(name, "GPU", 1024, 0)
|
||||
for name in device_names
|
||||
session._DeviceAttributes(d.name, d.device_type, 1024, 0)
|
||||
for d in devices
|
||||
]
|
||||
mock_eager_list_devices.return_value = device_names
|
||||
mock_eager_list_devices.return_value = devices
|
||||
mock_list_devices.return_value = device_list
|
||||
|
||||
resolver = MockBaseClusterResolver()
|
||||
self.assertEqual(resolver.num_accelerators(), {"GPU": 4})
|
||||
|
||||
@mock.patch.object(eager.context, "list_devices")
|
||||
@mock.patch.object(framework.config, "list_logical_devices")
|
||||
@mock.patch.object(session.BaseSession, "list_devices")
|
||||
def testNumAcceleratorsMultiDeviceSuccess(self, mock_list_devices,
|
||||
mock_eager_list_devices):
|
||||
device_names = [
|
||||
"/job:worker/task:0/device:TPU:0",
|
||||
"/job:worker/task:0/device:TPU:1",
|
||||
"/job:worker/task:0/device:TPU:2",
|
||||
"/job:worker/task:0/device:TPU:3",
|
||||
"/job:worker/task:0/device:GPU:0",
|
||||
"/job:worker/task:0/device:GPU:1",
|
||||
"/job:worker/task:0/device:GPU:2",
|
||||
"/job:worker/task:0/device:GPU:3",
|
||||
devices = [
|
||||
LogicalDevice("/job:worker/task:0/device:TPU:0", "TPU"),
|
||||
LogicalDevice("/job:worker/task:0/device:TPU:1", "TPU"),
|
||||
LogicalDevice("/job:worker/task:0/device:TPU:2", "TPU"),
|
||||
LogicalDevice("/job:worker/task:0/device:TPU:3", "TPU"),
|
||||
LogicalDevice("/job:worker/task:0/device:GPU:0", "GPU"),
|
||||
LogicalDevice("/job:worker/task:0/device:GPU:1", "GPU"),
|
||||
LogicalDevice("/job:worker/task:0/device:GPU:2", "GPU"),
|
||||
LogicalDevice("/job:worker/task:0/device:GPU:3", "GPU"),
|
||||
]
|
||||
device_list = [
|
||||
session._DeviceAttributes(name, name[26:29], 1024, 0)
|
||||
for name in device_names
|
||||
session._DeviceAttributes(d.name, d.device_type, 1024, 0)
|
||||
for d in devices
|
||||
]
|
||||
mock_eager_list_devices.return_value = device_names
|
||||
mock_eager_list_devices.return_value = devices
|
||||
mock_list_devices.return_value = device_list
|
||||
|
||||
resolver = MockBaseClusterResolver()
|
||||
self.assertEqual(resolver.num_accelerators(), {"TPU": 4, "GPU": 4})
|
||||
|
||||
@mock.patch.object(eager.context, "list_devices")
|
||||
@mock.patch.object(framework.config, "list_logical_devices")
|
||||
@mock.patch.object(session.BaseSession, "list_devices")
|
||||
def testNumAcceleratorsFilterTasks(self, mock_list_devices,
|
||||
mock_eager_list_devices):
|
||||
device_names = [
|
||||
"/job:worker1/task:0/device:TPU:0",
|
||||
"/job:worker1/task:0/device:TPU:1",
|
||||
"/job:worker1/task:0/device:GPU:0",
|
||||
"/job:worker1/task:0/device:GPU:1",
|
||||
"/job:worker2/task:1/device:TPU:2",
|
||||
"/job:worker2/task:2/device:TPU:3",
|
||||
"/job:worker2/task:3/device:GPU:2",
|
||||
"/job:worker2/task:4/device:GPU:3",
|
||||
devices = [
|
||||
LogicalDevice("/job:worker1/task:0/device:TPU:0", "TPU"),
|
||||
LogicalDevice("/job:worker1/task:0/device:TPU:1", "TPU"),
|
||||
LogicalDevice("/job:worker1/task:0/device:GPU:0", "GPU"),
|
||||
LogicalDevice("/job:worker1/task:0/device:GPU:1", "GPU"),
|
||||
LogicalDevice("/job:worker2/task:1/device:TPU:2", "TPU"),
|
||||
LogicalDevice("/job:worker2/task:2/device:TPU:3", "TPU"),
|
||||
LogicalDevice("/job:worker2/task:3/device:GPU:2", "GPU"),
|
||||
LogicalDevice("/job:worker2/task:4/device:GPU:3", "GPU"),
|
||||
]
|
||||
device_list = [
|
||||
session._DeviceAttributes(name, name[27:30], 1024, 0)
|
||||
for name in device_names
|
||||
session._DeviceAttributes(d.name, d.device_type, 1024, 0)
|
||||
for d in devices
|
||||
]
|
||||
mock_eager_list_devices.return_value = device_names
|
||||
mock_eager_list_devices.return_value = devices
|
||||
mock_list_devices.return_value = device_list
|
||||
|
||||
resolver = MockBaseClusterResolver()
|
||||
|
@ -20,9 +20,10 @@ from __future__ import print_function
|
||||
|
||||
import os
|
||||
|
||||
from tensorflow.python import eager
|
||||
from tensorflow.python import framework
|
||||
from tensorflow.python.client import session
|
||||
from tensorflow.python.distribute.cluster_resolver import TFConfigClusterResolver
|
||||
from tensorflow.python.eager.context import LogicalDevice
|
||||
from tensorflow.python.framework import test_util
|
||||
from tensorflow.python.platform import test
|
||||
from tensorflow.python.training import server_lib
|
||||
@ -243,7 +244,7 @@ class TFConfigClusterResolverTest(test.TestCase):
|
||||
cluster_resolver = TFConfigClusterResolver()
|
||||
self.assertEqual('', cluster_resolver.master())
|
||||
|
||||
@mock.patch.object(eager.context, 'list_devices')
|
||||
@mock.patch.object(framework.config, 'list_logical_devices')
|
||||
@mock.patch.object(session.BaseSession, 'list_devices')
|
||||
def testNumAcceleratorsFilterTasksByEnvVar(self, mock_list_devices,
|
||||
mock_eager_list_devices):
|
||||
@ -261,21 +262,21 @@ class TFConfigClusterResolverTest(test.TestCase):
|
||||
}
|
||||
"""
|
||||
|
||||
device_names = [
|
||||
'/job:worker1/task:0/device:TPU:0',
|
||||
'/job:worker1/task:0/device:TPU:1',
|
||||
'/job:worker1/task:0/device:GPU:0',
|
||||
'/job:worker1/task:0/device:GPU:1',
|
||||
'/job:worker2/task:1/device:TPU:2',
|
||||
'/job:worker2/task:2/device:TPU:3',
|
||||
'/job:worker2/task:3/device:GPU:2',
|
||||
'/job:worker2/task:4/device:GPU:3',
|
||||
devices = [
|
||||
LogicalDevice('/job:worker1/task:0/device:TPU:0', 'TPU'),
|
||||
LogicalDevice('/job:worker1/task:0/device:TPU:1', 'TPU'),
|
||||
LogicalDevice('/job:worker1/task:0/device:GPU:0', 'GPU'),
|
||||
LogicalDevice('/job:worker1/task:0/device:GPU:1', 'GPU'),
|
||||
LogicalDevice('/job:worker2/task:1/device:TPU:2', 'TPU'),
|
||||
LogicalDevice('/job:worker2/task:2/device:TPU:3', 'TPU'),
|
||||
LogicalDevice('/job:worker2/task:3/device:GPU:2', 'GPU'),
|
||||
LogicalDevice('/job:worker2/task:4/device:GPU:3', 'GPU'),
|
||||
]
|
||||
device_list = [
|
||||
session._DeviceAttributes(name, name[27:30], 1024, 0)
|
||||
for name in device_names
|
||||
session._DeviceAttributes(d.name, d.device_type, 1024, 0)
|
||||
for d in devices
|
||||
]
|
||||
mock_eager_list_devices.return_value = device_names
|
||||
mock_eager_list_devices.return_value = devices
|
||||
mock_list_devices.return_value = device_list
|
||||
|
||||
resolver = TFConfigClusterResolver()
|
||||
|
@ -23,9 +23,10 @@ import os
|
||||
import six
|
||||
from six.moves.urllib.error import URLError
|
||||
|
||||
from tensorflow.python import eager
|
||||
from tensorflow.python import framework
|
||||
from tensorflow.python.client import session
|
||||
from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver as resolver
|
||||
from tensorflow.python.eager.context import LogicalDevice
|
||||
from tensorflow.python.framework import errors
|
||||
from tensorflow.python.framework import test_util
|
||||
from tensorflow.python.platform import test
|
||||
@ -636,33 +637,33 @@ class TPUClusterResolverTest(test.TestCase):
|
||||
1: [1, 2]
|
||||
})
|
||||
|
||||
@mock.patch.object(eager.context, 'list_devices')
|
||||
@mock.patch.object(framework.config, 'list_logical_devices')
|
||||
@mock.patch.object(session.BaseSession, 'list_devices')
|
||||
@mock.patch.object(resolver, 'is_running_in_gce',
|
||||
mock_is_not_running_in_gce)
|
||||
def testNumAcceleratorsSuccess(self, mock_list_devices,
|
||||
mock_eager_list_devices):
|
||||
device_names = [
|
||||
'/job:tpu_worker/task:0/device:TPU:0',
|
||||
'/job:tpu_worker/task:1/device:TPU:1',
|
||||
'/job:tpu_worker/task:2/device:TPU:0',
|
||||
'/job:tpu_worker/task:3/device:TPU:1',
|
||||
'/job:tpu_worker/task:0/device:TPU:4',
|
||||
'/job:tpu_worker/task:1/device:TPU:5',
|
||||
'/job:tpu_worker/task:2/device:TPU:4',
|
||||
'/job:tpu_worker/task:3/device:TPU:5',
|
||||
devices = [
|
||||
LogicalDevice('/job:tpu_worker/task:0/device:TPU:0', 'TPU'),
|
||||
LogicalDevice('/job:tpu_worker/task:1/device:TPU:1', 'TPU'),
|
||||
LogicalDevice('/job:tpu_worker/task:2/device:TPU:0', 'TPU'),
|
||||
LogicalDevice('/job:tpu_worker/task:3/device:TPU:1', 'TPU'),
|
||||
LogicalDevice('/job:tpu_worker/task:0/device:TPU:4', 'TPU'),
|
||||
LogicalDevice('/job:tpu_worker/task:1/device:TPU:5', 'TPU'),
|
||||
LogicalDevice('/job:tpu_worker/task:2/device:TPU:4', 'TPU'),
|
||||
LogicalDevice('/job:tpu_worker/task:3/device:TPU:5', 'TPU'),
|
||||
]
|
||||
device_list = [
|
||||
session._DeviceAttributes(
|
||||
name, 'TPU', 1024, 0) for name in device_names
|
||||
session._DeviceAttributes(d.name, d.device_type, 1024, 0)
|
||||
for d in devices
|
||||
]
|
||||
mock_eager_list_devices.return_value = device_names
|
||||
mock_eager_list_devices.return_value = devices
|
||||
mock_list_devices.return_value = device_list
|
||||
|
||||
cluster_resolver = resolver.TPUClusterResolver(tpu='')
|
||||
self.assertEqual(cluster_resolver.num_accelerators(), {'TPU': 2})
|
||||
|
||||
@mock.patch.object(eager.context, 'list_devices')
|
||||
@mock.patch.object(framework.config, 'list_logical_devices')
|
||||
@mock.patch.object(session.BaseSession, 'list_devices')
|
||||
@mock.patch.object(resolver, 'is_running_in_gce',
|
||||
mock_is_not_running_in_gce)
|
||||
|
@ -52,9 +52,9 @@ def _replica_id():
|
||||
def _mimic_two_cpus():
|
||||
cpus = config.list_physical_devices("CPU")
|
||||
|
||||
config.set_virtual_device_configuration(cpus[0], [
|
||||
context.VirtualDeviceConfiguration(),
|
||||
context.VirtualDeviceConfiguration(),
|
||||
config.set_logical_device_configuration(cpus[0], [
|
||||
context.LogicalDeviceConfiguration(),
|
||||
context.LogicalDeviceConfiguration(),
|
||||
])
|
||||
|
||||
|
||||
|
@ -70,8 +70,8 @@ class MultiWorkerContinuousRunTest(test.TestCase, parameterized.TestCase):
|
||||
if gpus:
|
||||
# Set virtual GPU with memory limit of 64MB so that multiple worker
|
||||
# processes can share the physical GPU
|
||||
config.set_virtual_device_configuration(
|
||||
gpus[0], [context.VirtualDeviceConfiguration(64)])
|
||||
config.set_logical_device_configuration(
|
||||
gpus[0], [context.LogicalDeviceConfiguration(64)])
|
||||
for _ in range(100):
|
||||
worker_step_fn()
|
||||
|
||||
|
@ -173,12 +173,13 @@ def set_virtual_cpus_to_at_least(num_virtual_cpus):
|
||||
physical_devices = config.list_physical_devices("CPU")
|
||||
if not physical_devices:
|
||||
raise RuntimeError("No CPUs found")
|
||||
configs = config.get_virtual_device_configuration(physical_devices[0])
|
||||
configs = config.get_logical_device_configuration(physical_devices[0])
|
||||
if configs is None:
|
||||
virtual_devices = [context.VirtualDeviceConfiguration()
|
||||
for _ in range(num_virtual_cpus)]
|
||||
config.set_virtual_device_configuration(
|
||||
physical_devices[0], virtual_devices)
|
||||
logical_devices = [
|
||||
context.LogicalDeviceConfiguration() for _ in range(num_virtual_cpus)
|
||||
]
|
||||
config.set_logical_device_configuration(physical_devices[0],
|
||||
logical_devices)
|
||||
else:
|
||||
if len(configs) < num_virtual_cpus:
|
||||
raise RuntimeError("Already configured with %d < %d virtual CPUs" %
|
||||
|
@ -38,12 +38,12 @@ class StrategyCombinationsTest(test.TestCase, parameterized.TestCase):
|
||||
|
||||
def test3VirtualCPUs(self):
|
||||
cpu_device = config.list_physical_devices("CPU")[0]
|
||||
self.assertLen(config.get_virtual_device_configuration(cpu_device), 3)
|
||||
self.assertLen(config.get_logical_device_configuration(cpu_device), 3)
|
||||
|
||||
def testSetVirtualCPUsAgain(self):
|
||||
strategy_combinations.set_virtual_cpus_to_at_least(2)
|
||||
cpu_device = config.list_physical_devices("CPU")[0]
|
||||
self.assertLen(config.get_virtual_device_configuration(cpu_device), 3)
|
||||
self.assertLen(config.get_logical_device_configuration(cpu_device), 3)
|
||||
|
||||
def testSetVirtualCPUsErrors(self):
|
||||
with self.assertRaises(ValueError):
|
||||
|
@ -24,7 +24,7 @@ import tensorflow as tf
|
||||
|
||||
|
||||
def device_and_data_format():
|
||||
if tf.config.experimental.list_physical_devices('GPU'):
|
||||
if tf.config.list_physical_devices('GPU'):
|
||||
return ('/gpu:0', 'channels_first')
|
||||
return ('/cpu:0', 'channels_last')
|
||||
|
||||
|
@ -234,14 +234,15 @@ class _ContextSwitchStack(threading.local):
|
||||
self.stack.pop()
|
||||
|
||||
|
||||
@tf_export("config.LogicalDevice")
|
||||
class LogicalDevice(
|
||||
collections.namedtuple("LogicalDevice", ["name", "device_type"])):
|
||||
"""Abstraction for a device initialized by the runtime.
|
||||
"""Abstraction for a logical device initialized by the runtime.
|
||||
|
||||
A LogicalDevice corresponds to an initialized instance on a PhysicalDevice or a
|
||||
remote device available in the cluster. Tensors and operations can be placed
|
||||
on a specific LogicalDevice by calling `tf.device()` with the `name` of the
|
||||
LogicalDevice.
|
||||
A `tf.config.LogicalDevice` corresponds to an initialized logical device on a
|
||||
`tf.config.PhysicalDevice` or a remote device visible to the cluster. Tensors
|
||||
and operations can be placed on a specific logical device by calling
|
||||
`tf.device` with a specified `tf.config.LogicalDevice`.
|
||||
|
||||
Fields:
|
||||
name: The fully qualified name of the device. Can be used for Op or function
|
||||
@ -251,16 +252,18 @@ class LogicalDevice(
|
||||
pass
|
||||
|
||||
|
||||
@tf_export("config.experimental.VirtualDeviceConfiguration")
|
||||
class VirtualDeviceConfiguration(
|
||||
collections.namedtuple("VirtualDeviceConfiguration", ["memory_limit"])):
|
||||
"""Configuration class for a `LogicalDevice`.
|
||||
@tf_export("config.LogicalDeviceConfiguration",
|
||||
"config.experimental.VirtualDeviceConfiguration")
|
||||
class LogicalDeviceConfiguration(
|
||||
collections.namedtuple("LogicalDeviceConfiguration", ["memory_limit"])):
|
||||
"""Configuration class for a logical device.
|
||||
|
||||
The class specifies the parameters for a `LogicalDevice` used during runtime
|
||||
The class specifies the parameters to configure a `tf.config.PhysicalDevice`
|
||||
as it is initialized to a `tf.config.LogicalDevice` during runtime
|
||||
initialization. Not all fields are valid for all device types.
|
||||
|
||||
See `tf.config.experimental.get_virtual_device_configuration` and
|
||||
`tf.config.experimental.set_virtual_device_configuration` for usage examples.
|
||||
See `tf.config.get_logical_device_configuration` and
|
||||
`tf.config.set_logical_device_configuration` for usage examples.
|
||||
|
||||
Fields:
|
||||
memory_limit: (optional) Maximum memory (in MB) to allocate on the virtual
|
||||
@ -268,9 +271,10 @@ class VirtualDeviceConfiguration(
|
||||
"""
|
||||
|
||||
def __new__(cls, memory_limit=None):
|
||||
return super(VirtualDeviceConfiguration, cls).__new__(cls, memory_limit)
|
||||
return super(LogicalDeviceConfiguration, cls).__new__(cls, memory_limit)
|
||||
|
||||
|
||||
@tf_export("config.PhysicalDevice")
|
||||
class PhysicalDevice(
|
||||
collections.namedtuple("PhysicalDevice", ["name", "device_type"])):
|
||||
"""Abstraction for a locally visible physical device.
|
||||
@ -280,10 +284,13 @@ class PhysicalDevice(
|
||||
customize certain properties of the device such as its visibility or memory
|
||||
configuration.
|
||||
|
||||
Once a PhysicalDevice is initialized one or many LogicalDevice objects are
|
||||
created. Use tf.config.set_virtual_device_configuration() to create multiple
|
||||
LogicalDevice objects for a PhysicalDevice. This is useful when separation
|
||||
between models is needed.
|
||||
Once a visible `tf.config.PhysicalDevice` is initialized one or more
|
||||
`tf.config.LogicalDevice` objects are created. Use
|
||||
`tf.config.set_visible_devices` to configure the visibility of a physical
|
||||
device and `tf.config.set_logical_device_configuration` to configure multiple
|
||||
`tf.config.LogicalDevice` objects for a `tf.config.PhysicalDevice`. This is
|
||||
useful when separation between models is needed or to simulate a multi-device
|
||||
environment.
|
||||
|
||||
Fields:
|
||||
name: Unique identifier for device.
|
||||
@ -1114,8 +1121,8 @@ class Context(object):
|
||||
if num_cpus == 0:
|
||||
self.set_visible_devices([], "CPU")
|
||||
elif num_cpus > 1:
|
||||
self.set_virtual_device_configuration(
|
||||
cpus[0], [VirtualDeviceConfiguration() for _ in range(num_cpus)])
|
||||
self.set_logical_device_configuration(
|
||||
cpus[0], [LogicalDeviceConfiguration() for _ in range(num_cpus)])
|
||||
|
||||
# Parse GPU options
|
||||
gpus = [d for d in self._physical_devices if d.device_type == "GPU"]
|
||||
@ -1224,7 +1231,7 @@ class Context(object):
|
||||
|
||||
self._memory_growth_map[dev] = enable
|
||||
|
||||
def get_virtual_device_configuration(self, dev):
|
||||
def get_logical_device_configuration(self, dev):
|
||||
"""Get the virtual device configuration for a PhysicalDevice."""
|
||||
self._initialize_physical_devices()
|
||||
|
||||
@ -1233,7 +1240,7 @@ class Context(object):
|
||||
|
||||
return self._virtual_device_map.get(dev)
|
||||
|
||||
def set_virtual_device_configuration(self, dev, virtual_devices):
|
||||
def set_logical_device_configuration(self, dev, virtual_devices):
|
||||
"""Set the virtual device configuration for a PhysicalDevice."""
|
||||
self._initialize_physical_devices()
|
||||
|
||||
@ -1801,17 +1808,6 @@ def device(name):
|
||||
return context().device(name)
|
||||
|
||||
|
||||
@tf_export("config.experimental_list_devices")
|
||||
def list_devices():
|
||||
"""List the names of the available devices.
|
||||
|
||||
Returns:
|
||||
Names of the available devices, as a `list`.
|
||||
"""
|
||||
ensure_initialized()
|
||||
return context().devices()
|
||||
|
||||
|
||||
@tf_export("debugging.get_log_device_placement")
|
||||
def get_log_device_placement():
|
||||
"""Get if device placements are logged.
|
||||
|
@ -69,9 +69,9 @@ def current_device():
|
||||
def configure_virtual_cpus():
|
||||
cpus = config.list_physical_devices('CPU')
|
||||
# Set 2 virtual CPUs
|
||||
config.set_virtual_device_configuration(cpus[0], [
|
||||
context.VirtualDeviceConfiguration(),
|
||||
context.VirtualDeviceConfiguration()
|
||||
config.set_logical_device_configuration(cpus[0], [
|
||||
context.LogicalDeviceConfiguration(),
|
||||
context.LogicalDeviceConfiguration()
|
||||
])
|
||||
|
||||
|
||||
|
@ -55,11 +55,11 @@ class FunctionGradientsTest(test.TestCase, parameterized.TestCase):
|
||||
super(FunctionGradientsTest, self).setUp()
|
||||
cpus = config.list_physical_devices('CPU')
|
||||
# Set 4 virtual CPUs
|
||||
config.set_virtual_device_configuration(cpus[0], [
|
||||
context.VirtualDeviceConfiguration(),
|
||||
context.VirtualDeviceConfiguration(),
|
||||
context.VirtualDeviceConfiguration(),
|
||||
context.VirtualDeviceConfiguration()
|
||||
config.set_logical_device_configuration(cpus[0], [
|
||||
context.LogicalDeviceConfiguration(),
|
||||
context.LogicalDeviceConfiguration(),
|
||||
context.LogicalDeviceConfiguration(),
|
||||
context.LogicalDeviceConfiguration()
|
||||
])
|
||||
|
||||
def testGraphModeWithGradients(self):
|
||||
|
@ -133,11 +133,11 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
|
||||
super(FunctionTest, self).setUp()
|
||||
cpus = config.list_physical_devices('CPU')
|
||||
# Set 4 virtual CPUs
|
||||
config.set_virtual_device_configuration(cpus[0], [
|
||||
context.VirtualDeviceConfiguration(),
|
||||
context.VirtualDeviceConfiguration(),
|
||||
context.VirtualDeviceConfiguration(),
|
||||
context.VirtualDeviceConfiguration()
|
||||
config.set_logical_device_configuration(cpus[0], [
|
||||
context.LogicalDeviceConfiguration(),
|
||||
context.LogicalDeviceConfiguration(),
|
||||
context.LogicalDeviceConfiguration(),
|
||||
context.LogicalDeviceConfiguration()
|
||||
])
|
||||
|
||||
def testBasic(self):
|
||||
|
@ -22,8 +22,8 @@ from absl import flags
|
||||
from absl.testing import absltest
|
||||
|
||||
from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver
|
||||
from tensorflow.python.eager import context
|
||||
from tensorflow.python.eager import remote
|
||||
from tensorflow.python.framework import config
|
||||
from tensorflow.python.tpu import tpu_strategy_util
|
||||
|
||||
FLAGS = flags.FLAGS
|
||||
@ -56,18 +56,16 @@ class RemoteCloudTPUTest(absltest.TestCase):
|
||||
"""Test that we can connect to a real Cloud TPU."""
|
||||
|
||||
def test_connect(self):
|
||||
self.assertCountEqual(
|
||||
EXPECTED_DEVICES_PRE_CONNECT,
|
||||
context.list_devices())
|
||||
self.assertCountEqual(EXPECTED_DEVICES_PRE_CONNECT,
|
||||
config.list_logical_devices())
|
||||
|
||||
resolver = tpu_cluster_resolver.TPUClusterResolver(
|
||||
tpu=FLAGS.tpu, zone=FLAGS.zone, project=FLAGS.project
|
||||
)
|
||||
remote.connect_to_cluster(resolver)
|
||||
|
||||
self.assertCountEqual(
|
||||
EXPECTED_DEVICES_AFTER_CONNECT,
|
||||
context.list_devices())
|
||||
self.assertCountEqual(EXPECTED_DEVICES_AFTER_CONNECT,
|
||||
config.list_logical_devices())
|
||||
|
||||
tpu_strategy_util.initialize_tpu_system(resolver)
|
||||
|
||||
|
@ -19,6 +19,7 @@ from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from tensorflow.python.eager import context
|
||||
from tensorflow.python.util import deprecation
|
||||
from tensorflow.python.util.tf_export import tf_export
|
||||
|
||||
|
||||
@ -293,42 +294,60 @@ def set_synchronous_execution(enable):
|
||||
context.context().execution_mode = context.ASYNC
|
||||
|
||||
|
||||
@tf_export('config.experimental.list_physical_devices')
|
||||
@tf_export('config.list_physical_devices',
|
||||
'config.experimental.list_physical_devices')
|
||||
@deprecation.deprecated_endpoints(
|
||||
'config.experimental.list_physical_devices')
|
||||
def list_physical_devices(device_type=None):
|
||||
"""Return a list of physical devices visible to the host runtime.
|
||||
|
||||
Physical devices are hardware devices present on the host machine. By default
|
||||
all discovered CPU and GPU devices are considered visible. The
|
||||
`tf.config.experimental.list_physical_devices` API allows querying the
|
||||
hardware prior to runtime initialization.
|
||||
all discovered CPU and GPU devices are considered visible.
|
||||
|
||||
This API allows querying the physical hardware resources prior to runtime
|
||||
initialization, thus giving an opportunity to call any additional
|
||||
configuration APIs. This is in contrast to `tf.config.list_logical_devices`,
|
||||
which triggers runtime initialization in order to list the configured devices.
|
||||
|
||||
The following example lists the number of visible GPUs on the host.
|
||||
|
||||
>>> physical_devices = tf.config.experimental.list_physical_devices('GPU')
|
||||
>>> physical_devices = tf.config.list_physical_devices('GPU')
|
||||
>>> print("Num GPUs:", len(physical_devices))
|
||||
Num GPUs: ...
|
||||
|
||||
However, the number of GPUs available to the runtime may change during runtime
|
||||
initialization due to marking certain devices as not visible or configuring
|
||||
multiple logical devices.
|
||||
|
||||
Args:
|
||||
device_type: (optional string) Only include devices matching this device
|
||||
type. For example "CPU" or "GPU".
|
||||
|
||||
Returns:
|
||||
List of discovered `PhysicalDevice`s
|
||||
List of discovered `tf.config.PhysicalDevice` objects
|
||||
"""
|
||||
return context.context().list_physical_devices(device_type)
|
||||
|
||||
|
||||
@tf_export('config.experimental.list_logical_devices')
|
||||
@tf_export('config.list_logical_devices',
|
||||
'config.experimental.list_logical_devices')
|
||||
@deprecation.deprecated_endpoints(
|
||||
'config.experimental.list_logical_devices')
|
||||
def list_logical_devices(device_type=None):
|
||||
"""Return a list of logical devices created by the runtime.
|
||||
|
||||
Logical devices may correspond to physical devices or remote devices in the
|
||||
cluster. Operations and tensors may be placed on these devices by using the
|
||||
`name` of the `LogicalDevice`.
|
||||
`name` of the `tf.config.LogicalDevice`.
|
||||
|
||||
Calling `tf.config.list_logical_devices` triggers the runtime to configure any
|
||||
`tf.config.PhysicalDevice` visible to the runtime, thereby preventing
|
||||
further configuration. To avoid runtime initialization, call
|
||||
`tf.config.list_physical_devices` instead.
|
||||
|
||||
For example:
|
||||
|
||||
>>> logical_devices = tf.config.experimental.list_logical_devices('GPU')
|
||||
>>> logical_devices = tf.config.list_logical_devices('GPU')
|
||||
>>> if len(logical_devices) > 0:
|
||||
... # Allocate on GPU:0
|
||||
... with tf.device(logical_devices[0].name):
|
||||
@ -347,7 +366,10 @@ def list_logical_devices(device_type=None):
|
||||
return context.context().list_logical_devices(device_type=device_type)
|
||||
|
||||
|
||||
@tf_export('config.experimental.get_visible_devices')
|
||||
@tf_export('config.get_visible_devices',
|
||||
'config.experimental.get_visible_devices')
|
||||
@deprecation.deprecated_endpoints(
|
||||
'config.experimental.get_visible_devices')
|
||||
def get_visible_devices(device_type=None):
|
||||
"""Get the list of visible physical devices.
|
||||
|
||||
@ -357,11 +379,11 @@ def get_visible_devices(device_type=None):
|
||||
|
||||
The following example verifies all visible GPUs have been disabled:
|
||||
|
||||
>>> physical_devices = tf.config.experimental.list_physical_devices('GPU')
|
||||
>>> physical_devices = tf.config.list_physical_devices('GPU')
|
||||
>>> try:
|
||||
... # Disable all GPUS
|
||||
... tf.config.experimental.set_visible_devices([], 'GPU')
|
||||
... visible_devices = tf.config.experimental.get_visible_devices()
|
||||
... tf.config.set_visible_devices([], 'GPU')
|
||||
... visible_devices = tf.config.get_visible_devices()
|
||||
... for device in visible_devices:
|
||||
... assert device.device_type != 'GPU'
|
||||
... except:
|
||||
@ -378,7 +400,10 @@ def get_visible_devices(device_type=None):
|
||||
return context.context().get_visible_devices(device_type)
|
||||
|
||||
|
||||
@tf_export('config.experimental.set_visible_devices')
|
||||
@tf_export('config.set_visible_devices',
|
||||
'config.experimental.set_visible_devices')
|
||||
@deprecation.deprecated_endpoints(
|
||||
'config.experimental.set_visible_devices')
|
||||
def set_visible_devices(devices, device_type=None):
|
||||
"""Set the list of visible devices.
|
||||
|
||||
@ -389,11 +414,11 @@ def set_visible_devices(devices, device_type=None):
|
||||
|
||||
The following example demonstrates disabling the first GPU on the machine.
|
||||
|
||||
>>> physical_devices = tf.config.experimental.list_physical_devices('GPU')
|
||||
>>> physical_devices = tf.config.list_physical_devices('GPU')
|
||||
>>> try:
|
||||
... # Disable first GPU
|
||||
... tf.config.experimental.set_visible_devices(physical_devices[1:], 'GPU')
|
||||
... logical_devices = tf.config.experimental.list_logical_devices('GPU')
|
||||
... tf.config.set_visible_devices(physical_devices[1:], 'GPU')
|
||||
... logical_devices = tf.config.list_logical_devices('GPU')
|
||||
... # Logical device was not created for first GPU
|
||||
... assert len(logical_devices) == len(physical_devices) - 1
|
||||
... except:
|
||||
@ -421,7 +446,7 @@ def get_memory_growth(device):
|
||||
|
||||
For example:
|
||||
|
||||
>>> physical_devices = tf.config.experimental.list_physical_devices('GPU')
|
||||
>>> physical_devices = tf.config.list_physical_devices('GPU')
|
||||
>>> try:
|
||||
... tf.config.experimental.set_memory_growth(physical_devices[0], True)
|
||||
... assert tf.config.experimental.get_memory_growth(physical_devices[0])
|
||||
@ -451,7 +476,7 @@ def set_memory_growth(device, enable):
|
||||
|
||||
For example:
|
||||
|
||||
>>> physical_devices = tf.config.experimental.list_physical_devices('GPU')
|
||||
>>> physical_devices = tf.config.list_physical_devices('GPU')
|
||||
>>> try:
|
||||
... tf.config.experimental.set_memory_growth(physical_devices[0], True)
|
||||
... except:
|
||||
@ -469,27 +494,30 @@ def set_memory_growth(device, enable):
|
||||
context.context().set_memory_growth(device, enable)
|
||||
|
||||
|
||||
@tf_export('config.experimental.get_virtual_device_configuration')
|
||||
def get_virtual_device_configuration(device):
|
||||
"""Get the virtual device configuration for a `PhysicalDevice`.
|
||||
@tf_export('config.get_logical_device_configuration',
|
||||
'config.experimental.get_virtual_device_configuration')
|
||||
@deprecation.deprecated_endpoints(
|
||||
'config.experimental.get_virtual_device_configuration')
|
||||
def get_logical_device_configuration(device):
|
||||
"""Get the logical device configuration for a `tf.config.PhysicalDevice`.
|
||||
|
||||
Returns the list of `tf.config.experimental.VirtualDeviceConfiguration`
|
||||
Returns the list of `tf.config.LogicalDeviceConfiguration`
|
||||
objects previously configured by a call to
|
||||
`tf.config.experimental.set_virtual_device_configuration()`.
|
||||
`tf.config.set_logical_device_configuration`.
|
||||
|
||||
For example:
|
||||
|
||||
>>> physical_devices = tf.config.experimental.list_physical_devices('CPU')
|
||||
>>> physical_devices = tf.config.list_physical_devices('CPU')
|
||||
>>> assert len(physical_devices) == 1, "No CPUs found"
|
||||
>>> configs = tf.config.experimental.get_virtual_device_configuration(
|
||||
>>> configs = tf.config.get_logical_device_configuration(
|
||||
... physical_devices[0])
|
||||
>>> try:
|
||||
... assert configs is None
|
||||
... tf.config.experimental.set_virtual_device_configuration(
|
||||
... tf.config.set_logical_device_configuration(
|
||||
... physical_devices[0],
|
||||
... [tf.config.experimental.VirtualDeviceConfiguration(),
|
||||
... tf.config.experimental.VirtualDeviceConfiguration()])
|
||||
... configs = tf.config.experimental.get_virtual_device_configuration(
|
||||
... [tf.config.LogicalDeviceConfiguration(),
|
||||
... tf.config.LogicalDeviceConfiguration()])
|
||||
... configs = tf.config.get_logical_device_configuration(
|
||||
... physical_devices[0])
|
||||
... assert len(configs) == 2
|
||||
... except:
|
||||
@ -500,77 +528,79 @@ def get_virtual_device_configuration(device):
|
||||
device: `PhysicalDevice` to query
|
||||
|
||||
Returns:
|
||||
List of `tf.config.experimental.VirtualDeviceConfiguration` objects or
|
||||
List of `tf.config.LogicalDeviceConfiguration` objects or
|
||||
`None` if no logical device configuration has been set for this physical
|
||||
device.
|
||||
"""
|
||||
return context.context().get_virtual_device_configuration(device)
|
||||
return context.context().get_logical_device_configuration(device)
|
||||
|
||||
|
||||
@tf_export('config.experimental.set_virtual_device_configuration')
|
||||
def set_virtual_device_configuration(device, virtual_devices):
|
||||
"""Set the virtual device configuration for a `PhysicalDevice`.
|
||||
@tf_export('config.set_logical_device_configuration',
|
||||
'config.experimental.set_virtual_device_configuration')
|
||||
@deprecation.deprecated_endpoints(
|
||||
'config.experimental.set_virtual_device_configuration')
|
||||
def set_logical_device_configuration(device, logical_devices):
|
||||
"""Set the logical device configuration for a `tf.config.PhysicalDevice`.
|
||||
|
||||
A visible `PhysicalDevice` will by default have a single `LogicalDevice`
|
||||
associated with it once the runtime is initialized. Specifying a list of
|
||||
`tf.config.experimental.VirtualDeviceConfiguration`s allows multiple
|
||||
devices to be on the same `PhysicalDevice`.
|
||||
A visible `tf.config.PhysicalDevice` will by default have a single
|
||||
`tf.config.LogicalDevice` associated with it once the runtime is initialized.
|
||||
Specifying a list of `tf.config.LogicalDeviceConfiguration` objects allows
|
||||
multiple devices to be created on the same `tf.config.PhysicalDevice`.
|
||||
|
||||
The following example splits the CPU into 2 virtual devices:
|
||||
The following example splits the CPU into 2 logical devices:
|
||||
|
||||
>>> physical_devices = tf.config.experimental.list_physical_devices('CPU')
|
||||
>>> physical_devices = tf.config.list_physical_devices('CPU')
|
||||
>>> assert len(physical_devices) == 1, "No CPUs found"
|
||||
>>> # Specify 2 virtual CPUs. Note currently memory limit is not supported.
|
||||
>>> try:
|
||||
... tf.config.experimental.set_virtual_device_configuration(
|
||||
... tf.config.set_logical_device_configuration(
|
||||
... physical_devices[0],
|
||||
... [tf.config.experimental.VirtualDeviceConfiguration(),
|
||||
... tf.config.experimental.VirtualDeviceConfiguration()])
|
||||
... logical_devices = tf.config.experimental.list_logical_devices('CPU')
|
||||
... [tf.config.LogicalDeviceConfiguration(),
|
||||
... tf.config.LogicalDeviceConfiguration()])
|
||||
... logical_devices = tf.config.list_logical_devices('CPU')
|
||||
... assert len(logical_devices) == 2
|
||||
...
|
||||
... tf.config.experimental.set_virtual_device_configuration(
|
||||
... tf.config.set_logical_device_configuration(
|
||||
... physical_devices[0],
|
||||
... [tf.config.experimental.VirtualDeviceConfiguration(),
|
||||
... tf.config.experimental.VirtualDeviceConfiguration(),
|
||||
... tf.config.experimental.VirtualDeviceConfiguration(),
|
||||
... tf.config.experimental.VirtualDeviceConfiguration()])
|
||||
... [tf.config.LogicalDeviceConfiguration(),
|
||||
... tf.config.LogicalDeviceConfiguration(),
|
||||
... tf.config.LogicalDeviceConfiguration(),
|
||||
... tf.config.LogicalDeviceConfiguration()])
|
||||
... except:
|
||||
... # Cannot modify virtual devices once initialized.
|
||||
... # Cannot modify logical devices once initialized.
|
||||
... pass
|
||||
|
||||
The following example splits the GPU into 2 virtual devices with 100 MB each:
|
||||
The following example splits the GPU into 2 logical devices with 100 MB each:
|
||||
|
||||
>>> physical_devices = tf.config.experimental.list_physical_devices('GPU')
|
||||
>>> physical_devices = tf.config.list_physical_devices('GPU')
|
||||
>>> try:
|
||||
... tf.config.experimental.set_virtual_device_configuration(
|
||||
... tf.config.set_logical_device_configuration(
|
||||
... physical_devices[0],
|
||||
... [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=100),
|
||||
... tf.config.experimental.VirtualDeviceConfiguration(memory_limit=100)])
|
||||
... [tf.config.LogicalDeviceConfiguration(memory_limit=100),
|
||||
... tf.config.LogicalDeviceConfiguration(memory_limit=100)])
|
||||
...
|
||||
... logical_devices = tf.config.experimental.list_logical_devices('GPU')
|
||||
... logical_devices = tf.config.list_logical_devices('GPU')
|
||||
... assert len(logical_devices) == len(physical_devices) + 1
|
||||
...
|
||||
... tf.config.experimental.set_virtual_device_configuration(
|
||||
... tf.config.set_logical_device_configuration(
|
||||
... physical_devices[0],
|
||||
... [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=10),
|
||||
... tf.config.experimental.VirtualDeviceConfiguration(memory_limit=10)])
|
||||
... [tf.config.LogicalDeviceConfiguration(memory_limit=10),
|
||||
... tf.config.LogicalDeviceConfiguration(memory_limit=10)])
|
||||
... except:
|
||||
... # Invalid device or cannot modify virtual devices once initialized.
|
||||
... # Invalid device or cannot modify logical devices once initialized.
|
||||
... pass
|
||||
|
||||
Args:
|
||||
device: The `PhysicalDevice` to configure.
|
||||
virtual_devices: (optional) List of
|
||||
`tf.config.experimental.VirtualDeviceConfiguration` objects to allocate
|
||||
for the specified `PhysicalDevice`. If None, the default configuration
|
||||
will be used.
|
||||
logical_devices: (optional) List of `tf.config.LogicalDeviceConfiguration`
|
||||
objects to allocate for the specified `PhysicalDevice`. If None, the
|
||||
default configuration will be used.
|
||||
|
||||
Raises:
|
||||
ValueError: If argument validation fails.
|
||||
RuntimeError: Runtime is already initialized.
|
||||
"""
|
||||
context.context().set_virtual_device_configuration(device, virtual_devices)
|
||||
context.context().set_logical_device_configuration(device, logical_devices)
|
||||
|
||||
|
||||
@tf_export('config.experimental.enable_mlir_bridge')
|
||||
|
@ -364,9 +364,9 @@ class DeviceTest(test.TestCase):
|
||||
cpus = config.list_physical_devices('CPU')
|
||||
self.assertEqual(len(cpus), 1)
|
||||
|
||||
config.set_virtual_device_configuration(cpus[0], [
|
||||
context.VirtualDeviceConfiguration(),
|
||||
context.VirtualDeviceConfiguration()
|
||||
config.set_logical_device_configuration(cpus[0], [
|
||||
context.LogicalDeviceConfiguration(),
|
||||
context.LogicalDeviceConfiguration()
|
||||
])
|
||||
|
||||
context.ensure_initialized()
|
||||
@ -393,16 +393,16 @@ class DeviceTest(test.TestCase):
|
||||
|
||||
# Modifying the CPU configuration is not supported
|
||||
with self.assertRaisesRegexp(RuntimeError, 'cannot be modified'):
|
||||
config.set_virtual_device_configuration(cpus[0], [
|
||||
context.VirtualDeviceConfiguration(),
|
||||
context.VirtualDeviceConfiguration(),
|
||||
context.VirtualDeviceConfiguration()
|
||||
config.set_logical_device_configuration(cpus[0], [
|
||||
context.LogicalDeviceConfiguration(),
|
||||
context.LogicalDeviceConfiguration(),
|
||||
context.LogicalDeviceConfiguration()
|
||||
])
|
||||
|
||||
# Setting the same CPU configuration is fine
|
||||
config.set_virtual_device_configuration(cpus[0], [
|
||||
context.VirtualDeviceConfiguration(),
|
||||
context.VirtualDeviceConfiguration()
|
||||
config.set_logical_device_configuration(cpus[0], [
|
||||
context.LogicalDeviceConfiguration(),
|
||||
context.LogicalDeviceConfiguration()
|
||||
])
|
||||
|
||||
@test_util.run_gpu_only
|
||||
@ -456,12 +456,12 @@ class DeviceTest(test.TestCase):
|
||||
gpus = config.list_physical_devices('GPU')
|
||||
self.assertNotEqual(len(gpus), 0)
|
||||
|
||||
self.assertIsNone(config.get_virtual_device_configuration(gpus[-1]))
|
||||
config.set_virtual_device_configuration(gpus[-1], [
|
||||
context.VirtualDeviceConfiguration(memory_limit=10),
|
||||
context.VirtualDeviceConfiguration(memory_limit=10)
|
||||
self.assertIsNone(config.get_logical_device_configuration(gpus[-1]))
|
||||
config.set_logical_device_configuration(gpus[-1], [
|
||||
context.LogicalDeviceConfiguration(memory_limit=10),
|
||||
context.LogicalDeviceConfiguration(memory_limit=10)
|
||||
])
|
||||
self.assertEqual(len(config.get_virtual_device_configuration(gpus[-1])), 2)
|
||||
self.assertEqual(len(config.get_logical_device_configuration(gpus[-1])), 2)
|
||||
|
||||
logical_gpus = config.list_logical_devices('GPU')
|
||||
self.assertTrue(len(logical_gpus), len(gpus) + 1)
|
||||
@ -477,22 +477,22 @@ class DeviceTest(test.TestCase):
|
||||
|
||||
# Modifying the GPU configuration is not supported
|
||||
with self.assertRaisesRegexp(RuntimeError, 'cannot be modified'):
|
||||
config.set_virtual_device_configuration(gpus[-1], [
|
||||
context.VirtualDeviceConfiguration(memory_limit=20),
|
||||
context.VirtualDeviceConfiguration(memory_limit=20)
|
||||
config.set_logical_device_configuration(gpus[-1], [
|
||||
context.LogicalDeviceConfiguration(memory_limit=20),
|
||||
context.LogicalDeviceConfiguration(memory_limit=20)
|
||||
])
|
||||
|
||||
with self.assertRaisesRegexp(RuntimeError, 'cannot be modified'):
|
||||
config.set_virtual_device_configuration(gpus[-1], [
|
||||
context.VirtualDeviceConfiguration(memory_limit=10),
|
||||
context.VirtualDeviceConfiguration(memory_limit=10),
|
||||
context.VirtualDeviceConfiguration(memory_limit=10)
|
||||
config.set_logical_device_configuration(gpus[-1], [
|
||||
context.LogicalDeviceConfiguration(memory_limit=10),
|
||||
context.LogicalDeviceConfiguration(memory_limit=10),
|
||||
context.LogicalDeviceConfiguration(memory_limit=10)
|
||||
])
|
||||
|
||||
# Setting the same GPU configuration is fine
|
||||
config.set_virtual_device_configuration(gpus[-1], [
|
||||
context.VirtualDeviceConfiguration(memory_limit=10),
|
||||
context.VirtualDeviceConfiguration(memory_limit=10)
|
||||
config.set_logical_device_configuration(gpus[-1], [
|
||||
context.LogicalDeviceConfiguration(memory_limit=10),
|
||||
context.LogicalDeviceConfiguration(memory_limit=10)
|
||||
])
|
||||
|
||||
@test_util.run_gpu_only
|
||||
@ -554,15 +554,15 @@ class DeviceTest(test.TestCase):
|
||||
self.assertTrue(c.gpu_options.allow_growth)
|
||||
|
||||
with self.assertRaisesRegexp(ValueError, 'memory limit'):
|
||||
config.set_virtual_device_configuration(gpus[-1], [
|
||||
context.VirtualDeviceConfiguration(),
|
||||
context.VirtualDeviceConfiguration()
|
||||
config.set_logical_device_configuration(gpus[-1], [
|
||||
context.LogicalDeviceConfiguration(),
|
||||
context.LogicalDeviceConfiguration()
|
||||
])
|
||||
|
||||
self.assertIsNone(config.get_virtual_device_configuration(gpus[-1]))
|
||||
config.set_virtual_device_configuration(gpus[-1], [
|
||||
context.VirtualDeviceConfiguration(memory_limit=10),
|
||||
context.VirtualDeviceConfiguration(memory_limit=10)
|
||||
self.assertIsNone(config.get_logical_device_configuration(gpus[-1]))
|
||||
config.set_logical_device_configuration(gpus[-1], [
|
||||
context.LogicalDeviceConfiguration(memory_limit=10),
|
||||
context.LogicalDeviceConfiguration(memory_limit=10)
|
||||
])
|
||||
|
||||
c = context.context().config
|
||||
|
@ -1459,8 +1459,8 @@ def with_forward_compatibility_horizons(*horizons):
|
||||
return decorator
|
||||
|
||||
|
||||
@deprecation.deprecated(
|
||||
None, "Use `tf.config.experimental.list_physical_devices('GPU')` instead.")
|
||||
@deprecation.deprecated(None,
|
||||
"Use `tf.config.list_physical_devices('GPU')` instead.")
|
||||
@tf_export("test.is_gpu_available")
|
||||
def is_gpu_available(cuda_only=False, min_cuda_compute_capability=None):
|
||||
"""Returns whether TensorFlow can access a GPU.
|
||||
|
@ -41,6 +41,7 @@ from tensorflow.python.eager import context
|
||||
from tensorflow.python.eager import function as eager_function
|
||||
from tensorflow.python.eager import lift_to_graph
|
||||
from tensorflow.python.framework import composite_tensor
|
||||
from tensorflow.python.framework import config
|
||||
from tensorflow.python.framework import constant_op
|
||||
from tensorflow.python.framework import device as tfdev
|
||||
from tensorflow.python.framework import dtypes as dtypes_module
|
||||
@ -633,7 +634,7 @@ def _get_available_gpus():
|
||||
"""
|
||||
if ops.executing_eagerly_outside_functions():
|
||||
# Returns names of devices directly.
|
||||
return [name for name in context.list_devices() if 'GPU' in name]
|
||||
return [d.name for d in config.list_logical_devices('GPU')]
|
||||
|
||||
global _LOCAL_DEVICES
|
||||
if _LOCAL_DEVICES is None:
|
||||
|
@ -45,15 +45,14 @@ class TestMultiGPUModel(test.TestCase):
|
||||
super(TestMultiGPUModel, self).__init__(methodName)
|
||||
gpu_devices = config.list_physical_devices('GPU')
|
||||
xla_gpu_devices = config.list_physical_devices('XLA_GPU')
|
||||
# NOTE: XLA devices don't support the set_virtual_device_configuration
|
||||
# NOTE: XLA devices don't support the set_logical_device_configuration
|
||||
# codepaths.
|
||||
if len(gpu_devices) == 1 and not xla_gpu_devices:
|
||||
# A GPU is available, simulate 2 instead.
|
||||
config.set_virtual_device_configuration(
|
||||
gpu_devices[0], [
|
||||
context.VirtualDeviceConfiguration(500),
|
||||
context.VirtualDeviceConfiguration(500)
|
||||
])
|
||||
config.set_logical_device_configuration(gpu_devices[0], [
|
||||
context.LogicalDeviceConfiguration(500),
|
||||
context.LogicalDeviceConfiguration(500)
|
||||
])
|
||||
|
||||
def test_multi_gpu_test_simple_model(self):
|
||||
gpus = 2
|
||||
|
@ -59,9 +59,9 @@ class CollectiveOpGPUTest(test.TestCase):
|
||||
if len(gpus) < 1:
|
||||
self.skipTest('Expected at least 1 GPU but found {} GPUs'.format(
|
||||
len(gpus)))
|
||||
config.set_virtual_device_configuration(gpus[0], [
|
||||
context.VirtualDeviceConfiguration(1024),
|
||||
context.VirtualDeviceConfiguration(1024)
|
||||
config.set_logical_device_configuration(gpus[0], [
|
||||
context.LogicalDeviceConfiguration(1024),
|
||||
context.LogicalDeviceConfiguration(1024)
|
||||
])
|
||||
context.ensure_initialized()
|
||||
|
||||
|
@ -350,9 +350,9 @@ class CollectiveOpTest(test.TestCase):
|
||||
def testCollectiveGroupSizeMismatch(self):
|
||||
cpus = config.list_physical_devices('CPU')
|
||||
self.assertEqual(len(cpus), 1)
|
||||
config.set_virtual_device_configuration(cpus[0], [
|
||||
context.VirtualDeviceConfiguration(),
|
||||
context.VirtualDeviceConfiguration()
|
||||
config.set_logical_device_configuration(cpus[0], [
|
||||
context.LogicalDeviceConfiguration(),
|
||||
context.LogicalDeviceConfiguration()
|
||||
])
|
||||
context.ensure_initialized()
|
||||
|
||||
|
@ -24,6 +24,7 @@ import re
|
||||
from tensorflow.core.protobuf import config_pb2
|
||||
from tensorflow.python.client import session as session_lib
|
||||
from tensorflow.python.eager import context
|
||||
from tensorflow.python.framework import config
|
||||
from tensorflow.python.framework import device as tf_device
|
||||
from tensorflow.python.framework import errors
|
||||
from tensorflow.python.framework import ops
|
||||
@ -35,7 +36,6 @@ _RETRY_TIMES = 12 * 24 # 1 day
|
||||
_INITIAL_TPU_SYSTEM_TIMEOUT_IN_MS = 300 * 1000 # 5 mins
|
||||
|
||||
_TPU_DEVICE_REG = re.compile(r'.*task:(\d+)/.*device:TPU:(\d+)$')
|
||||
_DEVICE_TYPE_REGEX = re.compile('.*device:([^:]+).*')
|
||||
|
||||
_DEFAULT_JOB_NAME = 'tpu_worker'
|
||||
_DEFAULT_COORDINATOR_JOB_NAME = 'coordinator'
|
||||
@ -60,16 +60,12 @@ def _query_tpu_system_metadata(master_address, cluster_def=None,
|
||||
device_dict = collections.defaultdict(list)
|
||||
|
||||
if context.executing_eagerly():
|
||||
device_names = context.list_devices()
|
||||
logical_devices = config.list_logical_devices()
|
||||
devices = []
|
||||
|
||||
# We want the output type to match in both eager and session mode
|
||||
for name in device_names:
|
||||
device_match = _DEVICE_TYPE_REGEX.match(name)
|
||||
device_type = 'CPU'
|
||||
if device_match:
|
||||
device_type = device_match.group(1)
|
||||
devices.append(session_lib._DeviceAttributes(name, device_type, 0, 0)) # pylint: disable=protected-access
|
||||
for d in logical_devices:
|
||||
devices.append(session_lib._DeviceAttributes(d.name, d.device_type, 0, 0)) # pylint: disable=protected-access
|
||||
else:
|
||||
# TODO(b/120564445): Replace with standard library for retries.
|
||||
retry_count = 1
|
||||
@ -179,9 +175,9 @@ def _obtain_topology(master_address, cluster_def):
|
||||
|
||||
def get_session_config_with_timeout(timeout_in_secs, cluster_def):
|
||||
"""Returns a session config given a timeout and a cluster configuration."""
|
||||
config = config_pb2.ConfigProto(
|
||||
config_proto = config_pb2.ConfigProto(
|
||||
operation_timeout_in_ms=timeout_in_secs, cluster_def=cluster_def)
|
||||
return config
|
||||
return config_proto
|
||||
|
||||
|
||||
def master_job(master, cluster_def):
|
||||
|
@ -0,0 +1,19 @@
|
||||
path: "tensorflow.config.LogicalDeviceConfiguration"
|
||||
tf_class {
|
||||
is_instance: "<class \'tensorflow.python.eager.context.LogicalDeviceConfiguration\'>"
|
||||
is_instance: "<class \'tensorflow.python.eager.context.LogicalDeviceConfiguration\'>"
|
||||
is_instance: "<type \'tuple\'>"
|
||||
member {
|
||||
name: "memory_limit"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member_method {
|
||||
name: "__init__"
|
||||
}
|
||||
member_method {
|
||||
name: "count"
|
||||
}
|
||||
member_method {
|
||||
name: "index"
|
||||
}
|
||||
}
|
@ -0,0 +1,23 @@
|
||||
path: "tensorflow.config.LogicalDevice"
|
||||
tf_class {
|
||||
is_instance: "<class \'tensorflow.python.eager.context.LogicalDevice\'>"
|
||||
is_instance: "<class \'tensorflow.python.eager.context.LogicalDevice\'>"
|
||||
is_instance: "<type \'tuple\'>"
|
||||
member {
|
||||
name: "device_type"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member {
|
||||
name: "name"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member_method {
|
||||
name: "__init__"
|
||||
}
|
||||
member_method {
|
||||
name: "count"
|
||||
}
|
||||
member_method {
|
||||
name: "index"
|
||||
}
|
||||
}
|
@ -0,0 +1,23 @@
|
||||
path: "tensorflow.config.PhysicalDevice"
|
||||
tf_class {
|
||||
is_instance: "<class \'tensorflow.python.eager.context.PhysicalDevice\'>"
|
||||
is_instance: "<class \'tensorflow.python.eager.context.PhysicalDevice\'>"
|
||||
is_instance: "<type \'tuple\'>"
|
||||
member {
|
||||
name: "device_type"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member {
|
||||
name: "name"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member_method {
|
||||
name: "__init__"
|
||||
}
|
||||
member_method {
|
||||
name: "count"
|
||||
}
|
||||
member_method {
|
||||
name: "index"
|
||||
}
|
||||
}
|
@ -1,7 +1,7 @@
|
||||
path: "tensorflow.config.experimental.VirtualDeviceConfiguration"
|
||||
tf_class {
|
||||
is_instance: "<class \'tensorflow.python.eager.context.VirtualDeviceConfiguration\'>"
|
||||
is_instance: "<class \'tensorflow.python.eager.context.VirtualDeviceConfiguration\'>"
|
||||
is_instance: "<class \'tensorflow.python.eager.context.LogicalDeviceConfiguration\'>"
|
||||
is_instance: "<class \'tensorflow.python.eager.context.LogicalDeviceConfiguration\'>"
|
||||
is_instance: "<type \'tuple\'>"
|
||||
member {
|
||||
name: "memory_limit"
|
||||
|
@ -54,7 +54,7 @@ tf_module {
|
||||
}
|
||||
member_method {
|
||||
name: "set_virtual_device_configuration"
|
||||
argspec: "args=[\'device\', \'virtual_devices\'], varargs=None, keywords=None, defaults=None"
|
||||
argspec: "args=[\'device\', \'logical_devices\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "set_visible_devices"
|
||||
|
@ -1,5 +1,17 @@
|
||||
path: "tensorflow.config"
|
||||
tf_module {
|
||||
member {
|
||||
name: "LogicalDevice"
|
||||
mtype: "<type \'type\'>"
|
||||
}
|
||||
member {
|
||||
name: "LogicalDeviceConfiguration"
|
||||
mtype: "<type \'type\'>"
|
||||
}
|
||||
member {
|
||||
name: "PhysicalDevice"
|
||||
mtype: "<type \'type\'>"
|
||||
}
|
||||
member {
|
||||
name: "experimental"
|
||||
mtype: "<type \'module\'>"
|
||||
@ -24,20 +36,40 @@ tf_module {
|
||||
name: "experimental_functions_run_eagerly"
|
||||
argspec: "args=[], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "experimental_list_devices"
|
||||
argspec: "args=[], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "experimental_run_functions_eagerly"
|
||||
argspec: "args=[\'run_eagerly\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "get_logical_device_configuration"
|
||||
argspec: "args=[\'device\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "get_soft_device_placement"
|
||||
argspec: "args=[], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "get_visible_devices"
|
||||
argspec: "args=[\'device_type\'], varargs=None, keywords=None, defaults=[\'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "list_logical_devices"
|
||||
argspec: "args=[\'device_type\'], varargs=None, keywords=None, defaults=[\'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "list_physical_devices"
|
||||
argspec: "args=[\'device_type\'], varargs=None, keywords=None, defaults=[\'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "set_logical_device_configuration"
|
||||
argspec: "args=[\'device\', \'logical_devices\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "set_soft_device_placement"
|
||||
argspec: "args=[\'enabled\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "set_visible_devices"
|
||||
argspec: "args=[\'devices\', \'device_type\'], varargs=None, keywords=None, defaults=[\'None\'], "
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,19 @@
|
||||
path: "tensorflow.config.LogicalDeviceConfiguration"
|
||||
tf_class {
|
||||
is_instance: "<class \'tensorflow.python.eager.context.LogicalDeviceConfiguration\'>"
|
||||
is_instance: "<class \'tensorflow.python.eager.context.LogicalDeviceConfiguration\'>"
|
||||
is_instance: "<type \'tuple\'>"
|
||||
member {
|
||||
name: "memory_limit"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member_method {
|
||||
name: "__init__"
|
||||
}
|
||||
member_method {
|
||||
name: "count"
|
||||
}
|
||||
member_method {
|
||||
name: "index"
|
||||
}
|
||||
}
|
@ -0,0 +1,23 @@
|
||||
path: "tensorflow.config.LogicalDevice"
|
||||
tf_class {
|
||||
is_instance: "<class \'tensorflow.python.eager.context.LogicalDevice\'>"
|
||||
is_instance: "<class \'tensorflow.python.eager.context.LogicalDevice\'>"
|
||||
is_instance: "<type \'tuple\'>"
|
||||
member {
|
||||
name: "device_type"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member {
|
||||
name: "name"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member_method {
|
||||
name: "__init__"
|
||||
}
|
||||
member_method {
|
||||
name: "count"
|
||||
}
|
||||
member_method {
|
||||
name: "index"
|
||||
}
|
||||
}
|
@ -0,0 +1,23 @@
|
||||
path: "tensorflow.config.PhysicalDevice"
|
||||
tf_class {
|
||||
is_instance: "<class \'tensorflow.python.eager.context.PhysicalDevice\'>"
|
||||
is_instance: "<class \'tensorflow.python.eager.context.PhysicalDevice\'>"
|
||||
is_instance: "<type \'tuple\'>"
|
||||
member {
|
||||
name: "device_type"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member {
|
||||
name: "name"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member_method {
|
||||
name: "__init__"
|
||||
}
|
||||
member_method {
|
||||
name: "count"
|
||||
}
|
||||
member_method {
|
||||
name: "index"
|
||||
}
|
||||
}
|
@ -1,7 +1,7 @@
|
||||
path: "tensorflow.config.experimental.VirtualDeviceConfiguration"
|
||||
tf_class {
|
||||
is_instance: "<class \'tensorflow.python.eager.context.VirtualDeviceConfiguration\'>"
|
||||
is_instance: "<class \'tensorflow.python.eager.context.VirtualDeviceConfiguration\'>"
|
||||
is_instance: "<class \'tensorflow.python.eager.context.LogicalDeviceConfiguration\'>"
|
||||
is_instance: "<class \'tensorflow.python.eager.context.LogicalDeviceConfiguration\'>"
|
||||
is_instance: "<type \'tuple\'>"
|
||||
member {
|
||||
name: "memory_limit"
|
||||
|
@ -54,7 +54,7 @@ tf_module {
|
||||
}
|
||||
member_method {
|
||||
name: "set_virtual_device_configuration"
|
||||
argspec: "args=[\'device\', \'virtual_devices\'], varargs=None, keywords=None, defaults=None"
|
||||
argspec: "args=[\'device\', \'logical_devices\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "set_visible_devices"
|
||||
|
@ -1,5 +1,17 @@
|
||||
path: "tensorflow.config"
|
||||
tf_module {
|
||||
member {
|
||||
name: "LogicalDevice"
|
||||
mtype: "<type \'type\'>"
|
||||
}
|
||||
member {
|
||||
name: "LogicalDeviceConfiguration"
|
||||
mtype: "<type \'type\'>"
|
||||
}
|
||||
member {
|
||||
name: "PhysicalDevice"
|
||||
mtype: "<type \'type\'>"
|
||||
}
|
||||
member {
|
||||
name: "experimental"
|
||||
mtype: "<type \'module\'>"
|
||||
@ -24,20 +36,40 @@ tf_module {
|
||||
name: "experimental_functions_run_eagerly"
|
||||
argspec: "args=[], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "experimental_list_devices"
|
||||
argspec: "args=[], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "experimental_run_functions_eagerly"
|
||||
argspec: "args=[\'run_eagerly\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "get_logical_device_configuration"
|
||||
argspec: "args=[\'device\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "get_soft_device_placement"
|
||||
argspec: "args=[], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "get_visible_devices"
|
||||
argspec: "args=[\'device_type\'], varargs=None, keywords=None, defaults=[\'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "list_logical_devices"
|
||||
argspec: "args=[\'device_type\'], varargs=None, keywords=None, defaults=[\'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "list_physical_devices"
|
||||
argspec: "args=[\'device_type\'], varargs=None, keywords=None, defaults=[\'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "set_logical_device_configuration"
|
||||
argspec: "args=[\'device\', \'logical_devices\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "set_soft_device_placement"
|
||||
argspec: "args=[\'enabled\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "set_visible_devices"
|
||||
argspec: "args=[\'devices\', \'device_type\'], varargs=None, keywords=None, defaults=[\'None\'], "
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user