Making Cluster Resolvers visible by default for TensorFlow 2.0 transition

PiperOrigin-RevId: 231328553
This commit is contained in:
Frank Chen 2019-01-28 18:43:15 -08:00 committed by TensorFlower Gardener
parent e7f1a44a5f
commit a39f31a942
37 changed files with 768 additions and 184 deletions

View File

@ -23,7 +23,7 @@ from __future__ import print_function
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import SimpleClusterResolver
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import UnionClusterResolver
from tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver import GceClusterResolver
from tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver import GCEClusterResolver
from tensorflow.python.distribute.cluster_resolver.kubernetes_cluster_resolver import KubernetesClusterResolver
from tensorflow.python.distribute.cluster_resolver.slurm_cluster_resolver import SlurmClusterResolver
from tensorflow.python.distribute.cluster_resolver.tfconfig_cluster_resolver import TFConfigClusterResolver
@ -36,7 +36,7 @@ _allowed_symbols = [
'ClusterResolver',
'SimpleClusterResolver',
'UnionClusterResolver',
'GceClusterResolver',
'GCEClusterResolver',
'KubernetesClusterResolver',
'TFConfigClusterResolver',
'TPUClusterResolver',

View File

@ -25,7 +25,7 @@ from __future__ import print_function
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import SimpleClusterResolver
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import UnionClusterResolver
from tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver import GceClusterResolver
from tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver import GCEClusterResolver
from tensorflow.python.distribute.cluster_resolver.kubernetes_cluster_resolver import KubernetesClusterResolver
from tensorflow.python.distribute.cluster_resolver.slurm_cluster_resolver import SlurmClusterResolver
from tensorflow.python.distribute.cluster_resolver.tfconfig_cluster_resolver import TFConfigClusterResolver
@ -43,7 +43,7 @@ _allowed_symbols = [
'ClusterResolver',
'SimpleClusterResolver',
'UnionClusterResolver',
'GceClusterResolver',
'GCEClusterResolver',
'KubernetesClusterResolver',
'TFConfigClusterResolver',
'TPUClusterResolver',

View File

@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Stub file for GceClusterResolver to maintain backwards compatibility."""
"""Stub file for GCEClusterResolver to maintain backwards compatibility."""
from __future__ import absolute_import
from __future__ import division
@ -23,13 +23,14 @@ from __future__ import print_function
# existing OSS code will not be broken.
# pylint: disable=unused-import
from tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver import GceClusterResolver
from tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver import GCEClusterResolver
# pylint: enable=unused-import
from tensorflow.python.util.all_util import remove_undocumented
_allowed_symbols = [
'GceClusterResolver',
'GCEClusterResolver',
]
remove_undocumented(__name__, _allowed_symbols)

View File

@ -39,7 +39,7 @@ from tensorflow.python.distribute import input_lib
from tensorflow.python.distribute import numpy_dataset
from tensorflow.python.distribute import reduce_util
from tensorflow.python.distribute import values
from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver as resolver_lib
from tensorflow.python.distribute.cluster_resolver import TPUClusterResolver
from tensorflow.python.eager import context
from tensorflow.python.eager import tape
from tensorflow.python.framework import constant_op
@ -65,7 +65,7 @@ def initialize_tpu_system(cluster_resolver=None):
The tf.contrib.tpu.Topology object for the topology of the TPU cluster.
"""
if cluster_resolver is None:
cluster_resolver = resolver_lib.TPUClusterResolver("")
cluster_resolver = TPUClusterResolver("")
master = cluster_resolver.master()
logging.info("Initializing the TPU system.")
@ -246,7 +246,7 @@ class TPUExtended(distribute_lib.DistributionStrategyExtended):
super(TPUExtended, self).__init__(container_strategy)
if tpu_cluster_resolver is None:
tpu_cluster_resolver = resolver_lib.TPUClusterResolver("")
tpu_cluster_resolver = TPUClusterResolver("")
if steps_per_run is None:
# TODO(frankchn): Warn when we are being used by DS/Keras and this is

View File

@ -19,6 +19,7 @@ from __future__ import division
from __future__ import print_function
# pylint: disable=unused-import
from tensorflow.python.distribute import cluster_resolver
from tensorflow.python.distribute import distribute_lib
from tensorflow.python.distribute import distribution_strategy_context
from tensorflow.python.distribute import mirrored_strategy

View File

@ -18,40 +18,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.python.distribute.cluster_resolver import cluster_resolver
from tensorflow.python.distribute.cluster_resolver import gce_cluster_resolver
from tensorflow.python.distribute.cluster_resolver import kubernetes_cluster_resolver
from tensorflow.python.distribute.cluster_resolver import slurm_cluster_resolver
from tensorflow.python.distribute.cluster_resolver import tfconfig_cluster_resolver
from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import SimpleClusterResolver
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import UnionClusterResolver
from tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver import GceClusterResolver
from tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver import GCEClusterResolver
from tensorflow.python.distribute.cluster_resolver.kubernetes_cluster_resolver import KubernetesClusterResolver
from tensorflow.python.distribute.cluster_resolver.slurm_cluster_resolver import SlurmClusterResolver
from tensorflow.python.distribute.cluster_resolver.tfconfig_cluster_resolver import TFConfigClusterResolver
from tensorflow.python.distribute.cluster_resolver.tpu_cluster_resolver import TPUClusterResolver
from tensorflow.python.util.all_util import remove_undocumented
_allowed_symbols = [
'cluster_resolver',
'gce_cluster_resolver',
'kubernetes_cluster_resolver',
'slurm_cluster_resolver',
'tfconfig_cluster_resolver',
'tpu_cluster_resolver',
'ClusterResolver',
'SimpleClusterResolver',
'UnionClusterResolver',
'GceClusterResolver',
'KubernetesClusterResolver',
'TFConfigClusterResolver',
'TPUClusterResolver',
'SlurmClusterResolver',
]
remove_undocumented(__name__, _allowed_symbols)

View File

@ -20,11 +20,13 @@ from __future__ import print_function
import abc
import collections
import six
from tensorflow.python.client import session
from tensorflow.python.framework import ops
from tensorflow.python.training.server_lib import ClusterSpec
from tensorflow.python.util.tf_export import tf_export
def format_master_url(master, rpc_layer=None):
@ -42,6 +44,7 @@ def get_accelerator_devices(master, config_proto):
return devices
@tf_export('distribute.cluster_resolver.ClusterResolver')
@six.add_metaclass(abc.ABCMeta)
class ClusterResolver(object):
"""Abstract class for all implementations of ClusterResolvers.
@ -104,17 +107,14 @@ class ClusterResolver(object):
def num_accelerators(self,
task_type=None,
task_id=None,
accelerator_type='GPU',
config_proto=None):
"""Returns the number of accelerator cores per worker.
This returns the number of accelerator cores (such as GPUs and TPUs)
available per worker. If workers only has CPU cores available, then this
should return 0. This method will query the master for this information
if it is not otherwise known.
available per worker.
Optionally, we allow callers to specify the task_type, task_id, and
rpc_layer, if they want to target a specific TensorFlow process to query
Optionally, we allow callers to specify the task_type and task_id
if they want to target a specific TensorFlow process to query
the number of accelerators. This is to support heterogeneous environments,
where the number of accelerator cores per host is different.
@ -123,21 +123,39 @@ class ClusterResolver(object):
want to query.
task_id: (Optional) The index of the TensorFlow task of the machine we
want to query.
accelerator_type: (Optional) The type of accelerator we are trying to
query (defaults to 'GPU').
config_proto: (Optional) Configuration for starting a new session to
query how many accelerator cores it has.
Returns:
A map of accelerator types to number of cores.
"""
master = self.master(task_type, task_id)
devices = get_accelerator_devices(master, config_proto)
return sum(1 for d in devices if d.device_type == accelerator_type)
mapping = collections.defaultdict(int)
for device in devices:
mapping[device.device_type] += 1
return mapping
@abc.abstractproperty
@property
def environment(self):
"""Returns the current environment which TensorFlow is running in."""
raise NotImplementedError()
"""Returns the current environment which TensorFlow is running in.
There are two possible return values, "google" (when TensorFlow is running
in a Google-internal environment) or an empty string (when TensorFlow is
running elsewhere).
If you are implementing a ClusterResolver that works in both the Google
environment and the open-source world (for instance, a TPU ClusterResolver
or similar), you will have to return the appropriate string depending on the
environment, which you will have to detect.
Otherwise, if you are implementing a ClusterResolver that will only work
in open-source TensorFlow, you do not need to implement this property.
"""
return ''
@tf_export('distribute.cluster_resolver.SimpleClusterResolver')
class SimpleClusterResolver(ClusterResolver):
"""Simple implementation of ClusterResolver that accepts a ClusterSpec."""
@ -237,6 +255,7 @@ class SimpleClusterResolver(ClusterResolver):
self._rpc_layer = rpc_layer
@tf_export('distribute.cluster_resolver.UnionClusterResolver')
class UnionClusterResolver(ClusterResolver):
"""Performs a union on underlying ClusterResolvers.

View File

@ -57,24 +57,28 @@ class BaseClusterResolverTest(test.TestCase):
mock_list_devices.return_value = device_list
resolver = MockBaseClusterResolver()
self.assertEqual(resolver.num_accelerators(), 4)
self.assertEqual(resolver.num_accelerators(), {"GPU": 4})
@mock.patch.object(session.BaseSession, "list_devices")
def testNumAcceleratorsFilterSuccess(self, mock_list_devices):
def testNumAcceleratorsMultiDeviceSuccess(self, mock_list_devices):
device_names = [
"/job:worker/task:0/device:TPU:0",
"/job:worker/task:0/device:TPU:1",
"/job:worker/task:0/device:TPU:2",
"/job:worker/task:0/device:TPU:3",
"/job:worker/task:0/device:GPU:0",
"/job:worker/task:0/device:GPU:1",
"/job:worker/task:0/device:GPU:2",
"/job:worker/task:0/device:GPU:3",
]
device_list = [
session._DeviceAttributes(
name, "TPU", 1024, 0) for name in device_names
name, name[26:29], 1024, 0) for name in device_names
]
mock_list_devices.return_value = device_list
resolver = MockBaseClusterResolver()
self.assertEqual(resolver.num_accelerators(), 0)
self.assertEqual(resolver.num_accelerators(), {"TPU": 4, "GPU": 4})
class UnionClusterResolverTest(test.TestCase):

View File

@ -20,6 +20,8 @@ from __future__ import print_function
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver
from tensorflow.python.training.server_lib import ClusterSpec
from tensorflow.python.util.tf_export import tf_export
_GOOGLE_API_CLIENT_INSTALLED = True
try:
@ -29,11 +31,8 @@ except ImportError:
_GOOGLE_API_CLIENT_INSTALLED = False
def _format_master_url(master, rpc_layer=None):
return '%s://%s' % (rpc_layer, master) if rpc_layer else master
class GceClusterResolver(ClusterResolver):
@tf_export('distribute.cluster_resolver.GCEClusterResolver')
class GCEClusterResolver(ClusterResolver):
"""Cluster Resolver for Google Compute Engine.
This is an implementation of cluster resolvers for the Google Compute Engine
@ -53,9 +52,9 @@ class GceClusterResolver(ClusterResolver):
rpc_layer='grpc',
credentials='default',
service=None):
"""Creates a new GceClusterResolver object.
"""Creates a new GCEClusterResolver object.
This takes in a few parameters and creates a GceClusterResolver project. It
This takes in a few parameters and creates a GCEClusterResolver project. It
will then use these parameters to query the GCE API for the IP addresses of
each instance in the instance group.
@ -173,23 +172,13 @@ class GceClusterResolver(ClusterResolver):
@task_type.setter
def task_type(self, task_type):
raise RuntimeError(
'You cannot reset the task_type of the GceClusterResolver after it has '
'You cannot reset the task_type of the GCEClusterResolver after it has '
'been created.')
@task_id.setter
def task_id(self, task_id):
self._task_id = task_id
@property
def environment(self):
"""Returns the current environment which TensorFlow is running in.
For users in the GCE environment, the environment property is always an
empty string, and Google users will not use this ClusterResolver for running
on internal systems.
"""
return ''
@property
def rpc_layer(self):
return self._rpc_layer

View File

@ -12,13 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for GceClusterResolver."""
"""Tests for GCEClusterResolver."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.python.distribute.cluster_resolver import GceClusterResolver
from tensorflow.python.distribute.cluster_resolver import GCEClusterResolver
from tensorflow.python.distribute.cluster_resolver import UnionClusterResolver
from tensorflow.python.platform import test
from tensorflow.python.training import server_lib
@ -27,7 +27,7 @@ from tensorflow.python.training import server_lib
mock = test.mock
class GceClusterResolverTest(test.TestCase):
class GCEClusterResolverTest(test.TestCase):
def _verifyClusterSpecEquality(self, cluster_spec, expected_proto):
self.assertProtoEquals(expected_proto, cluster_spec.as_cluster_def())
@ -121,7 +121,7 @@ class GceClusterResolverTest(test.TestCase):
return self.standard_mock_service_client(mock_instance_group, mock_instance)
def testSimpleSuccessfulRetrieval(self):
gce_cluster_resolver = GceClusterResolver(
gce_cluster_resolver = GCEClusterResolver(
project='test-project',
zone='us-east1-d',
instance_group='test-instance-group',
@ -136,7 +136,7 @@ class GceClusterResolverTest(test.TestCase):
self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto)
def testMasterRetrieval(self):
gce_cluster_resolver = GceClusterResolver(
gce_cluster_resolver = GCEClusterResolver(
project='test-project',
zone='us-east1-d',
instance_group='test-instance-group',
@ -153,7 +153,7 @@ class GceClusterResolverTest(test.TestCase):
{'name': 'instance3', 'ip': '10.3.4.5'},
]
gce_cluster_resolver = GceClusterResolver(
gce_cluster_resolver = GCEClusterResolver(
project='test-project',
zone='us-east1-d',
instance_group='test-instance-group',
@ -172,7 +172,7 @@ class GceClusterResolverTest(test.TestCase):
{'name': 'instance3', 'ip': '10.3.4.5'},
]
gce_cluster_resolver = GceClusterResolver(
gce_cluster_resolver = GCEClusterResolver(
project='test-project',
zone='us-east1-d',
instance_group='test-instance-group',
@ -196,7 +196,7 @@ class GceClusterResolverTest(test.TestCase):
{'name': 'instance3', 'ip': '10.3.4.5'},
]
gce_cluster_resolver = GceClusterResolver(
gce_cluster_resolver = GCEClusterResolver(
project='test-project',
zone='us-east1-d',
instance_group='test-instance-group',
@ -210,7 +210,7 @@ class GceClusterResolverTest(test.TestCase):
task_type='', task_id=0), 'grpc://10.1.2.3:8470')
def testCustomJobNameAndPortRetrieval(self):
gce_cluster_resolver = GceClusterResolver(
gce_cluster_resolver = GCEClusterResolver(
project='test-project',
zone='us-east1-d',
instance_group='test-instance-group',
@ -232,7 +232,7 @@ class GceClusterResolverTest(test.TestCase):
{'name': 'instance3', 'ip': '10.3.4.5'},
]
gce_cluster_resolver = GceClusterResolver(
gce_cluster_resolver = GCEClusterResolver(
project='test-project',
zone='us-east1-d',
instance_group='test-instance-group',
@ -266,7 +266,7 @@ class GceClusterResolverTest(test.TestCase):
{'name': 'ps2', 'ip': '10.100.2.3'},
]
worker1_gce_cluster_resolver = GceClusterResolver(
worker1_gce_cluster_resolver = GCEClusterResolver(
project='test-project',
zone='us-east1-d',
instance_group='test-instance-group',
@ -275,7 +275,7 @@ class GceClusterResolverTest(test.TestCase):
credentials=None,
service=self.gen_standard_mock_service_client(worker1_name_to_ip))
worker2_gce_cluster_resolver = GceClusterResolver(
worker2_gce_cluster_resolver = GCEClusterResolver(
project='test-project',
zone='us-east1-d',
instance_group='test-instance-group',
@ -284,7 +284,7 @@ class GceClusterResolverTest(test.TestCase):
credentials=None,
service=self.gen_standard_mock_service_client(worker2_name_to_ip))
ps_gce_cluster_resolver = GceClusterResolver(
ps_gce_cluster_resolver = GCEClusterResolver(
project='test-project',
zone='us-east1-d',
instance_group='test-instance-group',

View File

@ -21,6 +21,7 @@ from __future__ import print_function
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import format_master_url
from tensorflow.python.training import server_lib
from tensorflow.python.util.tf_export import tf_export
_KUBERNETES_API_CLIENT_INSTALLED = True
try:
@ -30,6 +31,7 @@ except ImportError:
_KUBERNETES_API_CLIENT_INSTALLED = False
@tf_export('distribute.cluster_resolver.KubernetesClusterResolver')
class KubernetesClusterResolver(ClusterResolver):
"""Cluster Resolver for Kubernetes.
@ -154,13 +156,3 @@ class KubernetesClusterResolver(ClusterResolver):
cluster_map[tf_job] = all_pods
return server_lib.ClusterSpec(cluster_map)
@property
def environment(self):
"""Returns the current environment which TensorFlow is running in.
For users in the Cloud environment, the environment property is always an
empty string, and Google users will not use this ClusterResolver for running
on internal systems.
"""
return ''

View File

@ -25,8 +25,10 @@ import subprocess
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import format_master_url
from tensorflow.python.training.server_lib import ClusterSpec
from tensorflow.python.util.tf_export import tf_export
@tf_export('distribute.cluster_resolver.SlurmClusterResolver')
class SlurmClusterResolver(ClusterResolver):
"""Cluster Resolver for system with Slurm workload manager.
@ -215,16 +217,6 @@ class SlurmClusterResolver(ClusterResolver):
return ''
@property
def environment(self):
"""Returns the current environment which TensorFlow is running in.
For users in the Slurm environment, the environment property is always an
empty string, and Google users will not use this ClusterResolver for running
on internal systems.
"""
return ''
def num_accelerators(self,
task_type=None,
task_id=None,

View File

@ -24,6 +24,7 @@ import os
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver
from tensorflow.python.training.server_lib import ClusterSpec
from tensorflow.python.util.tf_export import tf_export
_TF_CONFIG_ENV = 'TF_CONFIG'
_SESSION_MASTER_KEY = 'session_master'
@ -47,6 +48,7 @@ def _get_value_in_tfconfig(key, default=None):
return tf_config[key] if key in tf_config else default
@tf_export('distribute.cluster_resolver.TFConfigClusterResolver')
class TFConfigClusterResolver(ClusterResolver):
"""Implementation of a ClusterResolver which reads the TF_CONFIG EnvVar."""

View File

@ -34,6 +34,7 @@ from tensorflow.python.framework import errors
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.training import server_lib
from tensorflow.python.util import compat
from tensorflow.python.util.tf_export import tf_export
_GOOGLE_API_CLIENT_INSTALLED = True
try:
@ -42,7 +43,6 @@ try:
except ImportError:
_GOOGLE_API_CLIENT_INSTALLED = False
_GKE_ENV_VARIABLE = 'KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS'
_ENDPOINTS_SEPARATOR = ','
_DEFAULT_ENV_VARIABLE = 'TPU_NAME'
@ -56,38 +56,7 @@ DeviceDetails = collections.namedtuple(
'DeviceDetails', ['device_map', 'total_cores'])
def _get_device_dict_and_cores(devices):
"""Returns a dict of hosts to cores and total cores given devices names.
Returns a namedtuple with two attributes:
device_map: A map of host_ids to a list of core_ids.
total_cores: The total number of cores within the TPU system.
Args:
devices: A list of devices returned by session.list_devices()
"""
device_map = collections.defaultdict(list)
num_cores = 0
for device in devices:
match = _TPU_DEVICE_REGEX.match(device.name)
if match:
host_id = match.group('host_id')
core_id = match.group('core_id')
device_map[host_id].append(core_id)
num_cores += 1
return DeviceDetails(device_map, num_cores)
def _verify_and_return_same_core_count(device_dict):
"""Verifies that every device in device_dict has the same number of cores."""
num_cores_per_host_set = (
{len(core_ids) for core_ids in device_dict.values()})
if len(num_cores_per_host_set) != 1:
raise RuntimeError('TPU cores on each device is not the same. This '
'should never happen. Devices: {}'.format(device_dict))
return num_cores_per_host_set.pop()
@tf_export('distribute.cluster_resolver.TPUClusterResolver')
class TPUClusterResolver(ClusterResolver):
"""Cluster Resolver for Google Cloud TPUs.
@ -143,6 +112,38 @@ class TPUClusterResolver(ClusterResolver):
return False
return True
@staticmethod
def _get_device_dict_and_cores(devices):
"""Returns a dict of hosts to cores and total cores given devices names.
Returns a namedtuple with two attributes:
device_map: A map of host_ids to a list of core_ids.
total_cores: The total number of cores within the TPU system.
Args:
devices: A list of devices returned by session.list_devices()
"""
device_map = collections.defaultdict(list)
num_cores = 0
for device in devices:
match = _TPU_DEVICE_REGEX.match(device.name)
if match:
host_id = match.group('host_id')
core_id = match.group('core_id')
device_map[host_id].append(core_id)
num_cores += 1
return DeviceDetails(device_map, num_cores)
@staticmethod
def _verify_and_return_same_core_count(device_dict):
"""Verifies that every device in device_dict has the same # of cores."""
num_cores_per_host_set = (
{len(core_ids) for core_ids in device_dict.values()})
if len(num_cores_per_host_set) != 1:
raise RuntimeError('TPU cores on each device is not the same. This '
'should never happen. Devices: {}'.format(device_dict))
return num_cores_per_host_set.pop()
@staticmethod
def _inGke():
"""When running in GKE, the environment variable will be set."""
@ -482,7 +483,7 @@ class TPUClusterResolver(ClusterResolver):
# TODO(b/120564445): Replace with standard library for retries.
while True:
try:
device_details = _get_device_dict_and_cores(
device_details = TPUClusterResolver._get_device_dict_and_cores(
get_accelerator_devices(self.master(), config_proto=config_proto))
break
except errors.DeadlineExceededError:
@ -497,7 +498,8 @@ class TPUClusterResolver(ClusterResolver):
raise RuntimeError(error_message)
if device_details.total_cores:
return _verify_and_return_same_core_count(device_details.device_map)
return TPUClusterResolver._verify_and_return_same_core_count(
device_details.device_map)
return 0
@property

View File

@ -24,8 +24,7 @@ import six
from six.moves.urllib.error import URLError
from tensorflow.python.client import session
from tensorflow.python.distribute import cluster_resolver
from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver
from tensorflow.python.distribute.cluster_resolver import TPUClusterResolver
from tensorflow.python.framework import errors
from tensorflow.python.platform import test
from tensorflow.python.training import server_lib
@ -129,26 +128,26 @@ class TPUClusterResolverTest(test.TestCase):
return mock_client
@mock.patch.object(cluster_resolver.TPUClusterResolver,
@mock.patch.object(TPUClusterResolver,
'_isRunningInGCE',
mock_is_running_in_gce)
def testCheckRunningInGceWithNoTpuName(self):
with self.assertRaisesRegexp(RuntimeError, '.*Google Cloud.*'):
cluster_resolver.TPUClusterResolver(tpu='')
TPUClusterResolver(tpu='')
@mock.patch.object(six.moves.urllib.request,
'urlopen',
mock_running_in_gce_urlopen)
def testIsRunningInGce(self):
self.assertTrue(cluster_resolver.TPUClusterResolver._isRunningInGCE())
self.assertTrue(TPUClusterResolver._isRunningInGCE())
@mock.patch.object(six.moves.urllib.request,
'urlopen',
mock_not_running_in_gce_urlopen)
def testIsNotRunningInGce(self):
self.assertFalse(cluster_resolver.TPUClusterResolver._isRunningInGCE())
self.assertFalse(TPUClusterResolver._isRunningInGCE())
@mock.patch.object(cluster_resolver.TPUClusterResolver,
@mock.patch.object(TPUClusterResolver,
'_requestComputeMetadata',
mock_request_compute_metadata)
def testRetrieveProjectAndZoneFromMetadata(self):
@ -160,7 +159,7 @@ class TPUClusterResolverTest(test.TestCase):
}
}
resolver = cluster_resolver.TPUClusterResolver(
resolver = TPUClusterResolver(
project=None,
zone=None,
tpu=['test-tpu-1'],
@ -182,7 +181,7 @@ class TPUClusterResolverTest(test.TestCase):
self._verifyClusterSpecEquality(actual_cluster_spec, str(expected_proto))
self.assertEqual(resolver.master(), 'grpc://10.1.2.3:8470')
@mock.patch.object(cluster_resolver.TPUClusterResolver,
@mock.patch.object(TPUClusterResolver,
'_requestComputeMetadata',
mock_request_compute_metadata)
def testRetrieveProjectAndZoneFromMetadataNoCoordinator(self):
@ -194,7 +193,7 @@ class TPUClusterResolverTest(test.TestCase):
}
}
resolver = cluster_resolver.TPUClusterResolver(
resolver = TPUClusterResolver(
project=None,
zone=None,
tpu=['test-tpu-1'],
@ -209,7 +208,7 @@ class TPUClusterResolverTest(test.TestCase):
self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto)
self.assertEqual(resolver.master(), 'grpc://10.1.2.3:8470')
@mock.patch.object(cluster_resolver.TPUClusterResolver,
@mock.patch.object(TPUClusterResolver,
'_requestComputeMetadata',
mock_request_compute_metadata)
def testUnhealthyCloudTpu(self):
@ -221,7 +220,7 @@ class TPUClusterResolverTest(test.TestCase):
}
}
resolver = cluster_resolver.TPUClusterResolver(
resolver = TPUClusterResolver(
project=None,
zone=None,
tpu='test-tpu-1',
@ -232,7 +231,7 @@ class TPUClusterResolverTest(test.TestCase):
with self.assertRaises(RuntimeError):
resolver.cluster_spec()
@mock.patch.object(cluster_resolver.TPUClusterResolver,
@mock.patch.object(TPUClusterResolver,
'_requestComputeMetadata',
mock_request_compute_metadata)
def testNotReadyCloudTpu(self):
@ -244,7 +243,7 @@ class TPUClusterResolverTest(test.TestCase):
}
}
resolver = cluster_resolver.TPUClusterResolver(
resolver = TPUClusterResolver(
project=None,
zone=None,
tpu='test-tpu-1',
@ -264,7 +263,7 @@ class TPUClusterResolverTest(test.TestCase):
}
}
resolver = cluster_resolver.TPUClusterResolver(
resolver = TPUClusterResolver(
project='test-project',
zone='us-central1-c',
tpu=['test-tpu-1'],
@ -292,7 +291,7 @@ class TPUClusterResolverTest(test.TestCase):
}
}
resolver = cluster_resolver.TPUClusterResolver(
resolver = TPUClusterResolver(
project='test-project',
zone='us-central1-c',
tpu='test-tpu-1',
@ -309,7 +308,7 @@ class TPUClusterResolverTest(test.TestCase):
self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto)
self.assertEqual('grpc://10.2.3.4:8470', resolver.master())
@mock.patch.object(cluster_resolver.TPUClusterResolver,
@mock.patch.object(TPUClusterResolver,
'_requestComputeMetadata',
mock_request_compute_metadata)
def testPodResolution(self):
@ -338,7 +337,7 @@ class TPUClusterResolverTest(test.TestCase):
}
}
resolver = cluster_resolver.TPUClusterResolver(
resolver = TPUClusterResolver(
tpu='test-tpu-1',
credentials=None,
service=self.mock_service_client(tpu_map=tpu_map),
@ -387,7 +386,7 @@ class TPUClusterResolverTest(test.TestCase):
}
}
resolver = cluster_resolver.TPUClusterResolver(
resolver = TPUClusterResolver(
project='test-project',
zone='us-central1-c',
tpu='test-tpu-1',
@ -412,7 +411,7 @@ class TPUClusterResolverTest(test.TestCase):
tpu_map = {}
with self.assertRaises(ValueError):
cluster_resolver.TPUClusterResolver(
TPUClusterResolver(
project='test-project',
zone='us-central1-c',
tpu=[],
@ -422,7 +421,7 @@ class TPUClusterResolverTest(test.TestCase):
# TODO(saeta): Convert to parameterized test when included in OSS TF.
def verifyShouldResolve(self, tpu, should_resolve):
resolver = cluster_resolver.TPUClusterResolver(
resolver = TPUClusterResolver(
project='test-project',
zone='us-central1-c',
tpu=tpu,
@ -432,7 +431,7 @@ class TPUClusterResolverTest(test.TestCase):
self.assertEqual(should_resolve, resolver._shouldResolve(),
"TPU: '%s'" % tpu)
@mock.patch.object(cluster_resolver.TPUClusterResolver,
@mock.patch.object(TPUClusterResolver,
'_isRunningInGCE',
mock_is_not_running_in_gce)
def testShouldResolveNoName(self):
@ -457,7 +456,7 @@ class TPUClusterResolverTest(test.TestCase):
self.verifyShouldResolve('grpctpu', True)
def testNoCallComputeMetadata(self):
resolver = cluster_resolver.TPUClusterResolver(
resolver = TPUClusterResolver(
tpu='/bns/foo/bar')
self.assertEqual('/bns/foo/bar', resolver.master())
self.assertEqual(None, resolver.cluster_spec())
@ -466,12 +465,12 @@ class TPUClusterResolverTest(test.TestCase):
os.environ['KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS'] = 'grpc://10.120.27.5:8470'
self.assertIn('KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS', os.environ)
self.assertTrue(cluster_resolver.TPUClusterResolver._inGke())
self.assertTrue(TPUClusterResolver._inGke())
self.assertEqual(
compat.as_bytes('grpc://10.120.27.5:8470'),
compat.as_bytes(cluster_resolver.TPUClusterResolver._gkeEndpoints()))
compat.as_bytes(TPUClusterResolver._gkeEndpoints()))
resolver = cluster_resolver.TPUClusterResolver()
resolver = TPUClusterResolver()
self.assertEqual(
compat.as_bytes('grpc://10.120.27.5:8470'),
compat.as_bytes(resolver.master()))
@ -493,15 +492,15 @@ class TPUClusterResolverTest(test.TestCase):
'grpc://10.120.27.8:8470')
self.assertIn('KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS', os.environ)
self.assertTrue(cluster_resolver.TPUClusterResolver._inGke())
self.assertTrue(TPUClusterResolver._inGke())
self.assertEqual(
compat.as_bytes('grpc://10.120.27.5:8470,'
'grpc://10.120.27.6:8470,'
'grpc://10.120.27.7:8470,'
'grpc://10.120.27.8:8470'),
compat.as_bytes(cluster_resolver.TPUClusterResolver._gkeEndpoints()))
compat.as_bytes(TPUClusterResolver._gkeEndpoints()))
resolver = cluster_resolver.TPUClusterResolver()
resolver = TPUClusterResolver()
self.assertEqual(
compat.as_bytes('grpc://10.120.27.5:8470'),
compat.as_bytes(resolver.master()))
@ -522,17 +521,17 @@ class TPUClusterResolverTest(test.TestCase):
def testEnvironmentDiscoveryUrl(self):
os.environ['TPU_API_DISCOVERY_URL'] = 'https://{api}.internal/{apiVersion}'
self.assertEqual('https://{api}.internal/{apiVersion}',
(cluster_resolver.TPUClusterResolver.
(TPUClusterResolver.
_environmentDiscoveryUrl()))
def testEnvironmentAndRpcDetectionForGoogle(self):
resolver = cluster_resolver.TPUClusterResolver(
resolver = TPUClusterResolver(
tpu='/bns/ab/cd/ef')
self.assertEqual(resolver.environment, 'google')
self.assertEqual(resolver.rpc_layer, None)
def testEnvironmentAndRpcDetectionForGrpcString(self):
resolver = cluster_resolver.TPUClusterResolver(
resolver = TPUClusterResolver(
tpu='grpc://10.1.2.3:8470')
self.assertEqual(resolver.environment, '')
self.assertEqual(resolver.rpc_layer, 'grpc')
@@ -564,7 +563,7 @@ class TPUClusterResolverTest(test.TestCase):
}
}
resolver = cluster_resolver.TPUClusterResolver(
resolver = TPUClusterResolver(
project='test-project',
zone='us-central1-c',
tpu='test-tpu-1',
@@ -599,7 +598,7 @@ class TPUClusterResolverTest(test.TestCase):
name, 'TPU', 1024, 0) for name in device_names
]
device_details = tpu_cluster_resolver._get_device_dict_and_cores(
device_details = TPUClusterResolver._get_device_dict_and_cores(
device_list)
self.assertEqual(device_details.total_cores, 8)
self.assertEqual(device_details.device_map,
@@ -624,24 +623,24 @@ class TPUClusterResolverTest(test.TestCase):
name, 'XLA', 1024, 0) for name in device_names
]
device_dict, num_cores = tpu_cluster_resolver._get_device_dict_and_cores(
device_dict, num_cores = TPUClusterResolver._get_device_dict_and_cores(
device_list)
self.assertEqual(num_cores, 0)
self.assertEqual(device_dict, {})
def testVerifySameCoreCount(self):
self.assertEqual(
tpu_cluster_resolver._verify_and_return_same_core_count(
TPUClusterResolver._verify_and_return_same_core_count(
{0: [0, 1, 2, 3, 4, 5, 6, 7]}), 8)
self.assertEqual(
tpu_cluster_resolver._verify_and_return_same_core_count(
TPUClusterResolver._verify_and_return_same_core_count(
{0: [0, 1], 1: [2, 3]}), 2)
with self.assertRaises(RuntimeError):
tpu_cluster_resolver._verify_and_return_same_core_count(
TPUClusterResolver._verify_and_return_same_core_count(
{0: [0], 1: [1, 2]})
@mock.patch.object(session.BaseSession, 'list_devices')
@mock.patch.object(cluster_resolver.TPUClusterResolver,
@mock.patch.object(TPUClusterResolver,
'_isRunningInGCE',
mock_is_not_running_in_gce)
def testNumAcceleratorsSuccess(self, mock_list_devices):
@@ -661,15 +660,15 @@ class TPUClusterResolverTest(test.TestCase):
]
mock_list_devices.return_value = device_list
resolver = cluster_resolver.TPUClusterResolver(tpu='')
resolver = TPUClusterResolver(tpu='')
self.assertEqual(resolver.num_accelerators(), 2)
@mock.patch.object(session.BaseSession, 'list_devices')
@mock.patch.object(cluster_resolver.TPUClusterResolver,
@mock.patch.object(TPUClusterResolver,
'_isRunningInGCE',
mock_is_not_running_in_gce)
def testNumAcceleratorsRetryFailure(self, mock_list_devices):
resolver = cluster_resolver.TPUClusterResolver(tpu='')
resolver = TPUClusterResolver(tpu='')
mock_list_devices.side_effect = errors.DeadlineExceededError(
None, None, 'timeout')
with self.assertRaises(RuntimeError):

View File

@@ -14,6 +14,7 @@ TENSORFLOW_API_INIT_FILES = [
"data/experimental/__init__.py",
"debugging/__init__.py",
"distribute/__init__.py",
"distribute/cluster_resolver/__init__.py",
"dtypes/__init__.py",
"errors/__init__.py",
"experimental/__init__.py",

View File

@@ -15,6 +15,7 @@ TENSORFLOW_API_INIT_FILES_V1 = [
"data/experimental/__init__.py",
"debugging/__init__.py",
"distribute/__init__.py",
"distribute/cluster_resolver/__init__.py",
"distributions/__init__.py",
"dtypes/__init__.py",
"errors/__init__.py",

View File

@@ -0,0 +1,24 @@
path: "tensorflow.distribute.cluster_resolver.ClusterResolver"
tf_class {
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
is_instance: "<type \'object\'>"
member {
name: "environment"
mtype: "<type \'property\'>"
}
member_method {
name: "__init__"
}
member_method {
name: "cluster_spec"
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "master"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
}
member_method {
name: "num_accelerators"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
}
}

View File

@@ -0,0 +1,38 @@
path: "tensorflow.distribute.cluster_resolver.GCEClusterResolver"
tf_class {
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver.GCEClusterResolver\'>"
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
is_instance: "<type \'object\'>"
member {
name: "environment"
mtype: "<type \'property\'>"
}
member {
name: "rpc_layer"
mtype: "<type \'property\'>"
}
member {
name: "task_id"
mtype: "<type \'property\'>"
}
member {
name: "task_type"
mtype: "<type \'property\'>"
}
member_method {
name: "__init__"
argspec: "args=[\'self\', \'project\', \'zone\', \'instance_group\', \'port\', \'task_type\', \'task_id\', \'rpc_layer\', \'credentials\', \'service\'], varargs=None, keywords=None, defaults=[\'worker\', \'0\', \'grpc\', \'default\', \'None\'], "
}
member_method {
name: "cluster_spec"
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "master"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
}
member_method {
name: "num_accelerators"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
}
}

View File

@@ -0,0 +1,26 @@
path: "tensorflow.distribute.cluster_resolver.KubernetesClusterResolver"
tf_class {
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.kubernetes_cluster_resolver.KubernetesClusterResolver\'>"
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
is_instance: "<type \'object\'>"
member {
name: "environment"
mtype: "<type \'property\'>"
}
member_method {
name: "__init__"
argspec: "args=[\'self\', \'job_to_label_mapping\', \'tf_server_port\', \'rpc_layer\', \'override_client\'], varargs=None, keywords=None, defaults=[\'None\', \'8470\', \'grpc\', \'None\'], "
}
member_method {
name: "cluster_spec"
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "master"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
}
member_method {
name: "num_accelerators"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
}
}

View File

@@ -0,0 +1,38 @@
path: "tensorflow.distribute.cluster_resolver.SimpleClusterResolver"
tf_class {
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.SimpleClusterResolver\'>"
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
is_instance: "<type \'object\'>"
member {
name: "environment"
mtype: "<type \'property\'>"
}
member {
name: "rpc_layer"
mtype: "<type \'property\'>"
}
member {
name: "task_id"
mtype: "<type \'property\'>"
}
member {
name: "task_type"
mtype: "<type \'property\'>"
}
member_method {
name: "__init__"
argspec: "args=[\'self\', \'cluster_spec\', \'master\', \'task_type\', \'task_id\', \'environment\', \'num_accelerators\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'\', \'None\', \'None\', \'\', \'0\', \'None\'], "
}
member_method {
name: "cluster_spec"
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "master"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
}
member_method {
name: "num_accelerators"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'accelerator_type\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'GPU\', \'None\'], "
}
}

View File

@@ -0,0 +1,30 @@
path: "tensorflow.distribute.cluster_resolver.SlurmClusterResolver"
tf_class {
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.slurm_cluster_resolver.SlurmClusterResolver\'>"
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
is_instance: "<type \'object\'>"
member {
name: "environment"
mtype: "<type \'property\'>"
}
member_method {
name: "__init__"
argspec: "args=[\'self\', \'jobs\', \'port_base\', \'gpus_per_node\', \'gpus_per_task\', \'tasks_per_node\', \'auto_set_gpu\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'8888\', \'1\', \'1\', \'None\', \'True\', \'grpc\'], "
}
member_method {
name: "cluster_spec"
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "get_task_info"
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "master"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
}
member_method {
name: "num_accelerators"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'accelerator_type\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'GPU\', \'None\'], "
}
}

View File

@@ -0,0 +1,38 @@
path: "tensorflow.distribute.cluster_resolver.TFConfigClusterResolver"
tf_class {
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.tfconfig_cluster_resolver.TFConfigClusterResolver\'>"
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
is_instance: "<type \'object\'>"
member {
name: "environment"
mtype: "<type \'property\'>"
}
member {
name: "rpc_layer"
mtype: "<type \'property\'>"
}
member {
name: "task_id"
mtype: "<type \'property\'>"
}
member {
name: "task_type"
mtype: "<type \'property\'>"
}
member_method {
name: "__init__"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\', \'environment\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
}
member_method {
name: "cluster_spec"
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "master"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
}
member_method {
name: "num_accelerators"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
}
}

View File

@@ -0,0 +1,34 @@
path: "tensorflow.distribute.cluster_resolver.TPUClusterResolver"
tf_class {
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.tpu_cluster_resolver.TPUClusterResolver\'>"
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
is_instance: "<type \'object\'>"
member {
name: "environment"
mtype: "<type \'property\'>"
}
member_method {
name: "__init__"
argspec: "args=[\'self\', \'tpu\', \'zone\', \'project\', \'job_name\', \'coordinator_name\', \'coordinator_address\', \'credentials\', \'service\', \'discovery_url\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'worker\', \'None\', \'None\', \'default\', \'None\', \'None\'], "
}
member_method {
name: "cluster_spec"
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "get_job_name"
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "get_master"
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "master"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
}
member_method {
name: "num_accelerators"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'accelerator_type\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'TPU\', \'None\'], "
}
}

View File

@@ -0,0 +1,38 @@
path: "tensorflow.distribute.cluster_resolver.UnionResolver"
tf_class {
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.UnionClusterResolver\'>"
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
is_instance: "<type \'object\'>"
member {
name: "environment"
mtype: "<type \'property\'>"
}
member {
name: "rpc_layer"
mtype: "<type \'property\'>"
}
member {
name: "task_id"
mtype: "<type \'property\'>"
}
member {
name: "task_type"
mtype: "<type \'property\'>"
}
member_method {
name: "__init__"
argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
}
member_method {
name: "cluster_spec"
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "master"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
}
member_method {
name: "num_accelerators"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'accelerator_type\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'GPU\', \'None\'], "
}
}

View File

@@ -0,0 +1,35 @@
path: "tensorflow.distribute.cluster_resolver"
tf_module {
member {
name: "ClusterResolver"
mtype: "<type \'type\'>"
}
member {
name: "GCEClusterResolver"
mtype: "<type \'type\'>"
}
member {
name: "KubernetesClusterResolver"
mtype: "<type \'type\'>"
}
member {
name: "SimpleClusterResolver"
mtype: "<type \'type\'>"
}
member {
name: "SlurmClusterResolver"
mtype: "<type \'type\'>"
}
member {
name: "TFConfigClusterResolver"
mtype: "<type \'type\'>"
}
member {
name: "TPUClusterResolver"
mtype: "<type \'type\'>"
}
member {
name: "UnionResolver"
mtype: "<type \'type\'>"
}
}

View File

@@ -32,6 +32,10 @@ tf_module {
name: "StrategyExtended"
mtype: "<type \'type\'>"
}
member {
name: "cluster_resolver"
mtype: "<type \'module\'>"
}
member_method {
name: "get_loss_reduction"
argspec: "args=[], varargs=None, keywords=None, defaults=None"

View File

@@ -0,0 +1,24 @@
path: "tensorflow.distribute.cluster_resolver.ClusterResolver"
tf_class {
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
is_instance: "<type \'object\'>"
member {
name: "environment"
mtype: "<type \'property\'>"
}
member_method {
name: "__init__"
}
member_method {
name: "cluster_spec"
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "master"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
}
member_method {
name: "num_accelerators"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
}
}

View File

@@ -0,0 +1,38 @@
path: "tensorflow.distribute.cluster_resolver.GCEClusterResolver"
tf_class {
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver.GCEClusterResolver\'>"
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
is_instance: "<type \'object\'>"
member {
name: "environment"
mtype: "<type \'property\'>"
}
member {
name: "rpc_layer"
mtype: "<type \'property\'>"
}
member {
name: "task_id"
mtype: "<type \'property\'>"
}
member {
name: "task_type"
mtype: "<type \'property\'>"
}
member_method {
name: "__init__"
argspec: "args=[\'self\', \'project\', \'zone\', \'instance_group\', \'port\', \'task_type\', \'task_id\', \'rpc_layer\', \'credentials\', \'service\'], varargs=None, keywords=None, defaults=[\'worker\', \'0\', \'grpc\', \'default\', \'None\'], "
}
member_method {
name: "cluster_spec"
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "master"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
}
member_method {
name: "num_accelerators"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
}
}

View File

@@ -0,0 +1,26 @@
path: "tensorflow.distribute.cluster_resolver.KubernetesClusterResolver"
tf_class {
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.kubernetes_cluster_resolver.KubernetesClusterResolver\'>"
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
is_instance: "<type \'object\'>"
member {
name: "environment"
mtype: "<type \'property\'>"
}
member_method {
name: "__init__"
argspec: "args=[\'self\', \'job_to_label_mapping\', \'tf_server_port\', \'rpc_layer\', \'override_client\'], varargs=None, keywords=None, defaults=[\'None\', \'8470\', \'grpc\', \'None\'], "
}
member_method {
name: "cluster_spec"
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "master"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
}
member_method {
name: "num_accelerators"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
}
}

View File

@@ -0,0 +1,38 @@
path: "tensorflow.distribute.cluster_resolver.SimpleClusterResolver"
tf_class {
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.SimpleClusterResolver\'>"
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
is_instance: "<type \'object\'>"
member {
name: "environment"
mtype: "<type \'property\'>"
}
member {
name: "rpc_layer"
mtype: "<type \'property\'>"
}
member {
name: "task_id"
mtype: "<type \'property\'>"
}
member {
name: "task_type"
mtype: "<type \'property\'>"
}
member_method {
name: "__init__"
argspec: "args=[\'self\', \'cluster_spec\', \'master\', \'task_type\', \'task_id\', \'environment\', \'num_accelerators\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'\', \'None\', \'None\', \'\', \'0\', \'None\'], "
}
member_method {
name: "cluster_spec"
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "master"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
}
member_method {
name: "num_accelerators"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'accelerator_type\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'GPU\', \'None\'], "
}
}

View File

@@ -0,0 +1,30 @@
path: "tensorflow.distribute.cluster_resolver.SlurmClusterResolver"
tf_class {
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.slurm_cluster_resolver.SlurmClusterResolver\'>"
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
is_instance: "<type \'object\'>"
member {
name: "environment"
mtype: "<type \'property\'>"
}
member_method {
name: "__init__"
argspec: "args=[\'self\', \'jobs\', \'port_base\', \'gpus_per_node\', \'gpus_per_task\', \'tasks_per_node\', \'auto_set_gpu\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'8888\', \'1\', \'1\', \'None\', \'True\', \'grpc\'], "
}
member_method {
name: "cluster_spec"
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "get_task_info"
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "master"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
}
member_method {
name: "num_accelerators"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'accelerator_type\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'GPU\', \'None\'], "
}
}

View File

@@ -0,0 +1,38 @@
path: "tensorflow.distribute.cluster_resolver.TFConfigClusterResolver"
tf_class {
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.tfconfig_cluster_resolver.TFConfigClusterResolver\'>"
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
is_instance: "<type \'object\'>"
member {
name: "environment"
mtype: "<type \'property\'>"
}
member {
name: "rpc_layer"
mtype: "<type \'property\'>"
}
member {
name: "task_id"
mtype: "<type \'property\'>"
}
member {
name: "task_type"
mtype: "<type \'property\'>"
}
member_method {
name: "__init__"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\', \'environment\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
}
member_method {
name: "cluster_spec"
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "master"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
}
member_method {
name: "num_accelerators"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
}
}

View File

@@ -0,0 +1,34 @@
path: "tensorflow.distribute.cluster_resolver.TPUClusterResolver"
tf_class {
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.tpu_cluster_resolver.TPUClusterResolver\'>"
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
is_instance: "<type \'object\'>"
member {
name: "environment"
mtype: "<type \'property\'>"
}
member_method {
name: "__init__"
argspec: "args=[\'self\', \'tpu\', \'zone\', \'project\', \'job_name\', \'coordinator_name\', \'coordinator_address\', \'credentials\', \'service\', \'discovery_url\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'worker\', \'None\', \'None\', \'default\', \'None\', \'None\'], "
}
member_method {
name: "cluster_spec"
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "get_job_name"
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "get_master"
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "master"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
}
member_method {
name: "num_accelerators"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'accelerator_type\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'TPU\', \'None\'], "
}
}

View File

@@ -0,0 +1,38 @@
path: "tensorflow.distribute.cluster_resolver.UnionResolver"
tf_class {
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.UnionClusterResolver\'>"
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
is_instance: "<type \'object\'>"
member {
name: "environment"
mtype: "<type \'property\'>"
}
member {
name: "rpc_layer"
mtype: "<type \'property\'>"
}
member {
name: "task_id"
mtype: "<type \'property\'>"
}
member {
name: "task_type"
mtype: "<type \'property\'>"
}
member_method {
name: "__init__"
argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
}
member_method {
name: "cluster_spec"
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
}
member_method {
name: "master"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
}
member_method {
name: "num_accelerators"
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'accelerator_type\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'GPU\', \'None\'], "
}
}

View File

@@ -0,0 +1,35 @@
path: "tensorflow.distribute.cluster_resolver"
tf_module {
member {
name: "ClusterResolver"
mtype: "<type \'type\'>"
}
member {
name: "GCEClusterResolver"
mtype: "<type \'type\'>"
}
member {
name: "KubernetesClusterResolver"
mtype: "<type \'type\'>"
}
member {
name: "SimpleClusterResolver"
mtype: "<type \'type\'>"
}
member {
name: "SlurmClusterResolver"
mtype: "<type \'type\'>"
}
member {
name: "TFConfigClusterResolver"
mtype: "<type \'type\'>"
}
member {
name: "TPUClusterResolver"
mtype: "<type \'type\'>"
}
member {
name: "UnionResolver"
mtype: "<type \'type\'>"
}
}

View File

@@ -32,6 +32,10 @@ tf_module {
name: "StrategyExtended"
mtype: "<type \'type\'>"
}
member {
name: "cluster_resolver"
mtype: "<type \'module\'>"
}
member_method {
name: "get_loss_reduction"
argspec: "args=[], varargs=None, keywords=None, defaults=None"