diff --git a/tensorflow/contrib/cluster_resolver/__init__.py b/tensorflow/contrib/cluster_resolver/__init__.py index 390b3e7550b..a4dea85efd9 100644 --- a/tensorflow/contrib/cluster_resolver/__init__.py +++ b/tensorflow/contrib/cluster_resolver/__init__.py @@ -23,7 +23,7 @@ from __future__ import print_function from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver from tensorflow.python.distribute.cluster_resolver.cluster_resolver import SimpleClusterResolver from tensorflow.python.distribute.cluster_resolver.cluster_resolver import UnionClusterResolver -from tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver import GceClusterResolver +from tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver import GCEClusterResolver from tensorflow.python.distribute.cluster_resolver.kubernetes_cluster_resolver import KubernetesClusterResolver from tensorflow.python.distribute.cluster_resolver.slurm_cluster_resolver import SlurmClusterResolver from tensorflow.python.distribute.cluster_resolver.tfconfig_cluster_resolver import TFConfigClusterResolver @@ -36,7 +36,7 @@ _allowed_symbols = [ 'ClusterResolver', 'SimpleClusterResolver', 'UnionClusterResolver', - 'GceClusterResolver', + 'GCEClusterResolver', 'KubernetesClusterResolver', 'TFConfigClusterResolver', 'TPUClusterResolver', diff --git a/tensorflow/contrib/cluster_resolver/python/training/__init__.py b/tensorflow/contrib/cluster_resolver/python/training/__init__.py index 10d93549ebb..ef1e9f11a07 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/__init__.py +++ b/tensorflow/contrib/cluster_resolver/python/training/__init__.py @@ -25,7 +25,7 @@ from __future__ import print_function from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver from tensorflow.python.distribute.cluster_resolver.cluster_resolver import SimpleClusterResolver from tensorflow.python.distribute.cluster_resolver.cluster_resolver import 
UnionClusterResolver -from tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver import GceClusterResolver +from tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver import GCEClusterResolver from tensorflow.python.distribute.cluster_resolver.kubernetes_cluster_resolver import KubernetesClusterResolver from tensorflow.python.distribute.cluster_resolver.slurm_cluster_resolver import SlurmClusterResolver from tensorflow.python.distribute.cluster_resolver.tfconfig_cluster_resolver import TFConfigClusterResolver @@ -43,7 +43,7 @@ _allowed_symbols = [ 'ClusterResolver', 'SimpleClusterResolver', 'UnionClusterResolver', - 'GceClusterResolver', + 'GCEClusterResolver', 'KubernetesClusterResolver', 'TFConfigClusterResolver', 'TPUClusterResolver', diff --git a/tensorflow/contrib/cluster_resolver/python/training/gce_cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/gce_cluster_resolver.py index 55e61155c68..5b49116ff6a 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/gce_cluster_resolver.py +++ b/tensorflow/contrib/cluster_resolver/python/training/gce_cluster_resolver.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Stub file for GceClusterResolver to maintain backwards compatibility.""" +"""Stub file for GCEClusterResolver to maintain backwards compatibility.""" from __future__ import absolute_import from __future__ import division @@ -23,13 +23,14 @@ from __future__ import print_function # existing OSS code will not be broken. 
# pylint: disable=unused-import -from tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver import GceClusterResolver +from tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver import GCEClusterResolver # pylint: enable=unused-import from tensorflow.python.util.all_util import remove_undocumented + _allowed_symbols = [ - 'GceClusterResolver', + 'GCEClusterResolver', ] remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/distribute/python/tpu_strategy.py b/tensorflow/contrib/distribute/python/tpu_strategy.py index 4387210062e..69ce1141d8b 100644 --- a/tensorflow/contrib/distribute/python/tpu_strategy.py +++ b/tensorflow/contrib/distribute/python/tpu_strategy.py @@ -39,7 +39,7 @@ from tensorflow.python.distribute import input_lib from tensorflow.python.distribute import numpy_dataset from tensorflow.python.distribute import reduce_util from tensorflow.python.distribute import values -from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver as resolver_lib +from tensorflow.python.distribute.cluster_resolver import TPUClusterResolver from tensorflow.python.eager import context from tensorflow.python.eager import tape from tensorflow.python.framework import constant_op @@ -65,7 +65,7 @@ def initialize_tpu_system(cluster_resolver=None): The tf.contrib.tpu.Topology object for the topology of the TPU cluster. 
""" if cluster_resolver is None: - cluster_resolver = resolver_lib.TPUClusterResolver("") + cluster_resolver = TPUClusterResolver("") master = cluster_resolver.master() logging.info("Initializing the TPU system.") @@ -246,7 +246,7 @@ class TPUExtended(distribute_lib.DistributionStrategyExtended): super(TPUExtended, self).__init__(container_strategy) if tpu_cluster_resolver is None: - tpu_cluster_resolver = resolver_lib.TPUClusterResolver("") + tpu_cluster_resolver = TPUClusterResolver("") if steps_per_run is None: # TODO(frankchn): Warn when we are being used by DS/Keras and this is diff --git a/tensorflow/python/distribute/__init__.py b/tensorflow/python/distribute/__init__.py index 4ff912ae10d..54e539ecdd2 100644 --- a/tensorflow/python/distribute/__init__.py +++ b/tensorflow/python/distribute/__init__.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function # pylint: disable=unused-import +from tensorflow.python.distribute import cluster_resolver from tensorflow.python.distribute import distribute_lib from tensorflow.python.distribute import distribution_strategy_context from tensorflow.python.distribute import mirrored_strategy diff --git a/tensorflow/python/distribute/cluster_resolver/__init__.py b/tensorflow/python/distribute/cluster_resolver/__init__.py index ef87f59b7fd..39ea191fb04 100644 --- a/tensorflow/python/distribute/cluster_resolver/__init__.py +++ b/tensorflow/python/distribute/cluster_resolver/__init__.py @@ -18,40 +18,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.distribute.cluster_resolver import cluster_resolver -from tensorflow.python.distribute.cluster_resolver import gce_cluster_resolver -from tensorflow.python.distribute.cluster_resolver import kubernetes_cluster_resolver -from tensorflow.python.distribute.cluster_resolver import slurm_cluster_resolver -from tensorflow.python.distribute.cluster_resolver import 
tfconfig_cluster_resolver -from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver - from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver from tensorflow.python.distribute.cluster_resolver.cluster_resolver import SimpleClusterResolver from tensorflow.python.distribute.cluster_resolver.cluster_resolver import UnionClusterResolver -from tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver import GceClusterResolver +from tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver import GCEClusterResolver from tensorflow.python.distribute.cluster_resolver.kubernetes_cluster_resolver import KubernetesClusterResolver from tensorflow.python.distribute.cluster_resolver.slurm_cluster_resolver import SlurmClusterResolver from tensorflow.python.distribute.cluster_resolver.tfconfig_cluster_resolver import TFConfigClusterResolver from tensorflow.python.distribute.cluster_resolver.tpu_cluster_resolver import TPUClusterResolver - -from tensorflow.python.util.all_util import remove_undocumented - -_allowed_symbols = [ - 'cluster_resolver', - 'gce_cluster_resolver', - 'kubernetes_cluster_resolver', - 'slurm_cluster_resolver', - 'tfconfig_cluster_resolver', - 'tpu_cluster_resolver', - 'ClusterResolver', - 'SimpleClusterResolver', - 'UnionClusterResolver', - 'GceClusterResolver', - 'KubernetesClusterResolver', - 'TFConfigClusterResolver', - 'TPUClusterResolver', - 'SlurmClusterResolver', -] - -remove_undocumented(__name__, _allowed_symbols) - diff --git a/tensorflow/python/distribute/cluster_resolver/cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/cluster_resolver.py index 8cc7cff6394..22b93f03302 100644 --- a/tensorflow/python/distribute/cluster_resolver/cluster_resolver.py +++ b/tensorflow/python/distribute/cluster_resolver/cluster_resolver.py @@ -20,11 +20,13 @@ from __future__ import print_function import abc +import collections import six from tensorflow.python.client import 
session from tensorflow.python.framework import ops from tensorflow.python.training.server_lib import ClusterSpec +from tensorflow.python.util.tf_export import tf_export def format_master_url(master, rpc_layer=None): @@ -42,6 +44,7 @@ def get_accelerator_devices(master, config_proto): return devices +@tf_export('distribute.cluster_resolver.ClusterResolver') @six.add_metaclass(abc.ABCMeta) class ClusterResolver(object): """Abstract class for all implementations of ClusterResolvers. @@ -104,17 +107,14 @@ class ClusterResolver(object): def num_accelerators(self, task_type=None, task_id=None, - accelerator_type='GPU', config_proto=None): """Returns the number of accelerator cores per worker. This returns the number of accelerator cores (such as GPUs and TPUs) - available per worker. If workers only has CPU cores available, then this - should return 0. This method will query the master for this information - if it is not otherwise known. + available per worker. - Optionally, we allow callers to specify the task_type, task_id, and - rpc_layer, if they want to target a specific TensorFlow process to query + Optionally, we allow callers to specify the task_type and task_id + if they want to target a specific TensorFlow process to query the number of accelerators. This is to support heterogenous environments, where the number of accelerators cores per host is different. @@ -123,21 +123,39 @@ class ClusterResolver(object): want to query. task_id: (Optional) The index of the TensorFlow task of the machine we want to query. - accelerator_type: (Optional) The type of accelerator we are trying to - query (defaults to 'GPU'). config_proto: (Optional) Configuration for starting a new session to query how many accelerator cores it has. + + Returns: + A map of accelerator types to number of cores. 
""" master = self.master(task_type, task_id) devices = get_accelerator_devices(master, config_proto) - return sum(1 for d in devices if d.device_type == accelerator_type) + mapping = collections.defaultdict(int) + for device in devices: + mapping[device.device_type] += 1 + return mapping - @abc.abstractproperty + @property def environment(self): - """Returns the current environment which TensorFlow is running in.""" - raise NotImplementedError() + """Returns the current environment which TensorFlow is running in. + + There are two possible return values, "google" (when TensorFlow is running + in a Google-internal environment) or an empty string (when TensorFlow is + running elsewhere). + + If you are implementing a ClusterResolver that works in both the Google + environment and the open-source world (for instance, a TPU ClusterResolver + or similar), you will have to return the appropriate string depending on the + environment, which you will have to detect. + + Otherwise, if you are implementing a ClusterResolver that will only work + in open-source TensorFlow, you do not need to implement this property. + """ + return '' +@tf_export('distribute.cluster_resolver.SimpleClusterResolver') class SimpleClusterResolver(ClusterResolver): """Simple implementation of ClusterResolver that accepts a ClusterSpec.""" @@ -237,6 +255,7 @@ class SimpleClusterResolver(ClusterResolver): self._rpc_layer = rpc_layer +@tf_export('distribute.cluster_resolver.UnionResolver') class UnionClusterResolver(ClusterResolver): """Performs a union on underlying ClusterResolvers. 
diff --git a/tensorflow/python/distribute/cluster_resolver/cluster_resolver_test.py b/tensorflow/python/distribute/cluster_resolver/cluster_resolver_test.py index c1eb29e2fc9..019d223eb02 100644 --- a/tensorflow/python/distribute/cluster_resolver/cluster_resolver_test.py +++ b/tensorflow/python/distribute/cluster_resolver/cluster_resolver_test.py @@ -57,24 +57,28 @@ class BaseClusterResolverTest(test.TestCase): mock_list_devices.return_value = device_list resolver = MockBaseClusterResolver() - self.assertEqual(resolver.num_accelerators(), 4) + self.assertEqual(resolver.num_accelerators(), {"GPU": 4}) @mock.patch.object(session.BaseSession, "list_devices") - def testNumAcceleratorsFilterSuccess(self, mock_list_devices): + def testNumAcceleratorsMultiDeviceSuccess(self, mock_list_devices): device_names = [ "/job:worker/task:0/device:TPU:0", "/job:worker/task:0/device:TPU:1", "/job:worker/task:0/device:TPU:2", "/job:worker/task:0/device:TPU:3", + "/job:worker/task:0/device:GPU:0", + "/job:worker/task:0/device:GPU:1", + "/job:worker/task:0/device:GPU:2", + "/job:worker/task:0/device:GPU:3", ] device_list = [ session._DeviceAttributes( - name, "TPU", 1024, 0) for name in device_names + name, name[26:29], 1024, 0) for name in device_names ] mock_list_devices.return_value = device_list resolver = MockBaseClusterResolver() - self.assertEqual(resolver.num_accelerators(), 0) + self.assertEqual(resolver.num_accelerators(), {"TPU": 4, "GPU": 4}) class UnionClusterResolverTest(test.TestCase): diff --git a/tensorflow/python/distribute/cluster_resolver/gce_cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/gce_cluster_resolver.py index 305c53870de..9d7dfdd1ea9 100644 --- a/tensorflow/python/distribute/cluster_resolver/gce_cluster_resolver.py +++ b/tensorflow/python/distribute/cluster_resolver/gce_cluster_resolver.py @@ -20,6 +20,8 @@ from __future__ import print_function from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver 
from tensorflow.python.training.server_lib import ClusterSpec +from tensorflow.python.util.tf_export import tf_export + _GOOGLE_API_CLIENT_INSTALLED = True try: @@ -29,11 +31,8 @@ except ImportError: _GOOGLE_API_CLIENT_INSTALLED = False -def _format_master_url(master, rpc_layer=None): - return '%s://%s' % (rpc_layer, master) if rpc_layer else master - - -class GceClusterResolver(ClusterResolver): +@tf_export('distribute.cluster_resolver.GCEClusterResolver') +class GCEClusterResolver(ClusterResolver): """Cluster Resolver for Google Compute Engine. This is an implementation of cluster resolvers for the Google Compute Engine @@ -53,9 +52,9 @@ class GceClusterResolver(ClusterResolver): rpc_layer='grpc', credentials='default', service=None): - """Creates a new GceClusterResolver object. + """Creates a new GCEClusterResolver object. - This takes in a few parameters and creates a GceClusterResolver project. It + This takes in a few parameters and creates a GCEClusterResolver object. It will then use these parameters to query the GCE API for the IP addresses of each instance in the instance group. @@ -173,23 +172,13 @@ class GceClusterResolver(ClusterResolver): @task_type.setter def task_type(self, task_type): raise RuntimeError( - 'You cannot reset the task_type of the GceClusterResolver after it has ' + 'You cannot reset the task_type of the GCEClusterResolver after it has ' 'been created.') @task_id.setter def task_id(self, task_id): self._task_id = task_id - @property - def environment(self): - """Returns the current environment which TensorFlow is running in. - - For users in the GCE environment, the environment property is always an - empty string, and Google users will not use this ClusterResolver for running - on internal systems. 
- """ - return '' - @property def rpc_layer(self): return self._rpc_layer diff --git a/tensorflow/python/distribute/cluster_resolver/gce_cluster_resolver_test.py b/tensorflow/python/distribute/cluster_resolver/gce_cluster_resolver_test.py index 07b9eeb08ef..47d1cdc0da9 100644 --- a/tensorflow/python/distribute/cluster_resolver/gce_cluster_resolver_test.py +++ b/tensorflow/python/distribute/cluster_resolver/gce_cluster_resolver_test.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Tests for GceClusterResolver.""" +"""Tests for GCEClusterResolver.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.distribute.cluster_resolver import GceClusterResolver +from tensorflow.python.distribute.cluster_resolver import GCEClusterResolver from tensorflow.python.distribute.cluster_resolver import UnionClusterResolver from tensorflow.python.platform import test from tensorflow.python.training import server_lib @@ -27,7 +27,7 @@ from tensorflow.python.training import server_lib mock = test.mock -class GceClusterResolverTest(test.TestCase): +class GCEClusterResolverTest(test.TestCase): def _verifyClusterSpecEquality(self, cluster_spec, expected_proto): self.assertProtoEquals(expected_proto, cluster_spec.as_cluster_def()) @@ -121,7 +121,7 @@ class GceClusterResolverTest(test.TestCase): return self.standard_mock_service_client(mock_instance_group, mock_instance) def testSimpleSuccessfulRetrieval(self): - gce_cluster_resolver = GceClusterResolver( + gce_cluster_resolver = GCEClusterResolver( project='test-project', zone='us-east1-d', instance_group='test-instance-group', @@ -136,7 +136,7 @@ class GceClusterResolverTest(test.TestCase): self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto) def testMasterRetrieval(self): - 
gce_cluster_resolver = GceClusterResolver( + gce_cluster_resolver = GCEClusterResolver( project='test-project', zone='us-east1-d', instance_group='test-instance-group', @@ -153,7 +153,7 @@ class GceClusterResolverTest(test.TestCase): {'name': 'instance3', 'ip': '10.3.4.5'}, ] - gce_cluster_resolver = GceClusterResolver( + gce_cluster_resolver = GCEClusterResolver( project='test-project', zone='us-east1-d', instance_group='test-instance-group', @@ -172,7 +172,7 @@ class GceClusterResolverTest(test.TestCase): {'name': 'instance3', 'ip': '10.3.4.5'}, ] - gce_cluster_resolver = GceClusterResolver( + gce_cluster_resolver = GCEClusterResolver( project='test-project', zone='us-east1-d', instance_group='test-instance-group', @@ -196,7 +196,7 @@ class GceClusterResolverTest(test.TestCase): {'name': 'instance3', 'ip': '10.3.4.5'}, ] - gce_cluster_resolver = GceClusterResolver( + gce_cluster_resolver = GCEClusterResolver( project='test-project', zone='us-east1-d', instance_group='test-instance-group', @@ -210,7 +210,7 @@ class GceClusterResolverTest(test.TestCase): task_type='', task_id=0), 'grpc://10.1.2.3:8470') def testCustomJobNameAndPortRetrieval(self): - gce_cluster_resolver = GceClusterResolver( + gce_cluster_resolver = GCEClusterResolver( project='test-project', zone='us-east1-d', instance_group='test-instance-group', @@ -232,7 +232,7 @@ class GceClusterResolverTest(test.TestCase): {'name': 'instance3', 'ip': '10.3.4.5'}, ] - gce_cluster_resolver = GceClusterResolver( + gce_cluster_resolver = GCEClusterResolver( project='test-project', zone='us-east1-d', instance_group='test-instance-group', @@ -266,7 +266,7 @@ class GceClusterResolverTest(test.TestCase): {'name': 'ps2', 'ip': '10.100.2.3'}, ] - worker1_gce_cluster_resolver = GceClusterResolver( + worker1_gce_cluster_resolver = GCEClusterResolver( project='test-project', zone='us-east1-d', instance_group='test-instance-group', @@ -275,7 +275,7 @@ class GceClusterResolverTest(test.TestCase): credentials=None, 
service=self.gen_standard_mock_service_client(worker1_name_to_ip)) - worker2_gce_cluster_resolver = GceClusterResolver( + worker2_gce_cluster_resolver = GCEClusterResolver( project='test-project', zone='us-east1-d', instance_group='test-instance-group', @@ -284,7 +284,7 @@ class GceClusterResolverTest(test.TestCase): credentials=None, service=self.gen_standard_mock_service_client(worker2_name_to_ip)) - ps_gce_cluster_resolver = GceClusterResolver( + ps_gce_cluster_resolver = GCEClusterResolver( project='test-project', zone='us-east1-d', instance_group='test-instance-group', diff --git a/tensorflow/python/distribute/cluster_resolver/kubernetes_cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/kubernetes_cluster_resolver.py index 71d48ed2b94..28b2712590d 100644 --- a/tensorflow/python/distribute/cluster_resolver/kubernetes_cluster_resolver.py +++ b/tensorflow/python/distribute/cluster_resolver/kubernetes_cluster_resolver.py @@ -21,6 +21,7 @@ from __future__ import print_function from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver from tensorflow.python.distribute.cluster_resolver.cluster_resolver import format_master_url from tensorflow.python.training import server_lib +from tensorflow.python.util.tf_export import tf_export _KUBERNETES_API_CLIENT_INSTALLED = True try: @@ -30,6 +31,7 @@ except ImportError: _KUBERNETES_API_CLIENT_INSTALLED = False +@tf_export('distribute.cluster_resolver.KubernetesClusterResolver') class KubernetesClusterResolver(ClusterResolver): """Cluster Resolver for Kubernetes. @@ -154,13 +156,3 @@ class KubernetesClusterResolver(ClusterResolver): cluster_map[tf_job] = all_pods return server_lib.ClusterSpec(cluster_map) - - @property - def environment(self): - """Returns the current environment which TensorFlow is running in. 
- - For users in the Cloud environment, the environment property is always an - empty string, and Google users will not use this ClusterResolver for running - on internal systems. - """ - return '' diff --git a/tensorflow/python/distribute/cluster_resolver/slurm_cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/slurm_cluster_resolver.py index 0ec566c670f..04675f4d176 100644 --- a/tensorflow/python/distribute/cluster_resolver/slurm_cluster_resolver.py +++ b/tensorflow/python/distribute/cluster_resolver/slurm_cluster_resolver.py @@ -25,8 +25,10 @@ import subprocess from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver from tensorflow.python.distribute.cluster_resolver.cluster_resolver import format_master_url from tensorflow.python.training.server_lib import ClusterSpec +from tensorflow.python.util.tf_export import tf_export +@tf_export('distribute.cluster_resolver.SlurmClusterResolver') class SlurmClusterResolver(ClusterResolver): """Cluster Resolver for system with Slurm workload manager. @@ -215,16 +217,6 @@ class SlurmClusterResolver(ClusterResolver): return '' - @property - def environment(self): - """Returns the current environment which TensorFlow is running in. - - For users in the Slurm environment, the environment property is always an - empty string, and Google users will not use this ClusterResolver for running - on internal systems. 
- """ - return '' - def num_accelerators(self, task_type=None, task_id=None, diff --git a/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver.py index 6dcbafbc504..48bfc606517 100644 --- a/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver.py +++ b/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver.py @@ -24,6 +24,7 @@ import os from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver from tensorflow.python.training.server_lib import ClusterSpec +from tensorflow.python.util.tf_export import tf_export _TF_CONFIG_ENV = 'TF_CONFIG' _SESSION_MASTER_KEY = 'session_master' @@ -47,6 +48,7 @@ def _get_value_in_tfconfig(key, default=None): return tf_config[key] if key in tf_config else default +@tf_export('distribute.cluster_resolver.TFConfigClusterResolver') class TFConfigClusterResolver(ClusterResolver): """Implementation of a ClusterResolver which reads the TF_CONFIG EnvVar.""" diff --git a/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py index d02e9b8fe81..b8d2ecc3e23 100644 --- a/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py +++ b/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py @@ -34,6 +34,7 @@ from tensorflow.python.framework import errors from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import server_lib from tensorflow.python.util import compat +from tensorflow.python.util.tf_export import tf_export _GOOGLE_API_CLIENT_INSTALLED = True try: @@ -42,7 +43,6 @@ try: except ImportError: _GOOGLE_API_CLIENT_INSTALLED = False - _GKE_ENV_VARIABLE = 'KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS' _ENDPOINTS_SEPARATOR = ',' _DEFAULT_ENV_VARIABLE = 'TPU_NAME' @@ -56,38 +56,7 @@ DeviceDetails = collections.namedtuple( 'DeviceDetails', 
['device_map', 'total_cores']) -def _get_device_dict_and_cores(devices): - """Returns a dict of hosts to cores and total cores given devices names. - - Returns a namedtuple with two attributes: - device_map: A map of host_ids to a list of core_ids. - total_cores: The total number of cores within the TPU system. - - Args: - devices: A list of devices returned by session.list_devices() - """ - device_map = collections.defaultdict(list) - num_cores = 0 - for device in devices: - match = _TPU_DEVICE_REGEX.match(device.name) - if match: - host_id = match.group('host_id') - core_id = match.group('core_id') - device_map[host_id].append(core_id) - num_cores += 1 - return DeviceDetails(device_map, num_cores) - - -def _verify_and_return_same_core_count(device_dict): - """Verifies that every device in device_dict has the same number of cores.""" - num_cores_per_host_set = ( - {len(core_ids) for core_ids in device_dict.values()}) - if len(num_cores_per_host_set) != 1: - raise RuntimeError('TPU cores on each device is not the same. This ' - 'should never happen. Devices: {}'.format(device_dict)) - return num_cores_per_host_set.pop() - - +@tf_export('distribute.cluster_resolver.TPUClusterResolver') class TPUClusterResolver(ClusterResolver): """Cluster Resolver for Google Cloud TPUs. @@ -143,6 +112,38 @@ class TPUClusterResolver(ClusterResolver): return False return True + @staticmethod + def _get_device_dict_and_cores(devices): + """Returns a dict of hosts to cores and total cores given devices names. + + Returns a namedtuple with two attributes: + device_map: A map of host_ids to a list of core_ids. + total_cores: The total number of cores within the TPU system. 
+ + Args: + devices: A list of devices returned by session.list_devices() + """ + device_map = collections.defaultdict(list) + num_cores = 0 + for device in devices: + match = _TPU_DEVICE_REGEX.match(device.name) + if match: + host_id = match.group('host_id') + core_id = match.group('core_id') + device_map[host_id].append(core_id) + num_cores += 1 + return DeviceDetails(device_map, num_cores) + + @staticmethod + def _verify_and_return_same_core_count(device_dict): + """Verifies that every device in device_dict has the same # of cores.""" + num_cores_per_host_set = ( + {len(core_ids) for core_ids in device_dict.values()}) + if len(num_cores_per_host_set) != 1: + raise RuntimeError('TPU cores on each device is not the same. This ' + 'should never happen. Devices: {}'.format(device_dict)) + return num_cores_per_host_set.pop() + @staticmethod def _inGke(): """When running in GKE, the environment variable will be set.""" @@ -482,7 +483,7 @@ class TPUClusterResolver(ClusterResolver): # TODO(b/120564445): Replace with standard library for retries. 
while True: try: - device_details = _get_device_dict_and_cores( + device_details = TPUClusterResolver._get_device_dict_and_cores( get_accelerator_devices(self.master(), config_proto=config_proto)) break except errors.DeadlineExceededError: @@ -497,7 +498,8 @@ class TPUClusterResolver(ClusterResolver): raise RuntimeError(error_message) if device_details.total_cores: - return _verify_and_return_same_core_count(device_details.device_map) + return TPUClusterResolver._verify_and_return_same_core_count( + device_details.device_map) return 0 @property diff --git a/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver_test.py b/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver_test.py index 0cb0dc607fb..7f06dc168bf 100644 --- a/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver_test.py +++ b/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver_test.py @@ -24,8 +24,7 @@ import six from six.moves.urllib.error import URLError from tensorflow.python.client import session -from tensorflow.python.distribute import cluster_resolver -from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver +from tensorflow.python.distribute.cluster_resolver import TPUClusterResolver from tensorflow.python.framework import errors from tensorflow.python.platform import test from tensorflow.python.training import server_lib @@ -129,26 +128,26 @@ class TPUClusterResolverTest(test.TestCase): return mock_client - @mock.patch.object(cluster_resolver.TPUClusterResolver, + @mock.patch.object(TPUClusterResolver, '_isRunningInGCE', mock_is_running_in_gce) def testCheckRunningInGceWithNoTpuName(self): with self.assertRaisesRegexp(RuntimeError, '.*Google Cloud.*'): - cluster_resolver.TPUClusterResolver(tpu='') + TPUClusterResolver(tpu='') @mock.patch.object(six.moves.urllib.request, 'urlopen', mock_running_in_gce_urlopen) def testIsRunningInGce(self): - self.assertTrue(cluster_resolver.TPUClusterResolver._isRunningInGCE()) + 
self.assertTrue(TPUClusterResolver._isRunningInGCE()) @mock.patch.object(six.moves.urllib.request, 'urlopen', mock_not_running_in_gce_urlopen) def testIsNotRunningInGce(self): - self.assertFalse(cluster_resolver.TPUClusterResolver._isRunningInGCE()) + self.assertFalse(TPUClusterResolver._isRunningInGCE()) - @mock.patch.object(cluster_resolver.TPUClusterResolver, + @mock.patch.object(TPUClusterResolver, '_requestComputeMetadata', mock_request_compute_metadata) def testRetrieveProjectAndZoneFromMetadata(self): @@ -160,7 +159,7 @@ class TPUClusterResolverTest(test.TestCase): } } - resolver = cluster_resolver.TPUClusterResolver( + resolver = TPUClusterResolver( project=None, zone=None, tpu=['test-tpu-1'], @@ -182,7 +181,7 @@ class TPUClusterResolverTest(test.TestCase): self._verifyClusterSpecEquality(actual_cluster_spec, str(expected_proto)) self.assertEqual(resolver.master(), 'grpc://10.1.2.3:8470') - @mock.patch.object(cluster_resolver.TPUClusterResolver, + @mock.patch.object(TPUClusterResolver, '_requestComputeMetadata', mock_request_compute_metadata) def testRetrieveProjectAndZoneFromMetadataNoCoordinator(self): @@ -194,7 +193,7 @@ class TPUClusterResolverTest(test.TestCase): } } - resolver = cluster_resolver.TPUClusterResolver( + resolver = TPUClusterResolver( project=None, zone=None, tpu=['test-tpu-1'], @@ -209,7 +208,7 @@ class TPUClusterResolverTest(test.TestCase): self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto) self.assertEqual(resolver.master(), 'grpc://10.1.2.3:8470') - @mock.patch.object(cluster_resolver.TPUClusterResolver, + @mock.patch.object(TPUClusterResolver, '_requestComputeMetadata', mock_request_compute_metadata) def testUnhealthyCloudTpu(self): @@ -221,7 +220,7 @@ class TPUClusterResolverTest(test.TestCase): } } - resolver = cluster_resolver.TPUClusterResolver( + resolver = TPUClusterResolver( project=None, zone=None, tpu='test-tpu-1', @@ -232,7 +231,7 @@ class TPUClusterResolverTest(test.TestCase): with 
self.assertRaises(RuntimeError): resolver.cluster_spec() - @mock.patch.object(cluster_resolver.TPUClusterResolver, + @mock.patch.object(TPUClusterResolver, '_requestComputeMetadata', mock_request_compute_metadata) def testNotReadyCloudTpu(self): @@ -244,7 +243,7 @@ class TPUClusterResolverTest(test.TestCase): } } - resolver = cluster_resolver.TPUClusterResolver( + resolver = TPUClusterResolver( project=None, zone=None, tpu='test-tpu-1', @@ -264,7 +263,7 @@ class TPUClusterResolverTest(test.TestCase): } } - resolver = cluster_resolver.TPUClusterResolver( + resolver = TPUClusterResolver( project='test-project', zone='us-central1-c', tpu=['test-tpu-1'], @@ -292,7 +291,7 @@ class TPUClusterResolverTest(test.TestCase): } } - resolver = cluster_resolver.TPUClusterResolver( + resolver = TPUClusterResolver( project='test-project', zone='us-central1-c', tpu='test-tpu-1', @@ -309,7 +308,7 @@ class TPUClusterResolverTest(test.TestCase): self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto) self.assertEqual('grpc://10.2.3.4:8470', resolver.master()) - @mock.patch.object(cluster_resolver.TPUClusterResolver, + @mock.patch.object(TPUClusterResolver, '_requestComputeMetadata', mock_request_compute_metadata) def testPodResolution(self): @@ -338,7 +337,7 @@ class TPUClusterResolverTest(test.TestCase): } } - resolver = cluster_resolver.TPUClusterResolver( + resolver = TPUClusterResolver( tpu='test-tpu-1', credentials=None, service=self.mock_service_client(tpu_map=tpu_map), @@ -387,7 +386,7 @@ class TPUClusterResolverTest(test.TestCase): } } - resolver = cluster_resolver.TPUClusterResolver( + resolver = TPUClusterResolver( project='test-project', zone='us-central1-c', tpu='test-tpu-1', @@ -412,7 +411,7 @@ class TPUClusterResolverTest(test.TestCase): tpu_map = {} with self.assertRaises(ValueError): - cluster_resolver.TPUClusterResolver( + TPUClusterResolver( project='test-project', zone='us-central1-c', tpu=[], @@ -422,7 +421,7 @@ class 
TPUClusterResolverTest(test.TestCase): # TODO(saeta): Convert to parameterized test when included in OSS TF. def verifyShouldResolve(self, tpu, should_resolve): - resolver = cluster_resolver.TPUClusterResolver( + resolver = TPUClusterResolver( project='test-project', zone='us-central1-c', tpu=tpu, @@ -432,7 +431,7 @@ class TPUClusterResolverTest(test.TestCase): self.assertEqual(should_resolve, resolver._shouldResolve(), "TPU: '%s'" % tpu) - @mock.patch.object(cluster_resolver.TPUClusterResolver, + @mock.patch.object(TPUClusterResolver, '_isRunningInGCE', mock_is_not_running_in_gce) def testShouldResolveNoName(self): @@ -457,7 +456,7 @@ class TPUClusterResolverTest(test.TestCase): self.verifyShouldResolve('grpctpu', True) def testNoCallComputeMetadata(self): - resolver = cluster_resolver.TPUClusterResolver( + resolver = TPUClusterResolver( tpu='/bns/foo/bar') self.assertEqual('/bns/foo/bar', resolver.master()) self.assertEqual(None, resolver.cluster_spec()) @@ -466,12 +465,12 @@ class TPUClusterResolverTest(test.TestCase): os.environ['KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS'] = 'grpc://10.120.27.5:8470' self.assertIn('KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS', os.environ) - self.assertTrue(cluster_resolver.TPUClusterResolver._inGke()) + self.assertTrue(TPUClusterResolver._inGke()) self.assertEqual( compat.as_bytes('grpc://10.120.27.5:8470'), - compat.as_bytes(cluster_resolver.TPUClusterResolver._gkeEndpoints())) + compat.as_bytes(TPUClusterResolver._gkeEndpoints())) - resolver = cluster_resolver.TPUClusterResolver() + resolver = TPUClusterResolver() self.assertEqual( compat.as_bytes('grpc://10.120.27.5:8470'), compat.as_bytes(resolver.master())) @@ -493,15 +492,15 @@ class TPUClusterResolverTest(test.TestCase): 'grpc://10.120.27.8:8470') self.assertIn('KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS', os.environ) - self.assertTrue(cluster_resolver.TPUClusterResolver._inGke()) + self.assertTrue(TPUClusterResolver._inGke()) self.assertEqual( compat.as_bytes('grpc://10.120.27.5:8470,' 
'grpc://10.120.27.6:8470,' 'grpc://10.120.27.7:8470,' 'grpc://10.120.27.8:8470'), - compat.as_bytes(cluster_resolver.TPUClusterResolver._gkeEndpoints())) + compat.as_bytes(TPUClusterResolver._gkeEndpoints())) - resolver = cluster_resolver.TPUClusterResolver() + resolver = TPUClusterResolver() self.assertEqual( compat.as_bytes('grpc://10.120.27.5:8470'), compat.as_bytes(resolver.master())) @@ -522,17 +521,17 @@ class TPUClusterResolverTest(test.TestCase): def testEnvironmentDiscoveryUrl(self): os.environ['TPU_API_DISCOVERY_URL'] = 'https://{api}.internal/{apiVersion}' self.assertEqual('https://{api}.internal/{apiVersion}', - (cluster_resolver.TPUClusterResolver. + (TPUClusterResolver. _environmentDiscoveryUrl())) def testEnvironmentAndRpcDetectionForGoogle(self): - resolver = cluster_resolver.TPUClusterResolver( + resolver = TPUClusterResolver( tpu='/bns/ab/cd/ef') self.assertEqual(resolver.environment, 'google') self.assertEqual(resolver.rpc_layer, None) def testEnvironmentAndRpcDetectionForGrpcString(self): - resolver = cluster_resolver.TPUClusterResolver( + resolver = TPUClusterResolver( tpu='grpc://10.1.2.3:8470') self.assertEqual(resolver.environment, '') self.assertEqual(resolver.rpc_layer, 'grpc') @@ -564,7 +563,7 @@ class TPUClusterResolverTest(test.TestCase): } } - resolver = cluster_resolver.TPUClusterResolver( + resolver = TPUClusterResolver( project='test-project', zone='us-central1-c', tpu='test-tpu-1', @@ -599,7 +598,7 @@ class TPUClusterResolverTest(test.TestCase): name, 'TPU', 1024, 0) for name in device_names ] - device_details = tpu_cluster_resolver._get_device_dict_and_cores( + device_details = TPUClusterResolver._get_device_dict_and_cores( device_list) self.assertEqual(device_details.total_cores, 8) self.assertEqual(device_details.device_map, @@ -624,24 +623,24 @@ class TPUClusterResolverTest(test.TestCase): name, 'XLA', 1024, 0) for name in device_names ] - device_dict, num_cores = tpu_cluster_resolver._get_device_dict_and_cores( + device_dict, 
num_cores = TPUClusterResolver._get_device_dict_and_cores( device_list) self.assertEqual(num_cores, 0) self.assertEqual(device_dict, {}) def testVerifySameCoreCount(self): self.assertEqual( - tpu_cluster_resolver._verify_and_return_same_core_count( + TPUClusterResolver._verify_and_return_same_core_count( {0: [0, 1, 2, 3, 4, 5, 6, 7]}), 8) self.assertEqual( - tpu_cluster_resolver._verify_and_return_same_core_count( + TPUClusterResolver._verify_and_return_same_core_count( {0: [0, 1], 1: [2, 3]}), 2) with self.assertRaises(RuntimeError): - tpu_cluster_resolver._verify_and_return_same_core_count( + TPUClusterResolver._verify_and_return_same_core_count( {0: [0], 1: [1, 2]}) @mock.patch.object(session.BaseSession, 'list_devices') - @mock.patch.object(cluster_resolver.TPUClusterResolver, + @mock.patch.object(TPUClusterResolver, '_isRunningInGCE', mock_is_not_running_in_gce) def testNumAcceleratorsSuccess(self, mock_list_devices): @@ -661,15 +660,15 @@ class TPUClusterResolverTest(test.TestCase): ] mock_list_devices.return_value = device_list - resolver = cluster_resolver.TPUClusterResolver(tpu='') + resolver = TPUClusterResolver(tpu='') self.assertEqual(resolver.num_accelerators(), 2) @mock.patch.object(session.BaseSession, 'list_devices') - @mock.patch.object(cluster_resolver.TPUClusterResolver, + @mock.patch.object(TPUClusterResolver, '_isRunningInGCE', mock_is_not_running_in_gce) def testNumAcceleratorsRetryFailure(self, mock_list_devices): - resolver = cluster_resolver.TPUClusterResolver(tpu='') + resolver = TPUClusterResolver(tpu='') mock_list_devices.side_effect = errors.DeadlineExceededError( None, None, 'timeout') with self.assertRaises(RuntimeError): diff --git a/tensorflow/python/tools/api/generator/api_init_files.bzl b/tensorflow/python/tools/api/generator/api_init_files.bzl index 7637cbd584a..2bac83c423c 100644 --- a/tensorflow/python/tools/api/generator/api_init_files.bzl +++ b/tensorflow/python/tools/api/generator/api_init_files.bzl @@ -14,6 +14,7 @@ 
TENSORFLOW_API_INIT_FILES = [ "data/experimental/__init__.py", "debugging/__init__.py", "distribute/__init__.py", + "distribute/cluster_resolver/__init__.py", "dtypes/__init__.py", "errors/__init__.py", "experimental/__init__.py", diff --git a/tensorflow/python/tools/api/generator/api_init_files_v1.bzl b/tensorflow/python/tools/api/generator/api_init_files_v1.bzl index 5db84b1d0be..21e59510102 100644 --- a/tensorflow/python/tools/api/generator/api_init_files_v1.bzl +++ b/tensorflow/python/tools/api/generator/api_init_files_v1.bzl @@ -15,6 +15,7 @@ TENSORFLOW_API_INIT_FILES_V1 = [ "data/experimental/__init__.py", "debugging/__init__.py", "distribute/__init__.py", + "distribute/cluster_resolver/__init__.py", "distributions/__init__.py", "dtypes/__init__.py", "errors/__init__.py", diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.-cluster-resolver.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.-cluster-resolver.pbtxt new file mode 100644 index 00000000000..0b35b61b4c0 --- /dev/null +++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.-cluster-resolver.pbtxt @@ -0,0 +1,24 @@ +path: "tensorflow.distribute.cluster_resolver.ClusterResolver" +tf_class { + is_instance: "" + is_instance: "" + member { + name: "environment" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "cluster_spec" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "master" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } + member_method { + name: "num_accelerators" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.-g-c-e-cluster-resolver.pbtxt 
b/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.-g-c-e-cluster-resolver.pbtxt new file mode 100644 index 00000000000..5c2cc522f1c --- /dev/null +++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.-g-c-e-cluster-resolver.pbtxt @@ -0,0 +1,38 @@ +path: "tensorflow.distribute.cluster_resolver.GCEClusterResolver" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "environment" + mtype: "" + } + member { + name: "rpc_layer" + mtype: "" + } + member { + name: "task_id" + mtype: "" + } + member { + name: "task_type" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'project\', \'zone\', \'instance_group\', \'port\', \'task_type\', \'task_id\', \'rpc_layer\', \'credentials\', \'service\'], varargs=None, keywords=None, defaults=[\'worker\', \'0\', \'grpc\', \'default\', \'None\'], " + } + member_method { + name: "cluster_spec" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "master" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } + member_method { + name: "num_accelerators" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.-kubernetes-cluster-resolver.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.-kubernetes-cluster-resolver.pbtxt new file mode 100644 index 00000000000..3220d68e054 --- /dev/null +++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.-kubernetes-cluster-resolver.pbtxt @@ -0,0 +1,26 @@ +path: "tensorflow.distribute.cluster_resolver.KubernetesClusterResolver" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "environment" + 
mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'job_to_label_mapping\', \'tf_server_port\', \'rpc_layer\', \'override_client\'], varargs=None, keywords=None, defaults=[\'None\', \'8470\', \'grpc\', \'None\'], " + } + member_method { + name: "cluster_spec" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "master" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } + member_method { + name: "num_accelerators" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.-simple-cluster-resolver.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.-simple-cluster-resolver.pbtxt new file mode 100644 index 00000000000..4e80e3af308 --- /dev/null +++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.-simple-cluster-resolver.pbtxt @@ -0,0 +1,38 @@ +path: "tensorflow.distribute.cluster_resolver.SimpleClusterResolver" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "environment" + mtype: "" + } + member { + name: "rpc_layer" + mtype: "" + } + member { + name: "task_id" + mtype: "" + } + member { + name: "task_type" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'cluster_spec\', \'master\', \'task_type\', \'task_id\', \'environment\', \'num_accelerators\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'\', \'None\', \'None\', \'\', \'0\', \'None\'], " + } + member_method { + name: "cluster_spec" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "master" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, 
keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } + member_method { + name: "num_accelerators" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'accelerator_type\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'GPU\', \'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.-slurm-cluster-resolver.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.-slurm-cluster-resolver.pbtxt new file mode 100644 index 00000000000..971ea3dca41 --- /dev/null +++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.-slurm-cluster-resolver.pbtxt @@ -0,0 +1,30 @@ +path: "tensorflow.distribute.cluster_resolver.SlurmClusterResolver" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "environment" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'jobs\', \'port_base\', \'gpus_per_node\', \'gpus_per_task\', \'tasks_per_node\', \'auto_set_gpu\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'8888\', \'1\', \'1\', \'None\', \'True\', \'grpc\'], " + } + member_method { + name: "cluster_spec" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_task_info" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "master" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } + member_method { + name: "num_accelerators" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'accelerator_type\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'GPU\', \'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.-t-f-config-cluster-resolver.pbtxt 
b/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.-t-f-config-cluster-resolver.pbtxt new file mode 100644 index 00000000000..5f9a430c0f8 --- /dev/null +++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.-t-f-config-cluster-resolver.pbtxt @@ -0,0 +1,38 @@ +path: "tensorflow.distribute.cluster_resolver.TFConfigClusterResolver" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "environment" + mtype: "" + } + member { + name: "rpc_layer" + mtype: "" + } + member { + name: "task_id" + mtype: "" + } + member { + name: "task_type" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\', \'environment\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " + } + member_method { + name: "cluster_spec" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "master" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } + member_method { + name: "num_accelerators" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.-t-p-u-cluster-resolver.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.-t-p-u-cluster-resolver.pbtxt new file mode 100644 index 00000000000..ca22c85ac0a --- /dev/null +++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.-t-p-u-cluster-resolver.pbtxt @@ -0,0 +1,34 @@ +path: "tensorflow.distribute.cluster_resolver.TPUClusterResolver" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "environment" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'tpu\', 
\'zone\', \'project\', \'job_name\', \'coordinator_name\', \'coordinator_address\', \'credentials\', \'service\', \'discovery_url\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'worker\', \'None\', \'None\', \'default\', \'None\', \'None\'], " + } + member_method { + name: "cluster_spec" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_job_name" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_master" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "master" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } + member_method { + name: "num_accelerators" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'accelerator_type\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'TPU\', \'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.-union-resolver.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.-union-resolver.pbtxt new file mode 100644 index 00000000000..179848aca39 --- /dev/null +++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.-union-resolver.pbtxt @@ -0,0 +1,38 @@ +path: "tensorflow.distribute.cluster_resolver.UnionResolver" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "environment" + mtype: "" + } + member { + name: "rpc_layer" + mtype: "" + } + member { + name: "task_id" + mtype: "" + } + member { + name: "task_type" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "cluster_spec" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: 
"master" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } + member_method { + name: "num_accelerators" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'accelerator_type\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'GPU\', \'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.pbtxt new file mode 100644 index 00000000000..5906ffa850a --- /dev/null +++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.cluster_resolver.pbtxt @@ -0,0 +1,35 @@ +path: "tensorflow.distribute.cluster_resolver" +tf_module { + member { + name: "ClusterResolver" + mtype: "" + } + member { + name: "GCEClusterResolver" + mtype: "" + } + member { + name: "KubernetesClusterResolver" + mtype: "" + } + member { + name: "SimpleClusterResolver" + mtype: "" + } + member { + name: "SlurmClusterResolver" + mtype: "" + } + member { + name: "TFConfigClusterResolver" + mtype: "" + } + member { + name: "TPUClusterResolver" + mtype: "" + } + member { + name: "UnionResolver" + mtype: "" + } +} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.pbtxt index 31dc6e07161..5b5c9e2df13 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.distribute.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.pbtxt @@ -32,6 +32,10 @@ tf_module { name: "StrategyExtended" mtype: "" } + member { + name: "cluster_resolver" + mtype: "" + } member_method { name: "get_loss_reduction" argspec: "args=[], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.-cluster-resolver.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.-cluster-resolver.pbtxt new file mode 100644 index 
00000000000..0b35b61b4c0 --- /dev/null +++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.-cluster-resolver.pbtxt @@ -0,0 +1,24 @@ +path: "tensorflow.distribute.cluster_resolver.ClusterResolver" +tf_class { + is_instance: "" + is_instance: "" + member { + name: "environment" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "cluster_spec" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "master" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } + member_method { + name: "num_accelerators" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.-g-c-e-cluster-resolver.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.-g-c-e-cluster-resolver.pbtxt new file mode 100644 index 00000000000..5c2cc522f1c --- /dev/null +++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.-g-c-e-cluster-resolver.pbtxt @@ -0,0 +1,38 @@ +path: "tensorflow.distribute.cluster_resolver.GCEClusterResolver" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "environment" + mtype: "" + } + member { + name: "rpc_layer" + mtype: "" + } + member { + name: "task_id" + mtype: "" + } + member { + name: "task_type" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'project\', \'zone\', \'instance_group\', \'port\', \'task_type\', \'task_id\', \'rpc_layer\', \'credentials\', \'service\'], varargs=None, keywords=None, defaults=[\'worker\', \'0\', \'grpc\', \'default\', \'None\'], " + } + member_method { + name: "cluster_spec" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + 
member_method { + name: "master" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } + member_method { + name: "num_accelerators" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.-kubernetes-cluster-resolver.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.-kubernetes-cluster-resolver.pbtxt new file mode 100644 index 00000000000..3220d68e054 --- /dev/null +++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.-kubernetes-cluster-resolver.pbtxt @@ -0,0 +1,26 @@ +path: "tensorflow.distribute.cluster_resolver.KubernetesClusterResolver" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "environment" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'job_to_label_mapping\', \'tf_server_port\', \'rpc_layer\', \'override_client\'], varargs=None, keywords=None, defaults=[\'None\', \'8470\', \'grpc\', \'None\'], " + } + member_method { + name: "cluster_spec" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "master" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } + member_method { + name: "num_accelerators" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.-simple-cluster-resolver.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.-simple-cluster-resolver.pbtxt new file mode 100644 index 00000000000..4e80e3af308 --- 
/dev/null +++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.-simple-cluster-resolver.pbtxt @@ -0,0 +1,38 @@ +path: "tensorflow.distribute.cluster_resolver.SimpleClusterResolver" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "environment" + mtype: "" + } + member { + name: "rpc_layer" + mtype: "" + } + member { + name: "task_id" + mtype: "" + } + member { + name: "task_type" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'cluster_spec\', \'master\', \'task_type\', \'task_id\', \'environment\', \'num_accelerators\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'\', \'None\', \'None\', \'\', \'0\', \'None\'], " + } + member_method { + name: "cluster_spec" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "master" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } + member_method { + name: "num_accelerators" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'accelerator_type\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'GPU\', \'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.-slurm-cluster-resolver.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.-slurm-cluster-resolver.pbtxt new file mode 100644 index 00000000000..971ea3dca41 --- /dev/null +++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.-slurm-cluster-resolver.pbtxt @@ -0,0 +1,30 @@ +path: "tensorflow.distribute.cluster_resolver.SlurmClusterResolver" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "environment" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'jobs\', \'port_base\', \'gpus_per_node\', \'gpus_per_task\', \'tasks_per_node\', 
\'auto_set_gpu\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'8888\', \'1\', \'1\', \'None\', \'True\', \'grpc\'], " + } + member_method { + name: "cluster_spec" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_task_info" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "master" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } + member_method { + name: "num_accelerators" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'accelerator_type\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'GPU\', \'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.-t-f-config-cluster-resolver.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.-t-f-config-cluster-resolver.pbtxt new file mode 100644 index 00000000000..5f9a430c0f8 --- /dev/null +++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.-t-f-config-cluster-resolver.pbtxt @@ -0,0 +1,38 @@ +path: "tensorflow.distribute.cluster_resolver.TFConfigClusterResolver" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "environment" + mtype: "" + } + member { + name: "rpc_layer" + mtype: "" + } + member { + name: "task_id" + mtype: "" + } + member { + name: "task_type" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\', \'environment\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " + } + member_method { + name: "cluster_spec" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "master" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, 
defaults=[\'None\', \'None\', \'None\'], " + } + member_method { + name: "num_accelerators" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.-t-p-u-cluster-resolver.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.-t-p-u-cluster-resolver.pbtxt new file mode 100644 index 00000000000..ca22c85ac0a --- /dev/null +++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.-t-p-u-cluster-resolver.pbtxt @@ -0,0 +1,34 @@ +path: "tensorflow.distribute.cluster_resolver.TPUClusterResolver" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "environment" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'tpu\', \'zone\', \'project\', \'job_name\', \'coordinator_name\', \'coordinator_address\', \'credentials\', \'service\', \'discovery_url\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'worker\', \'None\', \'None\', \'default\', \'None\', \'None\'], " + } + member_method { + name: "cluster_spec" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_job_name" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_master" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "master" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } + member_method { + name: "num_accelerators" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'accelerator_type\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'TPU\', \'None\'], " + } +} diff --git 
a/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.-union-resolver.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.-union-resolver.pbtxt new file mode 100644 index 00000000000..179848aca39 --- /dev/null +++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.-union-resolver.pbtxt @@ -0,0 +1,38 @@ +path: "tensorflow.distribute.cluster_resolver.UnionResolver" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "environment" + mtype: "" + } + member { + name: "rpc_layer" + mtype: "" + } + member { + name: "task_id" + mtype: "" + } + member { + name: "task_type" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" + } + member_method { + name: "cluster_spec" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "master" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + } + member_method { + name: "num_accelerators" + argspec: "args=[\'self\', \'task_type\', \'task_id\', \'accelerator_type\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'GPU\', \'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.pbtxt new file mode 100644 index 00000000000..5906ffa850a --- /dev/null +++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.cluster_resolver.pbtxt @@ -0,0 +1,35 @@ +path: "tensorflow.distribute.cluster_resolver" +tf_module { + member { + name: "ClusterResolver" + mtype: "" + } + member { + name: "GCEClusterResolver" + mtype: "" + } + member { + name: "KubernetesClusterResolver" + mtype: "" + } + member { + name: "SimpleClusterResolver" + mtype: "" + } + member { + name: "SlurmClusterResolver" + 
mtype: "" + } + member { + name: "TFConfigClusterResolver" + mtype: "" + } + member { + name: "TPUClusterResolver" + mtype: "" + } + member { + name: "UnionResolver" + mtype: "" + } +} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.pbtxt index 31dc6e07161..5b5c9e2df13 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.distribute.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.pbtxt @@ -32,6 +32,10 @@ tf_module { name: "StrategyExtended" mtype: "" } + member { + name: "cluster_resolver" + mtype: "" + } member_method { name: "get_loss_reduction" argspec: "args=[], varargs=None, keywords=None, defaults=None"