Making Cluster Resolvers visible by default for TensorFlow 2.0 transition
PiperOrigin-RevId: 231328553
parent e7f1a44a5f
commit a39f31a942
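With these exports in place, the cluster resolvers become reachable from the public tf.distribute.cluster_resolver namespace instead of only through tensorflow.python internals. A minimal usage sketch (illustrative only, not part of this change; it assumes a build that includes the tf_export names added below):

    # Sketch: resolving a cluster through the newly exported public API.
    import tensorflow as tf

    # Reads the cluster layout for this process from the TF_CONFIG env var.
    resolver = tf.distribute.cluster_resolver.TFConfigClusterResolver()
    cluster_spec = resolver.cluster_spec()   # tf.train.ClusterSpec for the job
    master = resolver.master()               # address of this task's master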
@@ -23,7 +23,7 @@ from __future__ import print_function
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import SimpleClusterResolver
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import UnionClusterResolver
from tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver import GceClusterResolver
from tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver import GCEClusterResolver
from tensorflow.python.distribute.cluster_resolver.kubernetes_cluster_resolver import KubernetesClusterResolver
from tensorflow.python.distribute.cluster_resolver.slurm_cluster_resolver import SlurmClusterResolver
from tensorflow.python.distribute.cluster_resolver.tfconfig_cluster_resolver import TFConfigClusterResolver
@@ -36,7 +36,7 @@ _allowed_symbols = [
    'ClusterResolver',
    'SimpleClusterResolver',
    'UnionClusterResolver',
    'GceClusterResolver',
    'GCEClusterResolver',
    'KubernetesClusterResolver',
    'TFConfigClusterResolver',
    'TPUClusterResolver',
@@ -25,7 +25,7 @@ from __future__ import print_function
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import SimpleClusterResolver
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import UnionClusterResolver
from tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver import GceClusterResolver
from tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver import GCEClusterResolver
from tensorflow.python.distribute.cluster_resolver.kubernetes_cluster_resolver import KubernetesClusterResolver
from tensorflow.python.distribute.cluster_resolver.slurm_cluster_resolver import SlurmClusterResolver
from tensorflow.python.distribute.cluster_resolver.tfconfig_cluster_resolver import TFConfigClusterResolver
@@ -43,7 +43,7 @@ _allowed_symbols = [
    'ClusterResolver',
    'SimpleClusterResolver',
    'UnionClusterResolver',
    'GceClusterResolver',
    'GCEClusterResolver',
    'KubernetesClusterResolver',
    'TFConfigClusterResolver',
    'TPUClusterResolver',
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Stub file for GceClusterResolver to maintain backwards compatibility."""
"""Stub file for GCEClusterResolver to maintain backwards compatibility."""

from __future__ import absolute_import
from __future__ import division
@@ -23,13 +23,14 @@ from __future__ import print_function
# existing OSS code will not be broken.

# pylint: disable=unused-import
from tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver import GceClusterResolver
from tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver import GCEClusterResolver
# pylint: enable=unused-import

from tensorflow.python.util.all_util import remove_undocumented


_allowed_symbols = [
    'GceClusterResolver',
    'GCEClusterResolver',
]

remove_undocumented(__name__, _allowed_symbols)
@@ -39,7 +39,7 @@ from tensorflow.python.distribute import input_lib
from tensorflow.python.distribute import numpy_dataset
from tensorflow.python.distribute import reduce_util
from tensorflow.python.distribute import values
from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver as resolver_lib
from tensorflow.python.distribute.cluster_resolver import TPUClusterResolver
from tensorflow.python.eager import context
from tensorflow.python.eager import tape
from tensorflow.python.framework import constant_op
@@ -65,7 +65,7 @@ def initialize_tpu_system(cluster_resolver=None):
    The tf.contrib.tpu.Topology object for the topology of the TPU cluster.
  """
  if cluster_resolver is None:
    cluster_resolver = resolver_lib.TPUClusterResolver("")
    cluster_resolver = TPUClusterResolver("")
  master = cluster_resolver.master()

  logging.info("Initializing the TPU system.")
@@ -246,7 +246,7 @@ class TPUExtended(distribute_lib.DistributionStrategyExtended):
    super(TPUExtended, self).__init__(container_strategy)

    if tpu_cluster_resolver is None:
      tpu_cluster_resolver = resolver_lib.TPUClusterResolver("")
      tpu_cluster_resolver = TPUClusterResolver("")

    if steps_per_run is None:
      # TODO(frankchn): Warn when we are being used by DS/Keras and this is
@@ -19,6 +19,7 @@ from __future__ import division
from __future__ import print_function

# pylint: disable=unused-import
from tensorflow.python.distribute import cluster_resolver
from tensorflow.python.distribute import distribute_lib
from tensorflow.python.distribute import distribution_strategy_context
from tensorflow.python.distribute import mirrored_strategy
@@ -18,40 +18,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.distribute.cluster_resolver import cluster_resolver
from tensorflow.python.distribute.cluster_resolver import gce_cluster_resolver
from tensorflow.python.distribute.cluster_resolver import kubernetes_cluster_resolver
from tensorflow.python.distribute.cluster_resolver import slurm_cluster_resolver
from tensorflow.python.distribute.cluster_resolver import tfconfig_cluster_resolver
from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver

from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import SimpleClusterResolver
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import UnionClusterResolver
from tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver import GceClusterResolver
from tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver import GCEClusterResolver
from tensorflow.python.distribute.cluster_resolver.kubernetes_cluster_resolver import KubernetesClusterResolver
from tensorflow.python.distribute.cluster_resolver.slurm_cluster_resolver import SlurmClusterResolver
from tensorflow.python.distribute.cluster_resolver.tfconfig_cluster_resolver import TFConfigClusterResolver
from tensorflow.python.distribute.cluster_resolver.tpu_cluster_resolver import TPUClusterResolver

from tensorflow.python.util.all_util import remove_undocumented

_allowed_symbols = [
    'cluster_resolver',
    'gce_cluster_resolver',
    'kubernetes_cluster_resolver',
    'slurm_cluster_resolver',
    'tfconfig_cluster_resolver',
    'tpu_cluster_resolver',
    'ClusterResolver',
    'SimpleClusterResolver',
    'UnionClusterResolver',
    'GceClusterResolver',
    'KubernetesClusterResolver',
    'TFConfigClusterResolver',
    'TPUClusterResolver',
    'SlurmClusterResolver',
]

remove_undocumented(__name__, _allowed_symbols)
@@ -20,11 +20,13 @@ from __future__ import print_function

import abc

import collections
import six

from tensorflow.python.client import session
from tensorflow.python.framework import ops
from tensorflow.python.training.server_lib import ClusterSpec
from tensorflow.python.util.tf_export import tf_export


def format_master_url(master, rpc_layer=None):
@@ -42,6 +44,7 @@ def get_accelerator_devices(master, config_proto):
  return devices


@tf_export('distribute.cluster_resolver.ClusterResolver')
@six.add_metaclass(abc.ABCMeta)
class ClusterResolver(object):
  """Abstract class for all implementations of ClusterResolvers.
@@ -104,17 +107,14 @@ class ClusterResolver(object):
  def num_accelerators(self,
                       task_type=None,
                       task_id=None,
                       accelerator_type='GPU',
                       config_proto=None):
    """Returns the number of accelerator cores per worker.

    This returns the number of accelerator cores (such as GPUs and TPUs)
    available per worker. If workers only has CPU cores available, then this
    should return 0. This method will query the master for this information
    if it is not otherwise known.
    available per worker.

    Optionally, we allow callers to specify the task_type, task_id, and
    rpc_layer, if they want to target a specific TensorFlow process to query
    Optionally, we allow callers to specify the task_type, and task_id, for
    if they want to target a specific TensorFlow process to query
    the number of accelerators. This is to support heterogenous environments,
    where the number of accelerators cores per host is different.

@@ -123,21 +123,39 @@ class ClusterResolver(object):
        want to query.
      task_id: (Optional) The index of the TensorFlow task of the machine we
        want to query.
      accelerator_type: (Optional) The type of accelerator we are trying to
        query (defaults to 'GPU').
      config_proto: (Optional) Configuration for starting a new session to
        query how many accelerator cores it has.

    Returns:
      A map of accelerator types to number of cores.
    """
    master = self.master(task_type, task_id)
    devices = get_accelerator_devices(master, config_proto)
    return sum(1 for d in devices if d.device_type == accelerator_type)
    mapping = collections.defaultdict(int)
    for device in devices:
      mapping[device.device_type] += 1
    return mapping

  @abc.abstractproperty
  @property
  def environment(self):
    """Returns the current environment which TensorFlow is running in."""
    raise NotImplementedError()
    """Returns the current environment which TensorFlow is running in.

    There are two possible return values, "google" (when TensorFlow is running
    in a Google-internal environment) or an empty string (when TensorFlow is
    running elsewhere).

    If you are implementing a ClusterResolver that works in both the Google
    environment and the open-source world (for instance, a TPU ClusterResolver
    or similar), you will have to return the appropriate string depending on the
    environment, which you will have to detect.

    Otherwise, if you are implementing a ClusterResolver that will only work
    in open-source TensorFlow, you do not need to implement this property.
    """
    return ''


@tf_export('distribute.cluster_resolver.SimpleClusterResolver')
class SimpleClusterResolver(ClusterResolver):
  """Simple implementation of ClusterResolver that accepts a ClusterSpec."""

@@ -237,6 +255,7 @@ class SimpleClusterResolver(ClusterResolver):
    self._rpc_layer = rpc_layer


@tf_export('distribute.cluster_resolver.UnionResolver')
class UnionClusterResolver(ClusterResolver):
  """Performs a union on underlying ClusterResolvers.
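The num_accelerators() change above drops the base class's accelerator_type filter and returns a mapping from device type to core count, which the test update below exercises. A hedged sketch of the new contract (the resolver choice and device counts here are hypothetical):

    # Sketch: num_accelerators() now returns a dict keyed by device type.
    import tensorflow as tf

    resolver = tf.distribute.cluster_resolver.TFConfigClusterResolver()
    counts = resolver.num_accelerators()     # e.g. {'GPU': 4, 'TPU': 8}
    num_gpus = counts.get('GPU', 0)          # 0 when only CPUs are present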
@@ -57,24 +57,28 @@ class BaseClusterResolverTest(test.TestCase):
    mock_list_devices.return_value = device_list

    resolver = MockBaseClusterResolver()
    self.assertEqual(resolver.num_accelerators(), 4)
    self.assertEqual(resolver.num_accelerators(), {"GPU": 4})

  @mock.patch.object(session.BaseSession, "list_devices")
  def testNumAcceleratorsFilterSuccess(self, mock_list_devices):
  def testNumAcceleratorsMultiDeviceSuccess(self, mock_list_devices):
    device_names = [
        "/job:worker/task:0/device:TPU:0",
        "/job:worker/task:0/device:TPU:1",
        "/job:worker/task:0/device:TPU:2",
        "/job:worker/task:0/device:TPU:3",
        "/job:worker/task:0/device:GPU:0",
        "/job:worker/task:0/device:GPU:1",
        "/job:worker/task:0/device:GPU:2",
        "/job:worker/task:0/device:GPU:3",
    ]
    device_list = [
        session._DeviceAttributes(
            name, "TPU", 1024, 0) for name in device_names
            name, name[26:29], 1024, 0) for name in device_names
    ]
    mock_list_devices.return_value = device_list

    resolver = MockBaseClusterResolver()
    self.assertEqual(resolver.num_accelerators(), 0)
    self.assertEqual(resolver.num_accelerators(), {"TPU": 4, "GPU": 4})


class UnionClusterResolverTest(test.TestCase):
@ -20,6 +20,8 @@ from __future__ import print_function
|
||||
|
||||
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver
|
||||
from tensorflow.python.training.server_lib import ClusterSpec
|
||||
from tensorflow.python.util.tf_export import tf_export
|
||||
|
||||
|
||||
_GOOGLE_API_CLIENT_INSTALLED = True
|
||||
try:
|
||||
@ -29,11 +31,8 @@ except ImportError:
|
||||
_GOOGLE_API_CLIENT_INSTALLED = False
|
||||
|
||||
|
||||
def _format_master_url(master, rpc_layer=None):
|
||||
return '%s://%s' % (rpc_layer, master) if rpc_layer else master
|
||||
|
||||
|
||||
class GceClusterResolver(ClusterResolver):
|
||||
@tf_export('distribute.cluster_resolver.GCEClusterResolver')
|
||||
class GCEClusterResolver(ClusterResolver):
|
||||
"""Cluster Resolver for Google Compute Engine.
|
||||
|
||||
This is an implementation of cluster resolvers for the Google Compute Engine
|
||||
@ -53,9 +52,9 @@ class GceClusterResolver(ClusterResolver):
|
||||
rpc_layer='grpc',
|
||||
credentials='default',
|
||||
service=None):
|
||||
"""Creates a new GceClusterResolver object.
|
||||
"""Creates a new GCEClusterResolver object.
|
||||
|
||||
This takes in a few parameters and creates a GceClusterResolver project. It
|
||||
This takes in a few parameters and creates a GCEClusterResolver project. It
|
||||
will then use these parameters to query the GCE API for the IP addresses of
|
||||
each instance in the instance group.
|
||||
|
||||
@ -173,23 +172,13 @@ class GceClusterResolver(ClusterResolver):
|
||||
@task_type.setter
|
||||
def task_type(self, task_type):
|
||||
raise RuntimeError(
|
||||
'You cannot reset the task_type of the GceClusterResolver after it has '
|
||||
'You cannot reset the task_type of the GCEClusterResolver after it has '
|
||||
'been created.')
|
||||
|
||||
@task_id.setter
|
||||
def task_id(self, task_id):
|
||||
self._task_id = task_id
|
||||
|
||||
@property
|
||||
def environment(self):
|
||||
"""Returns the current environment which TensorFlow is running in.
|
||||
|
||||
For users in the GCE environment, the environment property is always an
|
||||
empty string, and Google users will not use this ClusterResolver for running
|
||||
on internal systems.
|
||||
"""
|
||||
return ''
|
||||
|
||||
@property
|
||||
def rpc_layer(self):
|
||||
return self._rpc_layer
|
||||
|
@ -12,13 +12,13 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""Tests for GceClusterResolver."""
|
||||
"""Tests for GCEClusterResolver."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from tensorflow.python.distribute.cluster_resolver import GceClusterResolver
|
||||
from tensorflow.python.distribute.cluster_resolver import GCEClusterResolver
|
||||
from tensorflow.python.distribute.cluster_resolver import UnionClusterResolver
|
||||
from tensorflow.python.platform import test
|
||||
from tensorflow.python.training import server_lib
|
||||
@ -27,7 +27,7 @@ from tensorflow.python.training import server_lib
|
||||
mock = test.mock
|
||||
|
||||
|
||||
class GceClusterResolverTest(test.TestCase):
|
||||
class GCEClusterResolverTest(test.TestCase):
|
||||
|
||||
def _verifyClusterSpecEquality(self, cluster_spec, expected_proto):
|
||||
self.assertProtoEquals(expected_proto, cluster_spec.as_cluster_def())
|
||||
@ -121,7 +121,7 @@ class GceClusterResolverTest(test.TestCase):
|
||||
return self.standard_mock_service_client(mock_instance_group, mock_instance)
|
||||
|
||||
def testSimpleSuccessfulRetrieval(self):
|
||||
gce_cluster_resolver = GceClusterResolver(
|
||||
gce_cluster_resolver = GCEClusterResolver(
|
||||
project='test-project',
|
||||
zone='us-east1-d',
|
||||
instance_group='test-instance-group',
|
||||
@ -136,7 +136,7 @@ class GceClusterResolverTest(test.TestCase):
|
||||
self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto)
|
||||
|
||||
def testMasterRetrieval(self):
|
||||
gce_cluster_resolver = GceClusterResolver(
|
||||
gce_cluster_resolver = GCEClusterResolver(
|
||||
project='test-project',
|
||||
zone='us-east1-d',
|
||||
instance_group='test-instance-group',
|
||||
@ -153,7 +153,7 @@ class GceClusterResolverTest(test.TestCase):
|
||||
{'name': 'instance3', 'ip': '10.3.4.5'},
|
||||
]
|
||||
|
||||
gce_cluster_resolver = GceClusterResolver(
|
||||
gce_cluster_resolver = GCEClusterResolver(
|
||||
project='test-project',
|
||||
zone='us-east1-d',
|
||||
instance_group='test-instance-group',
|
||||
@ -172,7 +172,7 @@ class GceClusterResolverTest(test.TestCase):
|
||||
{'name': 'instance3', 'ip': '10.3.4.5'},
|
||||
]
|
||||
|
||||
gce_cluster_resolver = GceClusterResolver(
|
||||
gce_cluster_resolver = GCEClusterResolver(
|
||||
project='test-project',
|
||||
zone='us-east1-d',
|
||||
instance_group='test-instance-group',
|
||||
@ -196,7 +196,7 @@ class GceClusterResolverTest(test.TestCase):
|
||||
{'name': 'instance3', 'ip': '10.3.4.5'},
|
||||
]
|
||||
|
||||
gce_cluster_resolver = GceClusterResolver(
|
||||
gce_cluster_resolver = GCEClusterResolver(
|
||||
project='test-project',
|
||||
zone='us-east1-d',
|
||||
instance_group='test-instance-group',
|
||||
@ -210,7 +210,7 @@ class GceClusterResolverTest(test.TestCase):
|
||||
task_type='', task_id=0), 'grpc://10.1.2.3:8470')
|
||||
|
||||
def testCustomJobNameAndPortRetrieval(self):
|
||||
gce_cluster_resolver = GceClusterResolver(
|
||||
gce_cluster_resolver = GCEClusterResolver(
|
||||
project='test-project',
|
||||
zone='us-east1-d',
|
||||
instance_group='test-instance-group',
|
||||
@ -232,7 +232,7 @@ class GceClusterResolverTest(test.TestCase):
|
||||
{'name': 'instance3', 'ip': '10.3.4.5'},
|
||||
]
|
||||
|
||||
gce_cluster_resolver = GceClusterResolver(
|
||||
gce_cluster_resolver = GCEClusterResolver(
|
||||
project='test-project',
|
||||
zone='us-east1-d',
|
||||
instance_group='test-instance-group',
|
||||
@ -266,7 +266,7 @@ class GceClusterResolverTest(test.TestCase):
|
||||
{'name': 'ps2', 'ip': '10.100.2.3'},
|
||||
]
|
||||
|
||||
worker1_gce_cluster_resolver = GceClusterResolver(
|
||||
worker1_gce_cluster_resolver = GCEClusterResolver(
|
||||
project='test-project',
|
||||
zone='us-east1-d',
|
||||
instance_group='test-instance-group',
|
||||
@ -275,7 +275,7 @@ class GceClusterResolverTest(test.TestCase):
|
||||
credentials=None,
|
||||
service=self.gen_standard_mock_service_client(worker1_name_to_ip))
|
||||
|
||||
worker2_gce_cluster_resolver = GceClusterResolver(
|
||||
worker2_gce_cluster_resolver = GCEClusterResolver(
|
||||
project='test-project',
|
||||
zone='us-east1-d',
|
||||
instance_group='test-instance-group',
|
||||
@ -284,7 +284,7 @@ class GceClusterResolverTest(test.TestCase):
|
||||
credentials=None,
|
||||
service=self.gen_standard_mock_service_client(worker2_name_to_ip))
|
||||
|
||||
ps_gce_cluster_resolver = GceClusterResolver(
|
||||
ps_gce_cluster_resolver = GCEClusterResolver(
|
||||
project='test-project',
|
||||
zone='us-east1-d',
|
||||
instance_group='test-instance-group',
|
||||
|
@ -21,6 +21,7 @@ from __future__ import print_function
|
||||
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver
|
||||
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import format_master_url
|
||||
from tensorflow.python.training import server_lib
|
||||
from tensorflow.python.util.tf_export import tf_export
|
||||
|
||||
_KUBERNETES_API_CLIENT_INSTALLED = True
|
||||
try:
|
||||
@ -30,6 +31,7 @@ except ImportError:
|
||||
_KUBERNETES_API_CLIENT_INSTALLED = False
|
||||
|
||||
|
||||
@tf_export('distribute.cluster_resolver.KubernetesClusterResolver')
|
||||
class KubernetesClusterResolver(ClusterResolver):
|
||||
"""Cluster Resolver for Kubernetes.
|
||||
|
||||
@ -154,13 +156,3 @@ class KubernetesClusterResolver(ClusterResolver):
|
||||
cluster_map[tf_job] = all_pods
|
||||
|
||||
return server_lib.ClusterSpec(cluster_map)
|
||||
|
||||
@property
|
||||
def environment(self):
|
||||
"""Returns the current environment which TensorFlow is running in.
|
||||
|
||||
For users in the Cloud environment, the environment property is always an
|
||||
empty string, and Google users will not use this ClusterResolver for running
|
||||
on internal systems.
|
||||
"""
|
||||
return ''
|
||||
|
@ -25,8 +25,10 @@ import subprocess
|
||||
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver
|
||||
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import format_master_url
|
||||
from tensorflow.python.training.server_lib import ClusterSpec
|
||||
from tensorflow.python.util.tf_export import tf_export
|
||||
|
||||
|
||||
@tf_export('distribute.cluster_resolver.SlurmClusterResolver')
|
||||
class SlurmClusterResolver(ClusterResolver):
|
||||
"""Cluster Resolver for system with Slurm workload manager.
|
||||
|
||||
@ -215,16 +217,6 @@ class SlurmClusterResolver(ClusterResolver):
|
||||
|
||||
return ''
|
||||
|
||||
@property
|
||||
def environment(self):
|
||||
"""Returns the current environment which TensorFlow is running in.
|
||||
|
||||
For users in the Slurm environment, the environment property is always an
|
||||
empty string, and Google users will not use this ClusterResolver for running
|
||||
on internal systems.
|
||||
"""
|
||||
return ''
|
||||
|
||||
def num_accelerators(self,
|
||||
task_type=None,
|
||||
task_id=None,
|
||||
|
@ -24,6 +24,7 @@ import os
|
||||
|
||||
from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver
|
||||
from tensorflow.python.training.server_lib import ClusterSpec
|
||||
from tensorflow.python.util.tf_export import tf_export
|
||||
|
||||
_TF_CONFIG_ENV = 'TF_CONFIG'
|
||||
_SESSION_MASTER_KEY = 'session_master'
|
||||
@ -47,6 +48,7 @@ def _get_value_in_tfconfig(key, default=None):
|
||||
return tf_config[key] if key in tf_config else default
|
||||
|
||||
|
||||
@tf_export('distribute.cluster_resolver.TFConfigClusterResolver')
|
||||
class TFConfigClusterResolver(ClusterResolver):
|
||||
"""Implementation of a ClusterResolver which reads the TF_CONFIG EnvVar."""
|
||||
|
||||
|
@ -34,6 +34,7 @@ from tensorflow.python.framework import errors
|
||||
from tensorflow.python.platform import tf_logging as logging
|
||||
from tensorflow.python.training import server_lib
|
||||
from tensorflow.python.util import compat
|
||||
from tensorflow.python.util.tf_export import tf_export
|
||||
|
||||
_GOOGLE_API_CLIENT_INSTALLED = True
|
||||
try:
|
||||
@ -42,7 +43,6 @@ try:
|
||||
except ImportError:
|
||||
_GOOGLE_API_CLIENT_INSTALLED = False
|
||||
|
||||
|
||||
_GKE_ENV_VARIABLE = 'KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS'
|
||||
_ENDPOINTS_SEPARATOR = ','
|
||||
_DEFAULT_ENV_VARIABLE = 'TPU_NAME'
|
||||
@ -56,38 +56,7 @@ DeviceDetails = collections.namedtuple(
|
||||
'DeviceDetails', ['device_map', 'total_cores'])
|
||||
|
||||
|
||||
def _get_device_dict_and_cores(devices):
|
||||
"""Returns a dict of hosts to cores and total cores given devices names.
|
||||
|
||||
Returns a namedtuple with two attributes:
|
||||
device_map: A map of host_ids to a list of core_ids.
|
||||
total_cores: The total number of cores within the TPU system.
|
||||
|
||||
Args:
|
||||
devices: A list of devices returned by session.list_devices()
|
||||
"""
|
||||
device_map = collections.defaultdict(list)
|
||||
num_cores = 0
|
||||
for device in devices:
|
||||
match = _TPU_DEVICE_REGEX.match(device.name)
|
||||
if match:
|
||||
host_id = match.group('host_id')
|
||||
core_id = match.group('core_id')
|
||||
device_map[host_id].append(core_id)
|
||||
num_cores += 1
|
||||
return DeviceDetails(device_map, num_cores)
|
||||
|
||||
|
||||
def _verify_and_return_same_core_count(device_dict):
|
||||
"""Verifies that every device in device_dict has the same number of cores."""
|
||||
num_cores_per_host_set = (
|
||||
{len(core_ids) for core_ids in device_dict.values()})
|
||||
if len(num_cores_per_host_set) != 1:
|
||||
raise RuntimeError('TPU cores on each device is not the same. This '
|
||||
'should never happen. Devices: {}'.format(device_dict))
|
||||
return num_cores_per_host_set.pop()
|
||||
|
||||
|
||||
@tf_export('distribute.cluster_resolver.TPUClusterResolver')
|
||||
class TPUClusterResolver(ClusterResolver):
|
||||
"""Cluster Resolver for Google Cloud TPUs.
|
||||
|
||||
@ -143,6 +112,38 @@ class TPUClusterResolver(ClusterResolver):
|
||||
return False
|
||||
return True
|
||||
|
||||
@staticmethod
|
||||
def _get_device_dict_and_cores(devices):
|
||||
"""Returns a dict of hosts to cores and total cores given devices names.
|
||||
|
||||
Returns a namedtuple with two attributes:
|
||||
device_map: A map of host_ids to a list of core_ids.
|
||||
total_cores: The total number of cores within the TPU system.
|
||||
|
||||
Args:
|
||||
devices: A list of devices returned by session.list_devices()
|
||||
"""
|
||||
device_map = collections.defaultdict(list)
|
||||
num_cores = 0
|
||||
for device in devices:
|
||||
match = _TPU_DEVICE_REGEX.match(device.name)
|
||||
if match:
|
||||
host_id = match.group('host_id')
|
||||
core_id = match.group('core_id')
|
||||
device_map[host_id].append(core_id)
|
||||
num_cores += 1
|
||||
return DeviceDetails(device_map, num_cores)
|
||||
|
||||
@staticmethod
|
||||
def _verify_and_return_same_core_count(device_dict):
|
||||
"""Verifies that every device in device_dict has the same # of cores."""
|
||||
num_cores_per_host_set = (
|
||||
{len(core_ids) for core_ids in device_dict.values()})
|
||||
if len(num_cores_per_host_set) != 1:
|
||||
raise RuntimeError('TPU cores on each device is not the same. This '
|
||||
'should never happen. Devices: {}'.format(device_dict))
|
||||
return num_cores_per_host_set.pop()
|
||||
|
||||
@staticmethod
|
||||
def _inGke():
|
||||
"""When running in GKE, the environment variable will be set."""
|
||||
@ -482,7 +483,7 @@ class TPUClusterResolver(ClusterResolver):
|
||||
# TODO(b/120564445): Replace with standard library for retries.
|
||||
while True:
|
||||
try:
|
||||
device_details = _get_device_dict_and_cores(
|
||||
device_details = TPUClusterResolver._get_device_dict_and_cores(
|
||||
get_accelerator_devices(self.master(), config_proto=config_proto))
|
||||
break
|
||||
except errors.DeadlineExceededError:
|
||||
@ -497,7 +498,8 @@ class TPUClusterResolver(ClusterResolver):
|
||||
raise RuntimeError(error_message)
|
||||
|
||||
if device_details.total_cores:
|
||||
return _verify_and_return_same_core_count(device_details.device_map)
|
||||
return TPUClusterResolver._verify_and_return_same_core_count(
|
||||
device_details.device_map)
|
||||
return 0
|
||||
|
||||
@property
|
||||
|
@ -24,8 +24,7 @@ import six
|
||||
from six.moves.urllib.error import URLError
|
||||
|
||||
from tensorflow.python.client import session
|
||||
from tensorflow.python.distribute import cluster_resolver
|
||||
from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver
|
||||
from tensorflow.python.distribute.cluster_resolver import TPUClusterResolver
|
||||
from tensorflow.python.framework import errors
|
||||
from tensorflow.python.platform import test
|
||||
from tensorflow.python.training import server_lib
|
||||
@ -129,26 +128,26 @@ class TPUClusterResolverTest(test.TestCase):
|
||||
|
||||
return mock_client
|
||||
|
||||
@mock.patch.object(cluster_resolver.TPUClusterResolver,
|
||||
@mock.patch.object(TPUClusterResolver,
|
||||
'_isRunningInGCE',
|
||||
mock_is_running_in_gce)
|
||||
def testCheckRunningInGceWithNoTpuName(self):
|
||||
with self.assertRaisesRegexp(RuntimeError, '.*Google Cloud.*'):
|
||||
cluster_resolver.TPUClusterResolver(tpu='')
|
||||
TPUClusterResolver(tpu='')
|
||||
|
||||
@mock.patch.object(six.moves.urllib.request,
|
||||
'urlopen',
|
||||
mock_running_in_gce_urlopen)
|
||||
def testIsRunningInGce(self):
|
||||
self.assertTrue(cluster_resolver.TPUClusterResolver._isRunningInGCE())
|
||||
self.assertTrue(TPUClusterResolver._isRunningInGCE())
|
||||
|
||||
@mock.patch.object(six.moves.urllib.request,
|
||||
'urlopen',
|
||||
mock_not_running_in_gce_urlopen)
|
||||
def testIsNotRunningInGce(self):
|
||||
self.assertFalse(cluster_resolver.TPUClusterResolver._isRunningInGCE())
|
||||
self.assertFalse(TPUClusterResolver._isRunningInGCE())
|
||||
|
||||
@mock.patch.object(cluster_resolver.TPUClusterResolver,
|
||||
@mock.patch.object(TPUClusterResolver,
|
||||
'_requestComputeMetadata',
|
||||
mock_request_compute_metadata)
|
||||
def testRetrieveProjectAndZoneFromMetadata(self):
|
||||
@ -160,7 +159,7 @@ class TPUClusterResolverTest(test.TestCase):
|
||||
}
|
||||
}
|
||||
|
||||
resolver = cluster_resolver.TPUClusterResolver(
|
||||
resolver = TPUClusterResolver(
|
||||
project=None,
|
||||
zone=None,
|
||||
tpu=['test-tpu-1'],
|
||||
@ -182,7 +181,7 @@ class TPUClusterResolverTest(test.TestCase):
|
||||
self._verifyClusterSpecEquality(actual_cluster_spec, str(expected_proto))
|
||||
self.assertEqual(resolver.master(), 'grpc://10.1.2.3:8470')
|
||||
|
||||
@mock.patch.object(cluster_resolver.TPUClusterResolver,
|
||||
@mock.patch.object(TPUClusterResolver,
|
||||
'_requestComputeMetadata',
|
||||
mock_request_compute_metadata)
|
||||
def testRetrieveProjectAndZoneFromMetadataNoCoordinator(self):
|
||||
@ -194,7 +193,7 @@ class TPUClusterResolverTest(test.TestCase):
|
||||
}
|
||||
}
|
||||
|
||||
resolver = cluster_resolver.TPUClusterResolver(
|
||||
resolver = TPUClusterResolver(
|
||||
project=None,
|
||||
zone=None,
|
||||
tpu=['test-tpu-1'],
|
||||
@ -209,7 +208,7 @@ class TPUClusterResolverTest(test.TestCase):
|
||||
self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto)
|
||||
self.assertEqual(resolver.master(), 'grpc://10.1.2.3:8470')
|
||||
|
||||
@mock.patch.object(cluster_resolver.TPUClusterResolver,
|
||||
@mock.patch.object(TPUClusterResolver,
|
||||
'_requestComputeMetadata',
|
||||
mock_request_compute_metadata)
|
||||
def testUnhealthyCloudTpu(self):
|
||||
@ -221,7 +220,7 @@ class TPUClusterResolverTest(test.TestCase):
|
||||
}
|
||||
}
|
||||
|
||||
resolver = cluster_resolver.TPUClusterResolver(
|
||||
resolver = TPUClusterResolver(
|
||||
project=None,
|
||||
zone=None,
|
||||
tpu='test-tpu-1',
|
||||
@ -232,7 +231,7 @@ class TPUClusterResolverTest(test.TestCase):
|
||||
with self.assertRaises(RuntimeError):
|
||||
resolver.cluster_spec()
|
||||
|
||||
@mock.patch.object(cluster_resolver.TPUClusterResolver,
|
||||
@mock.patch.object(TPUClusterResolver,
|
||||
'_requestComputeMetadata',
|
||||
mock_request_compute_metadata)
|
||||
def testNotReadyCloudTpu(self):
|
||||
@ -244,7 +243,7 @@ class TPUClusterResolverTest(test.TestCase):
|
||||
}
|
||||
}
|
||||
|
||||
resolver = cluster_resolver.TPUClusterResolver(
|
||||
resolver = TPUClusterResolver(
|
||||
project=None,
|
||||
zone=None,
|
||||
tpu='test-tpu-1',
|
||||
@ -264,7 +263,7 @@ class TPUClusterResolverTest(test.TestCase):
|
||||
}
|
||||
}
|
||||
|
||||
resolver = cluster_resolver.TPUClusterResolver(
|
||||
resolver = TPUClusterResolver(
|
||||
project='test-project',
|
||||
zone='us-central1-c',
|
||||
tpu=['test-tpu-1'],
|
||||
@ -292,7 +291,7 @@ class TPUClusterResolverTest(test.TestCase):
|
||||
}
|
||||
}
|
||||
|
||||
resolver = cluster_resolver.TPUClusterResolver(
|
||||
resolver = TPUClusterResolver(
|
||||
project='test-project',
|
||||
zone='us-central1-c',
|
||||
tpu='test-tpu-1',
|
||||
@ -309,7 +308,7 @@ class TPUClusterResolverTest(test.TestCase):
|
||||
self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto)
|
||||
self.assertEqual('grpc://10.2.3.4:8470', resolver.master())
|
||||
|
||||
@mock.patch.object(cluster_resolver.TPUClusterResolver,
|
||||
@mock.patch.object(TPUClusterResolver,
|
||||
'_requestComputeMetadata',
|
||||
mock_request_compute_metadata)
|
||||
def testPodResolution(self):
|
||||
@ -338,7 +337,7 @@ class TPUClusterResolverTest(test.TestCase):
|
||||
}
|
||||
}
|
||||
|
||||
resolver = cluster_resolver.TPUClusterResolver(
|
||||
resolver = TPUClusterResolver(
|
||||
tpu='test-tpu-1',
|
||||
credentials=None,
|
||||
service=self.mock_service_client(tpu_map=tpu_map),
|
||||
@ -387,7 +386,7 @@ class TPUClusterResolverTest(test.TestCase):
|
||||
}
|
||||
}
|
||||
|
||||
resolver = cluster_resolver.TPUClusterResolver(
|
||||
resolver = TPUClusterResolver(
|
||||
project='test-project',
|
||||
zone='us-central1-c',
|
||||
tpu='test-tpu-1',
|
||||
@ -412,7 +411,7 @@ class TPUClusterResolverTest(test.TestCase):
|
||||
tpu_map = {}
|
||||
|
||||
with self.assertRaises(ValueError):
|
||||
cluster_resolver.TPUClusterResolver(
|
||||
TPUClusterResolver(
|
||||
project='test-project',
|
||||
zone='us-central1-c',
|
||||
tpu=[],
|
||||
@ -422,7 +421,7 @@ class TPUClusterResolverTest(test.TestCase):
|
||||
|
||||
# TODO(saeta): Convert to parameterized test when included in OSS TF.
|
||||
def verifyShouldResolve(self, tpu, should_resolve):
|
||||
resolver = cluster_resolver.TPUClusterResolver(
|
||||
resolver = TPUClusterResolver(
|
||||
project='test-project',
|
||||
zone='us-central1-c',
|
||||
tpu=tpu,
|
||||
@ -432,7 +431,7 @@ class TPUClusterResolverTest(test.TestCase):
|
||||
self.assertEqual(should_resolve, resolver._shouldResolve(),
|
||||
"TPU: '%s'" % tpu)
|
||||
|
||||
@mock.patch.object(cluster_resolver.TPUClusterResolver,
|
||||
@mock.patch.object(TPUClusterResolver,
|
||||
'_isRunningInGCE',
|
||||
mock_is_not_running_in_gce)
|
||||
def testShouldResolveNoName(self):
|
||||
@ -457,7 +456,7 @@ class TPUClusterResolverTest(test.TestCase):
|
||||
self.verifyShouldResolve('grpctpu', True)
|
||||
|
||||
def testNoCallComputeMetadata(self):
|
||||
resolver = cluster_resolver.TPUClusterResolver(
|
||||
resolver = TPUClusterResolver(
|
||||
tpu='/bns/foo/bar')
|
||||
self.assertEqual('/bns/foo/bar', resolver.master())
|
||||
self.assertEqual(None, resolver.cluster_spec())
|
||||
@ -466,12 +465,12 @@ class TPUClusterResolverTest(test.TestCase):
|
||||
os.environ['KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS'] = 'grpc://10.120.27.5:8470'
|
||||
|
||||
self.assertIn('KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS', os.environ)
|
||||
self.assertTrue(cluster_resolver.TPUClusterResolver._inGke())
|
||||
self.assertTrue(TPUClusterResolver._inGke())
|
||||
self.assertEqual(
|
||||
compat.as_bytes('grpc://10.120.27.5:8470'),
|
||||
compat.as_bytes(cluster_resolver.TPUClusterResolver._gkeEndpoints()))
|
||||
compat.as_bytes(TPUClusterResolver._gkeEndpoints()))
|
||||
|
||||
resolver = cluster_resolver.TPUClusterResolver()
|
||||
resolver = TPUClusterResolver()
|
||||
self.assertEqual(
|
||||
compat.as_bytes('grpc://10.120.27.5:8470'),
|
||||
compat.as_bytes(resolver.master()))
|
||||
@ -493,15 +492,15 @@ class TPUClusterResolverTest(test.TestCase):
|
||||
'grpc://10.120.27.8:8470')
|
||||
|
||||
self.assertIn('KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS', os.environ)
|
||||
self.assertTrue(cluster_resolver.TPUClusterResolver._inGke())
|
||||
self.assertTrue(TPUClusterResolver._inGke())
|
||||
self.assertEqual(
|
||||
compat.as_bytes('grpc://10.120.27.5:8470,'
|
||||
'grpc://10.120.27.6:8470,'
|
||||
'grpc://10.120.27.7:8470,'
|
||||
'grpc://10.120.27.8:8470'),
|
||||
compat.as_bytes(cluster_resolver.TPUClusterResolver._gkeEndpoints()))
|
||||
compat.as_bytes(TPUClusterResolver._gkeEndpoints()))
|
||||
|
||||
resolver = cluster_resolver.TPUClusterResolver()
|
||||
resolver = TPUClusterResolver()
|
||||
self.assertEqual(
|
||||
compat.as_bytes('grpc://10.120.27.5:8470'),
|
||||
compat.as_bytes(resolver.master()))
|
||||
@ -522,17 +521,17 @@ class TPUClusterResolverTest(test.TestCase):
|
||||
def testEnvironmentDiscoveryUrl(self):
|
||||
os.environ['TPU_API_DISCOVERY_URL'] = 'https://{api}.internal/{apiVersion}'
|
||||
self.assertEqual('https://{api}.internal/{apiVersion}',
|
||||
(cluster_resolver.TPUClusterResolver.
|
||||
(TPUClusterResolver.
|
||||
_environmentDiscoveryUrl()))
|
||||
|
||||
def testEnvironmentAndRpcDetectionForGoogle(self):
|
||||
resolver = cluster_resolver.TPUClusterResolver(
|
||||
resolver = TPUClusterResolver(
|
||||
tpu='/bns/ab/cd/ef')
|
||||
self.assertEqual(resolver.environment, 'google')
|
||||
self.assertEqual(resolver.rpc_layer, None)
|
||||
|
||||
def testEnvironmentAndRpcDetectionForGrpcString(self):
|
||||
resolver = cluster_resolver.TPUClusterResolver(
|
||||
resolver = TPUClusterResolver(
|
||||
tpu='grpc://10.1.2.3:8470')
|
||||
self.assertEqual(resolver.environment, '')
|
||||
self.assertEqual(resolver.rpc_layer, 'grpc')
|
||||
@ -564,7 +563,7 @@ class TPUClusterResolverTest(test.TestCase):
|
||||
}
|
||||
}
|
||||
|
||||
resolver = cluster_resolver.TPUClusterResolver(
|
||||
resolver = TPUClusterResolver(
|
||||
project='test-project',
|
||||
zone='us-central1-c',
|
||||
tpu='test-tpu-1',
|
||||
@ -599,7 +598,7 @@ class TPUClusterResolverTest(test.TestCase):
|
||||
name, 'TPU', 1024, 0) for name in device_names
|
||||
]
|
||||
|
||||
device_details = tpu_cluster_resolver._get_device_dict_and_cores(
|
||||
device_details = TPUClusterResolver._get_device_dict_and_cores(
|
||||
device_list)
|
||||
self.assertEqual(device_details.total_cores, 8)
|
||||
self.assertEqual(device_details.device_map,
|
||||
@ -624,24 +623,24 @@ class TPUClusterResolverTest(test.TestCase):
|
||||
name, 'XLA', 1024, 0) for name in device_names
|
||||
]
|
||||
|
||||
device_dict, num_cores = tpu_cluster_resolver._get_device_dict_and_cores(
|
||||
device_dict, num_cores = TPUClusterResolver._get_device_dict_and_cores(
|
||||
device_list)
|
||||
self.assertEqual(num_cores, 0)
|
||||
self.assertEqual(device_dict, {})
|
||||
|
||||
def testVerifySameCoreCount(self):
|
||||
self.assertEqual(
|
||||
tpu_cluster_resolver._verify_and_return_same_core_count(
|
||||
TPUClusterResolver._verify_and_return_same_core_count(
|
||||
{0: [0, 1, 2, 3, 4, 5, 6, 7]}), 8)
|
||||
self.assertEqual(
|
||||
tpu_cluster_resolver._verify_and_return_same_core_count(
|
||||
TPUClusterResolver._verify_and_return_same_core_count(
|
||||
{0: [0, 1], 1: [2, 3]}), 2)
|
||||
with self.assertRaises(RuntimeError):
|
||||
tpu_cluster_resolver._verify_and_return_same_core_count(
|
||||
TPUClusterResolver._verify_and_return_same_core_count(
|
||||
{0: [0], 1: [1, 2]})
|
||||
|
||||
@mock.patch.object(session.BaseSession, 'list_devices')
|
||||
@mock.patch.object(cluster_resolver.TPUClusterResolver,
|
||||
@mock.patch.object(TPUClusterResolver,
|
||||
'_isRunningInGCE',
|
||||
mock_is_not_running_in_gce)
|
||||
def testNumAcceleratorsSuccess(self, mock_list_devices):
|
||||
@ -661,15 +660,15 @@ class TPUClusterResolverTest(test.TestCase):
|
||||
]
|
||||
mock_list_devices.return_value = device_list
|
||||
|
||||
resolver = cluster_resolver.TPUClusterResolver(tpu='')
|
||||
resolver = TPUClusterResolver(tpu='')
|
||||
self.assertEqual(resolver.num_accelerators(), 2)
|
||||
|
||||
@mock.patch.object(session.BaseSession, 'list_devices')
|
||||
@mock.patch.object(cluster_resolver.TPUClusterResolver,
|
||||
@mock.patch.object(TPUClusterResolver,
|
||||
'_isRunningInGCE',
|
||||
mock_is_not_running_in_gce)
|
||||
def testNumAcceleratorsRetryFailure(self, mock_list_devices):
|
||||
resolver = cluster_resolver.TPUClusterResolver(tpu='')
|
||||
resolver = TPUClusterResolver(tpu='')
|
||||
mock_list_devices.side_effect = errors.DeadlineExceededError(
|
||||
None, None, 'timeout')
|
||||
with self.assertRaises(RuntimeError):
|
||||
|
@ -14,6 +14,7 @@ TENSORFLOW_API_INIT_FILES = [
|
||||
"data/experimental/__init__.py",
|
||||
"debugging/__init__.py",
|
||||
"distribute/__init__.py",
|
||||
"distribute/cluster_resolver/__init__.py",
|
||||
"dtypes/__init__.py",
|
||||
"errors/__init__.py",
|
||||
"experimental/__init__.py",
|
||||
|
@ -15,6 +15,7 @@ TENSORFLOW_API_INIT_FILES_V1 = [
|
||||
"data/experimental/__init__.py",
|
||||
"debugging/__init__.py",
|
||||
"distribute/__init__.py",
|
||||
"distribute/cluster_resolver/__init__.py",
|
||||
"distributions/__init__.py",
|
||||
"dtypes/__init__.py",
|
||||
"errors/__init__.py",
|
||||
|
@ -0,0 +1,24 @@
|
||||
path: "tensorflow.distribute.cluster_resolver.ClusterResolver"
|
||||
tf_class {
|
||||
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
|
||||
is_instance: "<type \'object\'>"
|
||||
member {
|
||||
name: "environment"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member_method {
|
||||
name: "__init__"
|
||||
}
|
||||
member_method {
|
||||
name: "cluster_spec"
|
||||
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "master"
|
||||
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "num_accelerators"
|
||||
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
|
||||
}
|
||||
}
|
@ -0,0 +1,38 @@
|
||||
path: "tensorflow.distribute.cluster_resolver.GCEClusterResolver"
|
||||
tf_class {
|
||||
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver.GCEClusterResolver\'>"
|
||||
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
|
||||
is_instance: "<type \'object\'>"
|
||||
member {
|
||||
name: "environment"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member {
|
||||
name: "rpc_layer"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member {
|
||||
name: "task_id"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member {
|
||||
name: "task_type"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member_method {
|
||||
name: "__init__"
|
||||
argspec: "args=[\'self\', \'project\', \'zone\', \'instance_group\', \'port\', \'task_type\', \'task_id\', \'rpc_layer\', \'credentials\', \'service\'], varargs=None, keywords=None, defaults=[\'worker\', \'0\', \'grpc\', \'default\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "cluster_spec"
|
||||
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "master"
|
||||
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "num_accelerators"
|
||||
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
|
||||
}
|
||||
}
|
@ -0,0 +1,26 @@
|
||||
path: "tensorflow.distribute.cluster_resolver.KubernetesClusterResolver"
|
||||
tf_class {
|
||||
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.kubernetes_cluster_resolver.KubernetesClusterResolver\'>"
|
||||
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
|
||||
is_instance: "<type \'object\'>"
|
||||
member {
|
||||
name: "environment"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member_method {
|
||||
name: "__init__"
|
||||
argspec: "args=[\'self\', \'job_to_label_mapping\', \'tf_server_port\', \'rpc_layer\', \'override_client\'], varargs=None, keywords=None, defaults=[\'None\', \'8470\', \'grpc\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "cluster_spec"
|
||||
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "master"
|
||||
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "num_accelerators"
|
||||
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
|
||||
}
|
||||
}
|
@ -0,0 +1,38 @@
|
||||
path: "tensorflow.distribute.cluster_resolver.SimpleClusterResolver"
|
||||
tf_class {
|
||||
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.SimpleClusterResolver\'>"
|
||||
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
|
||||
is_instance: "<type \'object\'>"
|
||||
member {
|
||||
name: "environment"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member {
|
||||
name: "rpc_layer"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member {
|
||||
name: "task_id"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member {
|
||||
name: "task_type"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member_method {
|
||||
name: "__init__"
|
||||
argspec: "args=[\'self\', \'cluster_spec\', \'master\', \'task_type\', \'task_id\', \'environment\', \'num_accelerators\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'\', \'None\', \'None\', \'\', \'0\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "cluster_spec"
|
||||
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "master"
|
||||
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "num_accelerators"
|
||||
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'accelerator_type\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'GPU\', \'None\'], "
|
||||
}
|
||||
}
|
@ -0,0 +1,30 @@
|
||||
path: "tensorflow.distribute.cluster_resolver.SlurmClusterResolver"
|
||||
tf_class {
|
||||
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.slurm_cluster_resolver.SlurmClusterResolver\'>"
|
||||
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
|
||||
is_instance: "<type \'object\'>"
|
||||
member {
|
||||
name: "environment"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member_method {
|
||||
name: "__init__"
|
||||
argspec: "args=[\'self\', \'jobs\', \'port_base\', \'gpus_per_node\', \'gpus_per_task\', \'tasks_per_node\', \'auto_set_gpu\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'8888\', \'1\', \'1\', \'None\', \'True\', \'grpc\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "cluster_spec"
|
||||
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "get_task_info"
|
||||
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "master"
|
||||
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "num_accelerators"
|
||||
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'accelerator_type\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'GPU\', \'None\'], "
|
||||
}
|
||||
}
|
@ -0,0 +1,38 @@
|
||||
path: "tensorflow.distribute.cluster_resolver.TFConfigClusterResolver"
|
||||
tf_class {
|
||||
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.tfconfig_cluster_resolver.TFConfigClusterResolver\'>"
|
||||
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
|
||||
is_instance: "<type \'object\'>"
|
||||
member {
|
||||
name: "environment"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member {
|
||||
name: "rpc_layer"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member {
|
||||
name: "task_id"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member {
|
||||
name: "task_type"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member_method {
|
||||
name: "__init__"
|
||||
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\', \'environment\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "cluster_spec"
|
||||
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "master"
|
||||
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "num_accelerators"
|
||||
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
|
||||
}
|
||||
}
|
@ -0,0 +1,34 @@
|
||||
path: "tensorflow.distribute.cluster_resolver.TPUClusterResolver"
|
||||
tf_class {
|
||||
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.tpu_cluster_resolver.TPUClusterResolver\'>"
|
||||
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
|
||||
is_instance: "<type \'object\'>"
|
||||
member {
|
||||
name: "environment"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member_method {
|
||||
name: "__init__"
|
||||
argspec: "args=[\'self\', \'tpu\', \'zone\', \'project\', \'job_name\', \'coordinator_name\', \'coordinator_address\', \'credentials\', \'service\', \'discovery_url\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'worker\', \'None\', \'None\', \'default\', \'None\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "cluster_spec"
|
||||
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "get_job_name"
|
||||
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "get_master"
|
||||
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "master"
|
||||
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "num_accelerators"
|
||||
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'accelerator_type\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'TPU\', \'None\'], "
|
||||
}
|
||||
}
|
@ -0,0 +1,38 @@
|
||||
path: "tensorflow.distribute.cluster_resolver.UnionResolver"
|
||||
tf_class {
|
||||
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.UnionClusterResolver\'>"
|
||||
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
|
||||
is_instance: "<type \'object\'>"
|
||||
member {
|
||||
name: "environment"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member {
|
||||
name: "rpc_layer"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member {
|
||||
name: "task_id"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member {
|
||||
name: "task_type"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member_method {
|
||||
name: "__init__"
|
||||
argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "cluster_spec"
|
||||
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "master"
|
||||
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "num_accelerators"
|
||||
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'accelerator_type\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'GPU\', \'None\'], "
|
||||
}
|
||||
}
|
@ -0,0 +1,35 @@
|
||||
path: "tensorflow.distribute.cluster_resolver"
|
||||
tf_module {
|
||||
member {
|
||||
name: "ClusterResolver"
|
||||
mtype: "<type \'type\'>"
|
||||
}
|
||||
member {
|
||||
name: "GCEClusterResolver"
|
||||
mtype: "<type \'type\'>"
|
||||
}
|
||||
member {
|
||||
name: "KubernetesClusterResolver"
|
||||
mtype: "<type \'type\'>"
|
||||
}
|
||||
member {
|
||||
name: "SimpleClusterResolver"
|
||||
mtype: "<type \'type\'>"
|
||||
}
|
||||
member {
|
||||
name: "SlurmClusterResolver"
|
||||
mtype: "<type \'type\'>"
|
||||
}
|
||||
member {
|
||||
name: "TFConfigClusterResolver"
|
||||
mtype: "<type \'type\'>"
|
||||
}
|
||||
member {
|
||||
name: "TPUClusterResolver"
|
||||
mtype: "<type \'type\'>"
|
||||
}
|
||||
member {
|
||||
name: "UnionResolver"
|
||||
mtype: "<type \'type\'>"
|
||||
}
|
||||
}
|
@ -32,6 +32,10 @@ tf_module {
|
||||
name: "StrategyExtended"
|
||||
mtype: "<type \'type\'>"
|
||||
}
|
||||
member {
|
||||
name: "cluster_resolver"
|
||||
mtype: "<type \'module\'>"
|
||||
}
|
||||
member_method {
|
||||
name: "get_loss_reduction"
|
||||
argspec: "args=[], varargs=None, keywords=None, defaults=None"
|
||||
|
@ -0,0 +1,24 @@
|
||||
path: "tensorflow.distribute.cluster_resolver.ClusterResolver"
|
||||
tf_class {
|
||||
is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
|
||||
is_instance: "<type \'object\'>"
|
||||
member {
|
||||
name: "environment"
|
||||
mtype: "<type \'property\'>"
|
||||
}
|
||||
member_method {
|
||||
name: "__init__"
|
||||
}
|
||||
member_method {
|
||||
name: "cluster_spec"
|
||||
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
|
||||
}
|
||||
member_method {
|
||||
name: "master"
|
||||
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "num_accelerators"
|
||||
argspec: "args=[\'self\', \'task_type\', \'task_id\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
|
||||
}
|
||||
}
|
@ -0,0 +1,38 @@
path: "tensorflow.distribute.cluster_resolver.GCEClusterResolver"
tf_class {
  is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.gce_cluster_resolver.GCEClusterResolver\'>"
  is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
  is_instance: "<type \'object\'>"
  member {
    name: "environment"
    mtype: "<type \'property\'>"
  }
  member {
    name: "rpc_layer"
    mtype: "<type \'property\'>"
  }
  member {
    name: "task_id"
    mtype: "<type \'property\'>"
  }
  member {
    name: "task_type"
    mtype: "<type \'property\'>"
  }
  member_method {
    name: "__init__"
    argspec: "args=[\'self\', \'project\', \'zone\', \'instance_group\', \'port\', \'task_type\', \'task_id\', \'rpc_layer\', \'credentials\', \'service\'], varargs=None, keywords=None, defaults=[\'worker\', \'0\', \'grpc\', \'default\', \'None\'], "
  }
  member_method {
    name: "cluster_spec"
    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
  }
  member_method {
    name: "master"
    argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
  }
  member_method {
    name: "num_accelerators"
    argspec: "args=[\'self\', \'task_type\', \'task_id\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
  }
}
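
Going by the __init__ argspec above, GCEClusterResolver builds a cluster from a single GCE instance group per resolver. A minimal construction sketch, assuming a hypothetical project, zone and instance group, default application credentials, and the Google API client libraries installed:

import tensorflow as tf

# Hypothetical GCP project, zone and instance group; cluster_spec() queries
# the GCE API, so this only resolves with valid credentials.
gce_resolver = tf.distribute.cluster_resolver.GCEClusterResolver(
    project="my-gcp-project",
    zone="us-central1-a",
    instance_group="tf-workers",
    port=2222,
    task_type="worker",
    task_id=0)

print(gce_resolver.cluster_spec())  # one "worker" job listing the instances
print(gce_resolver.master())        # this task's address, e.g. "grpc://..."
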
@ -0,0 +1,26 @@
path: "tensorflow.distribute.cluster_resolver.KubernetesClusterResolver"
tf_class {
  is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.kubernetes_cluster_resolver.KubernetesClusterResolver\'>"
  is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
  is_instance: "<type \'object\'>"
  member {
    name: "environment"
    mtype: "<type \'property\'>"
  }
  member_method {
    name: "__init__"
    argspec: "args=[\'self\', \'job_to_label_mapping\', \'tf_server_port\', \'rpc_layer\', \'override_client\'], varargs=None, keywords=None, defaults=[\'None\', \'8470\', \'grpc\', \'None\'], "
  }
  member_method {
    name: "cluster_spec"
    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
  }
  member_method {
    name: "master"
    argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
  }
  member_method {
    name: "num_accelerators"
    argspec: "args=[\'self\', \'task_type\', \'task_id\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
  }
}
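
KubernetesClusterResolver maps TensorFlow job names to pod label selectors. A sketch of how it might be wired up, assuming a hypothetical job-name=tf-worker label, the kubernetes Python client installed, and a reachable cluster configuration:

import tensorflow as tf

# Hypothetical label selector; the resolver asks the Kubernetes API for pods
# matching it and exposes their addresses on tf_server_port.
k8s_resolver = tf.distribute.cluster_resolver.KubernetesClusterResolver(
    job_to_label_mapping={"worker": ["job-name=tf-worker"]},
    tf_server_port=8470)

print(k8s_resolver.cluster_spec())  # built from the matching pods' addresses
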
@ -0,0 +1,38 @@
path: "tensorflow.distribute.cluster_resolver.SimpleClusterResolver"
tf_class {
  is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.SimpleClusterResolver\'>"
  is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
  is_instance: "<type \'object\'>"
  member {
    name: "environment"
    mtype: "<type \'property\'>"
  }
  member {
    name: "rpc_layer"
    mtype: "<type \'property\'>"
  }
  member {
    name: "task_id"
    mtype: "<type \'property\'>"
  }
  member {
    name: "task_type"
    mtype: "<type \'property\'>"
  }
  member_method {
    name: "__init__"
    argspec: "args=[\'self\', \'cluster_spec\', \'master\', \'task_type\', \'task_id\', \'environment\', \'num_accelerators\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'\', \'None\', \'None\', \'\', \'0\', \'None\'], "
  }
  member_method {
    name: "cluster_spec"
    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
  }
  member_method {
    name: "master"
    argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
  }
  member_method {
    name: "num_accelerators"
    argspec: "args=[\'self\', \'task_type\', \'task_id\', \'accelerator_type\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'GPU\', \'None\'], "
  }
}
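
SimpleClusterResolver wraps an existing tf.train.ClusterSpec, so unlike the cloud-backed resolvers it needs no external service; a small local sketch:

import tensorflow as tf

cluster = tf.train.ClusterSpec({"worker": ["localhost:2222", "localhost:2223"]})

simple_resolver = tf.distribute.cluster_resolver.SimpleClusterResolver(
    cluster,
    task_type="worker",
    task_id=0,
    rpc_layer="grpc")

print(simple_resolver.cluster_spec().as_dict())
# master() for an explicit task formats that task's address with the rpc
# layer, e.g. "grpc://localhost:2222" here.
print(simple_resolver.master(task_type="worker", task_id=0))
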
@ -0,0 +1,30 @@
path: "tensorflow.distribute.cluster_resolver.SlurmClusterResolver"
tf_class {
  is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.slurm_cluster_resolver.SlurmClusterResolver\'>"
  is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
  is_instance: "<type \'object\'>"
  member {
    name: "environment"
    mtype: "<type \'property\'>"
  }
  member_method {
    name: "__init__"
    argspec: "args=[\'self\', \'jobs\', \'port_base\', \'gpus_per_node\', \'gpus_per_task\', \'tasks_per_node\', \'auto_set_gpu\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'8888\', \'1\', \'1\', \'None\', \'True\', \'grpc\'], "
  }
  member_method {
    name: "cluster_spec"
    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
  }
  member_method {
    name: "get_task_info"
    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
  }
  member_method {
    name: "master"
    argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
  }
  member_method {
    name: "num_accelerators"
    argspec: "args=[\'self\', \'task_type\', \'task_id\', \'accelerator_type\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'GPU\', \'None\'], "
  }
}
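
SlurmClusterResolver derives the cluster from Slurm environment variables and the allocation's node list, so the sketch below is only meaningful when launched inside a Slurm job; the job layout is hypothetical:

import tensorflow as tf

# Hypothetical layout: 1 parameter-server task and 2 worker tasks, ports
# allocated starting at port_base on each node.
slurm_resolver = tf.distribute.cluster_resolver.SlurmClusterResolver(
    jobs={"ps": 1, "worker": 2},
    port_base=8888,
    gpus_per_node=1,
    gpus_per_task=1)

cluster_spec = slurm_resolver.cluster_spec()          # resolved from the node list
task_type, task_id = slurm_resolver.get_task_info()   # this process's assignment
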
@ -0,0 +1,38 @@
path: "tensorflow.distribute.cluster_resolver.TFConfigClusterResolver"
tf_class {
  is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.tfconfig_cluster_resolver.TFConfigClusterResolver\'>"
  is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
  is_instance: "<type \'object\'>"
  member {
    name: "environment"
    mtype: "<type \'property\'>"
  }
  member {
    name: "rpc_layer"
    mtype: "<type \'property\'>"
  }
  member {
    name: "task_id"
    mtype: "<type \'property\'>"
  }
  member {
    name: "task_type"
    mtype: "<type \'property\'>"
  }
  member_method {
    name: "__init__"
    argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\', \'environment\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
  }
  member_method {
    name: "cluster_spec"
    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
  }
  member_method {
    name: "master"
    argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
  }
  member_method {
    name: "num_accelerators"
    argspec: "args=[\'self\', \'task_type\', \'task_id\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
  }
}
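
TFConfigClusterResolver reads the standard TF_CONFIG environment variable, so a cluster can be described entirely in the environment; a self-contained sketch:

import json
import os

import tensorflow as tf

os.environ["TF_CONFIG"] = json.dumps({
    "cluster": {"worker": ["localhost:2222", "localhost:2223"]},
    "task": {"type": "worker", "index": 0},
})

tfconfig_resolver = tf.distribute.cluster_resolver.TFConfigClusterResolver()
print(tfconfig_resolver.cluster_spec().as_dict())
print(tfconfig_resolver.task_type, tfconfig_resolver.task_id)  # worker 0
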
@ -0,0 +1,34 @@
path: "tensorflow.distribute.cluster_resolver.TPUClusterResolver"
tf_class {
  is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.tpu_cluster_resolver.TPUClusterResolver\'>"
  is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
  is_instance: "<type \'object\'>"
  member {
    name: "environment"
    mtype: "<type \'property\'>"
  }
  member_method {
    name: "__init__"
    argspec: "args=[\'self\', \'tpu\', \'zone\', \'project\', \'job_name\', \'coordinator_name\', \'coordinator_address\', \'credentials\', \'service\', \'discovery_url\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'worker\', \'None\', \'None\', \'default\', \'None\', \'None\'], "
  }
  member_method {
    name: "cluster_spec"
    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
  }
  member_method {
    name: "get_job_name"
    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
  }
  member_method {
    name: "get_master"
    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
  }
  member_method {
    name: "master"
    argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
  }
  member_method {
    name: "num_accelerators"
    argspec: "args=[\'self\', \'task_type\', \'task_id\', \'accelerator_type\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'TPU\', \'None\'], "
  }
}
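
For TPUClusterResolver, passing an explicit grpc endpoint (hypothetical address below) sidesteps the Cloud TPU API lookup that a bare TPU name would trigger:

import tensorflow as tf

# With a grpc:// address the resolver does not need GCP credentials; passing
# a TPU name instead would resolve it through the Cloud TPU API.
tpu_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
    tpu="grpc://10.0.0.2:8470")

print(tpu_resolver.master())        # the TPU worker's grpc endpoint
print(tpu_resolver.cluster_spec())  # a single job pointing at that endpoint
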
@ -0,0 +1,38 @@
path: "tensorflow.distribute.cluster_resolver.UnionResolver"
tf_class {
  is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.UnionClusterResolver\'>"
  is_instance: "<class \'tensorflow.python.distribute.cluster_resolver.cluster_resolver.ClusterResolver\'>"
  is_instance: "<type \'object\'>"
  member {
    name: "environment"
    mtype: "<type \'property\'>"
  }
  member {
    name: "rpc_layer"
    mtype: "<type \'property\'>"
  }
  member {
    name: "task_id"
    mtype: "<type \'property\'>"
  }
  member {
    name: "task_type"
    mtype: "<type \'property\'>"
  }
  member_method {
    name: "__init__"
    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
  }
  member_method {
    name: "cluster_spec"
    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
  }
  member_method {
    name: "master"
    argspec: "args=[\'self\', \'task_type\', \'task_id\', \'rpc_layer\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
  }
  member_method {
    name: "num_accelerators"
    argspec: "args=[\'self\', \'task_type\', \'task_id\', \'accelerator_type\', \'config_proto\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'GPU\', \'None\'], "
  }
}
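
UnionResolver (backed by UnionClusterResolver, as the is_instance lines show) merges the cluster specs of several resolvers, for example a parameter-server job and a worker job described separately; a local sketch built on SimpleClusterResolver:

import tensorflow as tf

ps_resolver = tf.distribute.cluster_resolver.SimpleClusterResolver(
    tf.train.ClusterSpec({"ps": ["localhost:2220"]}))
worker_resolver = tf.distribute.cluster_resolver.SimpleClusterResolver(
    tf.train.ClusterSpec({"worker": ["localhost:2222", "localhost:2223"]}))

union_resolver = tf.distribute.cluster_resolver.UnionResolver(
    ps_resolver, worker_resolver)

# Expected: {'ps': ['localhost:2220'],
#            'worker': ['localhost:2222', 'localhost:2223']}
print(union_resolver.cluster_spec().as_dict())
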
@ -0,0 +1,35 @@
path: "tensorflow.distribute.cluster_resolver"
tf_module {
  member {
    name: "ClusterResolver"
    mtype: "<type \'type\'>"
  }
  member {
    name: "GCEClusterResolver"
    mtype: "<type \'type\'>"
  }
  member {
    name: "KubernetesClusterResolver"
    mtype: "<type \'type\'>"
  }
  member {
    name: "SimpleClusterResolver"
    mtype: "<type \'type\'>"
  }
  member {
    name: "SlurmClusterResolver"
    mtype: "<type \'type\'>"
  }
  member {
    name: "TFConfigClusterResolver"
    mtype: "<type \'type\'>"
  }
  member {
    name: "TPUClusterResolver"
    mtype: "<type \'type\'>"
  }
  member {
    name: "UnionResolver"
    mtype: "<type \'type\'>"
  }
}

@ -32,6 +32,10 @@ tf_module {
    name: "StrategyExtended"
    mtype: "<type \'type\'>"
  }
  member {
    name: "cluster_resolver"
    mtype: "<type \'module\'>"
  }
  member_method {
    name: "get_loss_reduction"
    argspec: "args=[], varargs=None, keywords=None, defaults=None"