Expose TPUStrategy experimental symbols

PiperOrigin-RevId: 235813783
Sourabh Bajaj 2019-02-26 16:12:21 -08:00 committed by TensorFlower Gardener
parent 4218a23942
commit 2ee3000734
18 changed files with 332 additions and 57 deletions
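In effect, this commit makes the TPU entry points reachable from the public (experimental) namespace. As a rough usage sketch, assuming a build that includes this commit, a reachable TPU worker, and that tf.distribute.cluster_resolver.TPUClusterResolver is already exported (none of the code below is part of the diff itself):

import tensorflow as tf

# An empty string lets the resolver discover a Cloud TPU from the environment.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver("")

# Newly exported here as tf.tpu.experimental.initialize_tpu_system.
tf.tpu.experimental.initialize_tpu_system(resolver)

# Newly exported here as tf.distribute.experimental.TPUStrategy.
strategy = tf.distribute.experimental.TPUStrategy(resolver)
with strategy.scope():
  v = tf.Variable(1.0)  # Created as a TPU-mirrored variable.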

View File

@@ -22,5 +22,5 @@ from __future__ import division
from __future__ import print_function
# pylint: disable=unused-import
from tensorflow.python.distribute.tpu_strategy import initialize_tpu_system
from tensorflow.python.distribute.tpu_strategy import TPUStrategy
from tensorflow.python.tpu.tpu_strategy_util import initialize_tpu_system
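The hunk above swaps the source of initialize_tpu_system from tpu_strategy to the new tpu_strategy_util module while leaving the re-export in place. A quick sanity-check sketch, assuming a build with this commit (tf_export registers the function object rather than wrapping it, so the public alias and the internal symbol should be the same object):

import tensorflow as tf
from tensorflow.python.tpu.tpu_strategy_util import initialize_tpu_system

assert tf.tpu.experimental.initialize_tpu_system is initialize_tpu_system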

View File

@@ -21,8 +21,6 @@ from __future__ import print_function
import collections
import copy
from tensorflow.core.protobuf import config_pb2
from tensorflow.python.client import session as session_lib
from tensorflow.python.distribute import cross_device_ops as cross_device_ops_lib
from tensorflow.python.distribute import device_util
from tensorflow.python.distribute import distribute_lib
@@ -32,7 +30,6 @@ from tensorflow.python.distribute import reduce_util
from tensorflow.python.distribute import values
from tensorflow.python.distribute.cluster_resolver import TPUClusterResolver
from tensorflow.python.eager import context
from tensorflow.python.eager import function
from tensorflow.python.eager import tape
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import device as tf_device
@@ -43,64 +40,14 @@ from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.tpu import device_assignment as device_assignment_lib
from tensorflow.python.tpu import functional as tpu_functional_ops
from tensorflow.python.tpu import topology
from tensorflow.python.tpu import tpu
from tensorflow.python.tpu import tpu_strategy_util
from tensorflow.python.tpu import tpu_system_metadata as tpu_system_metadata_lib
from tensorflow.python.tpu import training_loop
from tensorflow.python.tpu.ops import tpu_ops
from tensorflow.python.util import compat
from tensorflow.python.util import nest
def initialize_tpu_system(cluster_resolver=None):
  """Initialize the TPU devices in a separate session and graph.

  Args:
    cluster_resolver: A tf.distribute.cluster_resolver.TPUClusterResolver,
        which provides information about the TPU cluster.

  Returns:
    The tf.tpu.Topology object for the topology of the TPU cluster.
  """
  if cluster_resolver is None:
    cluster_resolver = TPUClusterResolver("")
  master = cluster_resolver.master()

  logging.info("Initializing the TPU system.")

  if context.executing_eagerly():
    # This function is structured this way for the following non-intuitive
    # reasons: tpu.initialize_system creates a dummy op whose sole purpose is
    # to trigger DistributedTPURewritePass. That pass adds the real ops that
    # initialize the TPU system. Thus, we can't simply run
    # tpu.initialize_system eagerly; we need to wrap it in a defun and trigger
    # the rewrite passes on it. The easiest way to trigger a rewrite is to run
    # the function with TPUPartitionedCallOp.
    @function.defun
    def _tpu_init_fn():
      return tpu.initialize_system()

    # We can't call _tpu_init_fn normally (because it contains just a dummy op,
    # see above), but we need to define it to get it added to the eager context
    # and to get its assigned name.
    # pylint: disable=protected-access
    graph_func = _tpu_init_fn._get_concrete_function_internal()
    func_name = compat.as_str(graph_func._inference_function.name)
    # pylint: enable=protected-access

    output = tpu_functional_ops.TPUPartitionedCall(
        args=[], device_ordinal=0, Tout=[dtypes.string], f=func_name)
    serialized_topology = output[0].numpy()
  else:
    session_config = config_pb2.ConfigProto(allow_soft_placement=True)
    with ops.Graph().as_default():
      with session_lib.Session(config=session_config, target=master) as sess:
        serialized_topology = sess.run(tpu.initialize_system())

  logging.info("Finished initializing TPU system.")
  return topology.Topology(serialized=serialized_topology)
from tensorflow.python.util.tf_export import tf_export
def get_tpu_system_metadata(tpu_cluster_resolver):
@@ -174,6 +121,7 @@ def _create_tpu_mirrored_variable(  # pylint: disable=missing-docstring
  return result


@tf_export("distribute.experimental.TPUStrategy")
class TPUStrategy(distribute_lib.DistributionStrategy):
  """TPU distribution strategy implementation."""
@@ -506,7 +454,7 @@ class TPUExtended(distribute_lib.DistributionStrategyExtended):
    This is a private method only to be used by Estimator. Other frameworks
    should call `tf.contrib.distribute.initialize_tpu_system` directly.
    """
    initialize_tpu_system(self._tpu_cluster_resolver)
    tpu_strategy_util.initialize_tpu_system(self._tpu_cluster_resolver)

  def _create_variable(self, next_creator, *args, **kwargs):
    """Create a TPUMirroredVariable. See `DistributionStrategy.scope`."""

View File

@@ -47,6 +47,8 @@ TENSORFLOW_API_INIT_FILES = [
    "summary/experimental/__init__.py",
    "sysconfig/__init__.py",
    "test/__init__.py",
    "tpu/experimental/__init__.py",
    "tpu/__init__.py",
    "train/__init__.py",
    "train/experimental/__init__.py",
    "version/__init__.py",

View File

@@ -69,6 +69,8 @@ TENSORFLOW_API_INIT_FILES_V1 = [
    "summary/__init__.py",
    "sysconfig/__init__.py",
    "test/__init__.py",
    "tpu/experimental/__init__.py",
    "tpu/__init__.py",
    "train/__init__.py",
    "train/experimental/__init__.py",
    "train/queue_runner/__init__.py",

View File

@@ -133,6 +133,7 @@ py_library(
        "tpu_function.py",
        "tpu_optimizer.py",
        "tpu_sharding.py",
        "tpu_strategy_util.py",
        "tpu_system_metadata.py",
        "training_loop.py",
        "xla.py",

@@ -299,6 +300,21 @@ py_library(
    ],
)

py_library(
    name = "tpu_strategy_util",
    srcs = ["tpu_strategy_util.py"],
    deps = [
        ":tpu_lib",
        "//tensorflow/python:dtypes",
        "//tensorflow/python:framework",
        "//tensorflow/python:framework_ops",
        "//tensorflow/python:util",
        "//tensorflow/python/distribute/cluster_resolver:tpu_cluster_resolver_py",
        "//tensorflow/python/eager:context",
        "//tensorflow/python/eager:tape",
    ],
)

py_library(
    name = "feature_column",
    srcs = ["feature_column.py"],

View File

@@ -0,0 +1,18 @@
package(
    default_visibility = ["//tensorflow:internal"],
)

licenses(["notice"])  # Apache 2.0

exports_files(["LICENSE"])

py_library(
    name = "experimental",
    srcs = [
        "__init__.py",
    ],
    srcs_version = "PY2AND3",
    deps = [
        "//tensorflow/python/tpu:tpu_strategy_util",
    ],
)

View File

@@ -0,0 +1,23 @@
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Experimental TPU library."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# pylint: disable=unused-import
from tensorflow.python.tpu import tpu_strategy_util
# pylint: enable=unused-import

View File

@@ -0,0 +1,82 @@
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TPU specific APIs to be used in conjunction with TPU Strategy."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.core.protobuf import config_pb2
from tensorflow.python.client import session as session_lib
from tensorflow.python.distribute.cluster_resolver import TPUClusterResolver
from tensorflow.python.eager import context
from tensorflow.python.eager import function
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.tpu import functional as tpu_functional_ops
from tensorflow.python.tpu import topology
from tensorflow.python.tpu import tpu
from tensorflow.python.util import compat
from tensorflow.python.util.tf_export import tf_export
@tf_export("tpu.experimental.initialize_tpu_system")
def initialize_tpu_system(cluster_resolver=None):
  """Initialize the TPU devices.

  Args:
    cluster_resolver: A tf.distribute.cluster_resolver.TPUClusterResolver,
        which provides information about the TPU cluster.

  Returns:
    The tf.tpu.Topology object for the topology of the TPU cluster.
  """
  if cluster_resolver is None:
    cluster_resolver = TPUClusterResolver("")
  master = cluster_resolver.master()

  logging.info("Initializing the TPU system.")

  if context.executing_eagerly():
    # This function is structured this way for the following non-intuitive
    # reasons: tpu.initialize_system creates a dummy op whose sole purpose is
    # to trigger DistributedTPURewritePass. That pass adds the real ops that
    # initialize the TPU system. Thus, we can't simply run
    # tpu.initialize_system eagerly; we need to wrap it in a defun and trigger
    # the rewrite passes on it. The easiest way to trigger a rewrite is to run
    # the function with TPUPartitionedCallOp.
    @function.defun
    def _tpu_init_fn():
      return tpu.initialize_system()

    # We can't call _tpu_init_fn normally (because it contains just a dummy op,
    # see above), but we need to define it to get it added to the eager context
    # and to get its assigned name.
    # pylint: disable=protected-access
    graph_func = _tpu_init_fn._get_concrete_function_internal()
    func_name = compat.as_str(graph_func._inference_function.name)
    # pylint: enable=protected-access

    output = tpu_functional_ops.TPUPartitionedCall(
        args=[], device_ordinal=0, Tout=[dtypes.string], f=func_name)
    serialized_topology = output[0].numpy()
  else:
    session_config = config_pb2.ConfigProto(allow_soft_placement=True)
    with ops.Graph().as_default():
      with session_lib.Session(config=session_config, target=master) as sess:
        serialized_topology = sess.run(tpu.initialize_system())

  logging.info("Finished initializing TPU system.")
  return topology.Topology(serialized=serialized_topology)
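Since the function returns the cluster's tf.tpu.Topology, a caller can inspect the device mesh after initialization. A minimal sketch, assuming a reachable TPU worker (the grpc address below is a placeholder, not from this commit):

from tensorflow.python.distribute.cluster_resolver import TPUClusterResolver
from tensorflow.python.tpu import tpu_strategy_util

resolver = TPUClusterResolver(tpu="grpc://10.240.1.2:8470")  # placeholder address
tpu_topology = tpu_strategy_util.initialize_tpu_system(resolver)
print("tasks:", tpu_topology.num_tasks)
print("TPU devices per task:", tpu_topology.num_tpus_per_task)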

View File

@@ -0,0 +1,70 @@
path: "tensorflow.distribute.experimental.TPUStrategy"
tf_class {
  is_instance: "<class \'tensorflow.python.distribute.tpu_strategy.TPUStrategy\'>"
  is_instance: "<class \'tensorflow.python.distribute.distribute_lib.DistributionStrategy\'>"
  is_instance: "<type \'object\'>"
  member {
    name: "extended"
    mtype: "<type \'property\'>"
  }
  member {
    name: "num_replicas_in_sync"
    mtype: "<type \'property\'>"
  }
  member {
    name: "steps_per_run"
    mtype: "<type \'property\'>"
  }
  member_method {
    name: "__init__"
    argspec: "args=[\'self\', \'tpu_cluster_resolver\', \'steps_per_run\', \'device_assignment\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
  }
  member_method {
    name: "broadcast"
    argspec: "args=[\'self\', \'tensor\'], varargs=None, keywords=None, defaults=None"
  }
  member_method {
    name: "colocate_vars_with"
    argspec: "args=[\'self\', \'colocate_with_variable\'], varargs=None, keywords=None, defaults=None"
  }
  member_method {
    name: "configure"
    argspec: "args=[\'self\', \'session_config\', \'cluster_spec\', \'task_type\', \'task_id\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
  }
  member_method {
    name: "experimental_make_numpy_iterator"
    argspec: "args=[\'self\', \'numpy_input\', \'batch_size\', \'num_epochs\', \'shuffle\', \'session\'], varargs=None, keywords=None, defaults=[\'1\', \'1024\', \'None\'], "
  }
  member_method {
    name: "experimental_run"
    argspec: "args=[\'self\', \'fn\', \'input_iterator\'], varargs=None, keywords=None, defaults=[\'None\'], "
  }
  member_method {
    name: "group"
    argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
  }
  member_method {
    name: "make_dataset_iterator"
    argspec: "args=[\'self\', \'dataset\'], varargs=None, keywords=None, defaults=None"
  }
  member_method {
    name: "make_input_fn_iterator"
    argspec: "args=[\'self\', \'input_fn\', \'replication_mode\'], varargs=None, keywords=None, defaults=[\'InputReplicationMode.PER_WORKER\'], "
  }
  member_method {
    name: "reduce"
    argspec: "args=[\'self\', \'reduce_op\', \'value\'], varargs=None, keywords=None, defaults=None"
  }
  member_method {
    name: "scope"
    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
  }
  member_method {
    name: "unwrap"
    argspec: "args=[\'self\', \'value\'], varargs=None, keywords=None, defaults=None"
  }
  member_method {
    name: "update_config_proto"
    argspec: "args=[\'self\', \'config_proto\'], varargs=None, keywords=None, defaults=None"
  }
}
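The golden above pins the constructor signature (tpu_cluster_resolver, steps_per_run, device_assignment, all defaulting to None). A hedged construction sketch matching that argspec, assuming a reachable TPU and an exported resolver class (the steps_per_run value is illustrative, not a default from this commit):

import tensorflow as tf

resolver = tf.distribute.cluster_resolver.TPUClusterResolver("")
tf.tpu.experimental.initialize_tpu_system(resolver)

# steps_per_run controls how many steps run inside a single TPU training loop.
strategy = tf.distribute.experimental.TPUStrategy(
    tpu_cluster_resolver=resolver, steps_per_run=100)
print(strategy.num_replicas_in_sync)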

View File

@@ -8,4 +8,8 @@ tf_module {
    name: "ParameterServerStrategy"
    mtype: "<type \'type\'>"
  }
  member {
    name: "TPUStrategy"
    mtype: "<type \'type\'>"
  }
}

View File

@@ -612,6 +612,10 @@ tf_module {
    name: "test"
    mtype: "<type \'module\'>"
  }
  member {
    name: "tpu"
    mtype: "<type \'module\'>"
  }
  member {
    name: "train"
    mtype: "<type \'module\'>"

View File

@@ -0,0 +1,7 @@
path: "tensorflow.tpu.experimental"
tf_module {
  member_method {
    name: "initialize_tpu_system"
    argspec: "args=[\'cluster_resolver\'], varargs=None, keywords=None, defaults=[\'None\'], "
  }
}

View File

@@ -0,0 +1,7 @@
path: "tensorflow.tpu"
tf_module {
  member {
    name: "experimental"
    mtype: "<type \'module\'>"
  }
}

View File

@@ -0,0 +1,70 @@
path: "tensorflow.distribute.experimental.TPUStrategy"
tf_class {
  is_instance: "<class \'tensorflow.python.distribute.tpu_strategy.TPUStrategy\'>"
  is_instance: "<class \'tensorflow.python.distribute.distribute_lib.DistributionStrategy\'>"
  is_instance: "<type \'object\'>"
  member {
    name: "extended"
    mtype: "<type \'property\'>"
  }
  member {
    name: "num_replicas_in_sync"
    mtype: "<type \'property\'>"
  }
  member {
    name: "steps_per_run"
    mtype: "<type \'property\'>"
  }
  member_method {
    name: "__init__"
    argspec: "args=[\'self\', \'tpu_cluster_resolver\', \'steps_per_run\', \'device_assignment\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
  }
  member_method {
    name: "broadcast"
    argspec: "args=[\'self\', \'tensor\'], varargs=None, keywords=None, defaults=None"
  }
  member_method {
    name: "colocate_vars_with"
    argspec: "args=[\'self\', \'colocate_with_variable\'], varargs=None, keywords=None, defaults=None"
  }
  member_method {
    name: "configure"
    argspec: "args=[\'self\', \'session_config\', \'cluster_spec\', \'task_type\', \'task_id\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
  }
  member_method {
    name: "experimental_make_numpy_iterator"
    argspec: "args=[\'self\', \'numpy_input\', \'batch_size\', \'num_epochs\', \'shuffle\', \'session\'], varargs=None, keywords=None, defaults=[\'1\', \'1024\', \'None\'], "
  }
  member_method {
    name: "experimental_run"
    argspec: "args=[\'self\', \'fn\', \'input_iterator\'], varargs=None, keywords=None, defaults=[\'None\'], "
  }
  member_method {
    name: "group"
    argspec: "args=[\'self\', \'value\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
  }
  member_method {
    name: "make_dataset_iterator"
    argspec: "args=[\'self\', \'dataset\'], varargs=None, keywords=None, defaults=None"
  }
  member_method {
    name: "make_input_fn_iterator"
    argspec: "args=[\'self\', \'input_fn\', \'replication_mode\'], varargs=None, keywords=None, defaults=[\'InputReplicationMode.PER_WORKER\'], "
  }
  member_method {
    name: "reduce"
    argspec: "args=[\'self\', \'reduce_op\', \'value\'], varargs=None, keywords=None, defaults=None"
  }
  member_method {
    name: "scope"
    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
  }
  member_method {
    name: "unwrap"
    argspec: "args=[\'self\', \'value\'], varargs=None, keywords=None, defaults=None"
  }
  member_method {
    name: "update_config_proto"
    argspec: "args=[\'self\', \'config_proto\'], varargs=None, keywords=None, defaults=None"
  }
}

View File

@@ -8,4 +8,8 @@ tf_module {
    name: "ParameterServerStrategy"
    mtype: "<type \'type\'>"
  }
  member {
    name: "TPUStrategy"
    mtype: "<type \'type\'>"
  }
}

View File

@@ -336,6 +336,10 @@ tf_module {
    name: "test"
    mtype: "<type \'module\'>"
  }
  member {
    name: "tpu"
    mtype: "<type \'module\'>"
  }
  member {
    name: "train"
    mtype: "<type \'module\'>"

View File

@@ -0,0 +1,7 @@
path: "tensorflow.tpu.experimental"
tf_module {
  member_method {
    name: "initialize_tpu_system"
    argspec: "args=[\'cluster_resolver\'], varargs=None, keywords=None, defaults=[\'None\'], "
  }
}

View File

@@ -0,0 +1,7 @@
path: "tensorflow.tpu"
tf_module {
  member {
    name: "experimental"
    mtype: "<type \'module\'>"
  }
}