Add TPUEmbedding mid level API.
PiperOrigin-RevId: 312299886
Change-Id: If8e24b080c0fc6d841c0c681aae71d7537b704f8
parent 6c1f11a557
commit 81065dbaba
@@ -67,6 +67,7 @@ TENSORFLOW_API_INIT_FILES = [
     "summary/experimental/__init__.py",
     "sysconfig/__init__.py",
     "test/__init__.py",
+    "tpu/experimental/embedding/__init__.py",
     "tpu/experimental/__init__.py",
     "tpu/__init__.py",
     "train/__init__.py",
@@ -85,6 +85,7 @@ TENSORFLOW_API_INIT_FILES_V1 = [
     "summary/__init__.py",
     "sysconfig/__init__.py",
     "test/__init__.py",
+    "tpu/experimental/embedding/__init__.py",
    "tpu/experimental/__init__.py",
     "tpu/__init__.py",
     "train/__init__.py",
@@ -179,6 +179,8 @@ py_library(
         ":feature_column_v2",
         ":preempted_hook_py",
         ":tpu_embedding",
+        ":tpu_embedding_v2",
+        ":tpu_embedding_v2_utils",
         ":tpu_lib",
     ],
 )
@@ -435,6 +437,45 @@ tf_py_test(
     ],
 )
 
+py_library(
+    name = "tpu_embedding_v2_utils",
+    srcs = ["tpu_embedding_v2_utils.py"],
+    srcs_version = "PY2AND3",
+    visibility = [
+        "//learning/brain/contrib/learn/tpu:__subpackages__",
+        "//quality/deepsearch:__subpackages__",
+    ],
+    deps = [
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python/distribute:device_util",
+        "//tensorflow/python/distribute:sharded_variable",
+        "//tensorflow/python/tpu:tpu_lib",
+        "//tensorflow/python/tpu:tpu_py",
+        "//tensorflow/python/training/saving:saveable_hook",
+        "@six_archive//:six",
+    ],
+)
+
+py_library(
+    name = "tpu_embedding_v2",
+    srcs = ["tpu_embedding_v2.py"],
+    srcs_version = "PY2AND3",
+    visibility = [
+        "//learning/brain/contrib/learn/tpu:__subpackages__",
+        "//quality/deepsearch:__subpackages__",
+    ],
+    deps = [
+        ":tpu_embedding_v2_utils",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python/distribute:device_util",
+        "//tensorflow/python/distribute:sharded_variable",
+        "//tensorflow/python/tpu:tpu_lib",
+        "//tensorflow/python/tpu:tpu_py",
+        "//tensorflow/python/training/saving:saveable_hook",
+        "@six_archive//:six",
+    ],
+)
+
 tf_proto_library(
     name = "tensor_tracer_proto",
     srcs = ["tensor_tracer.proto"],
@@ -27,5 +27,7 @@ from tensorflow.python.tpu import bfloat16
 from tensorflow.python.tpu import feature_column_v2
 from tensorflow.python.tpu import tpu
 from tensorflow.python.tpu import tpu_embedding
+from tensorflow.python.tpu import tpu_embedding_v2
+from tensorflow.python.tpu import tpu_embedding_v2_utils
 from tensorflow.python.tpu import tpu_optimizer
 # pylint: enable=unused-import
File diff suppressed because it is too large.
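The suppressed file is presumably `tpu_embedding_v2.py`, which implements the `TPUEmbedding` class itself; its public surface (`enqueue`, `dequeue`, `apply_gradients`) is visible in the API goldens at the end of this diff. Below is a minimal, illustrative sketch (not part of this commit) of how those methods might be combined in a training step. `resolver`, `feature_config`, `model_fn`, and `dist_iterator` are hypothetical placeholders supplied by the surrounding program.

```python
import tensorflow as tf

# Hypothetical TPU setup; `resolver` is a placeholder cluster resolver.
strategy = tf.distribute.experimental.TPUStrategy(resolver)

with strategy.scope():
  embedding = tf.tpu.experimental.embedding.TPUEmbedding(
      feature_config=feature_config,  # placeholder, see FeatureConfig below
      batch_size=64,
      optimizer=tf.tpu.experimental.embedding.SGD(0.1))

@tf.function
def training_step(dist_iterator):
  def step_fn():
    # Dequeue the embedding activations for the features enqueued below.
    activations = embedding.dequeue()
    with tf.GradientTape() as tape:
      tape.watch(activations)
      loss = model_fn(activations)  # hypothetical model/loss
    # Send gradients w.r.t. the activations back to the embedding tables.
    embedding.apply_gradients(tape.gradient(loss, activations))
    return loss

  # Enqueue this step's features, then run the replica computation.
  embedding.enqueue(next(dist_iterator), training=True)
  return strategy.run(step_fn)
```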
@@ -0,0 +1,624 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Companion classes for mid level API for TPU Embeddings in TF2."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import abc
+import functools
+import math
+import six
+
+from tensorflow.core.protobuf.tpu import optimization_parameters_pb2
+from tensorflow.python.ops import init_ops_v2
+from tensorflow.python.ops import variables as tf_variables
+from tensorflow.python.tpu.ops import tpu_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+@six.add_metaclass(abc.ABCMeta)
+class _Optimizer(object):
+  """Base class for all optimizers, with common parameters."""
+
+  def __init__(self, learning_rate, use_gradient_accumulation, clip_weight_min,
+               clip_weight_max, weight_decay_factor,
+               multiply_weight_decay_factor_by_learning_rate,
+               slot_variable_creation_fn=None):
+    self.learning_rate = learning_rate
+    self.use_gradient_accumulation = use_gradient_accumulation
+    self.clip_weight_min = clip_weight_min
+    self.clip_weight_max = clip_weight_max
+    self.weight_decay_factor = weight_decay_factor
+    self.multiply_weight_decay_factor_by_learning_rate = (
+        multiply_weight_decay_factor_by_learning_rate)
+
+    if (slot_variable_creation_fn is not None and
+        not callable(slot_variable_creation_fn)):
+      raise ValueError("slot_variable_creation_fn must be either None or a "
+                       "callable.")
+    self.slot_variable_creation_fn = slot_variable_creation_fn
+
+  @abc.abstractmethod
+  def _slot_names(self):
+    """Returns the names of all the slot variables.
+
+    This does not include the 'parameters' variable and these names must match
+    the names of the slot variables as used in the corresponding
+    `tpu_ops.load_tpu_embedding_*` ops.
+    """
+    raise NotImplementedError
+
+  @abc.abstractmethod
+  def _slot_initializers(self):
+    """Returns initializers for slot variables.
+
+    This returns a parallel list to self._slot_names().
+    """
+    raise NotImplementedError
+
+  def _set_optimization_parameters(self, parameters):
+    """Sets the optimizer fields in the OptimizationParameters."""
+    if self.use_gradient_accumulation:
+      parameters.gradient_accumulation_status = (
+          optimization_parameters_pb2.GradientAccumulationStatus.ENABLED)
+    else:
+      parameters.gradient_accumulation_status = (
+          optimization_parameters_pb2.GradientAccumulationStatus.DISABLED)
+
+    if self.clip_weight_min is not None:
+      parameters.clipping_limits.lower.value = self.clip_weight_min
+
+    if self.clip_weight_max is not None:
+      parameters.clipping_limits.upper.value = self.clip_weight_max
+
+    if self.weight_decay_factor:
+      parameters.weight_decay_factor = self.weight_decay_factor
+      if self.multiply_weight_decay_factor_by_learning_rate:
+        parameters.multiply_weight_decay_factor_by_learning_rate = True
+
+  @abc.abstractmethod
+  def _load(self):
+    """Returns the load function for the optimizer."""
+    raise NotImplementedError
+
+  @abc.abstractmethod
+  def _retrieve(self):
+    """Returns the retrieve function for the optimizer."""
+    raise NotImplementedError
+
+  def _create_slots(self, table):
+    """Creates slot variables for table.
+
+    Uses shape of table to create parallel slot variables.
+
+    Args:
+      table: A Variable or equivalent.
+
+    Returns:
+      A dict of variables, keyed by self._slot_names().
+    """
+    if self.slot_variable_creation_fn is not None:
+      return self.slot_variable_creation_fn(table, self._slot_names())
+    else:
+      slots = {}
+      for slot, initializer in zip(self._slot_names(),
+                                   self._slot_initializers()):
+        slots[slot] = tf_variables.Variable(
+            name=table.name + "/" + slot,
+            initial_value=functools.partial(
+                initializer, shape=table.shape, dtype=table.dtype),
+            trainable=False)
+      return slots
+
+
@tf_export("tpu.experimental.embedding.SGD")
|
||||||
|
class SGD(_Optimizer):
|
||||||
|
"""Optimization parameters for stochastic gradient descent for TPU embeddings.
|
||||||
|
|
||||||
|
Pass this to `tf.tpu.experimental.embedding.TPUEmbedding` via the `optimizer`
|
||||||
|
argument to set the global optimizer and its parameters:
|
||||||
|
|
||||||
|
```
|
||||||
|
embedding = tf.tpu.experimental.embedding.TPUEmbedding(
|
||||||
|
...
|
||||||
|
optimizer=tf.tpu.experimental.embedding.SGD(0.1))
|
||||||
|
```
|
||||||
|
|
||||||
|
This can also be used in a `tf.tpu.experimental.embedding.TableConfig` as the
|
||||||
|
optimizer parameter to set a table specific optimizer. This will override the
|
||||||
|
optimizer and parameters for global embedding optimizer defined above:
|
||||||
|
|
||||||
|
```
|
||||||
|
table_one = tf.tpu.experimental.embedding.TableConfig(
|
||||||
|
vocabulary_size=...,
|
||||||
|
dim=...,
|
||||||
|
optimizer=tf.tpu.experimental.embedding.SGD(0.2))
|
||||||
|
table_two = tf.tpu.experimental.embedding.TableConfig(
|
||||||
|
vocabulary_size=...,
|
||||||
|
dim=...)
|
||||||
|
|
||||||
|
feature_config = (
|
||||||
|
tf.tpu.experimental.embedding.FeatureConfig(
|
||||||
|
table=table_one),
|
||||||
|
tf.tpu.experimental.embedding.FeatureConfig(
|
||||||
|
table=table_two))
|
||||||
|
|
||||||
|
embedding = tf.tpu.experimental.embedding.TPUEmbedding(
|
||||||
|
feature_config=feature_config,
|
||||||
|
batch_size=...
|
||||||
|
optimizer=tf.tpu.experimental.embedding.SGD(0.1))
|
||||||
|
```
|
||||||
|
|
||||||
|
In the above example, the first feature will be looked up in a table that has
|
||||||
|
a learning rate of 0.2 while the second feature will be looked up in a table
|
||||||
|
that has a learning rate of 0.1.
|
||||||
|
|
||||||
|
See 'tensorflow/core/protobuf/tpu/optimization_parameters.proto' for a
|
||||||
|
complete description of these parameters and their impacts on the optimizer
|
||||||
|
algorithm.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self,
|
||||||
|
learning_rate=0.01,
|
||||||
|
clip_weight_min=None,
|
||||||
|
clip_weight_max=None,
|
||||||
|
weight_decay_factor=None,
|
||||||
|
multiply_weight_decay_factor_by_learning_rate=None):
|
||||||
|
"""Optimization parameters for stochastic gradient descent.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
learning_rate: The learning rate. It should be a floating point value or a
|
||||||
|
callable taking no arguments for a dynamic learning rate.
|
||||||
|
clip_weight_min: the minimum value to clip by; None means -infinity.
|
||||||
|
clip_weight_max: the maximum value to clip by; None means +infinity.
|
||||||
|
weight_decay_factor: amount of weight decay to apply; None means that the
|
||||||
|
weights are not decayed. Weights are decayed by multiplying the weight
|
||||||
|
by this factor each step.
|
||||||
|
multiply_weight_decay_factor_by_learning_rate: if true,
|
||||||
|
`weight_decay_factor` is multiplied by the current learning rate.
|
||||||
|
"""
|
||||||
|
super(SGD, self).__init__(
|
||||||
|
learning_rate, False, clip_weight_min, clip_weight_max,
|
||||||
|
weight_decay_factor, multiply_weight_decay_factor_by_learning_rate)
|
||||||
|
|
||||||
|
def _slot_names(self):
|
||||||
|
return []
|
||||||
|
|
||||||
|
def _slot_initializers(self):
|
||||||
|
return []
|
||||||
|
|
||||||
|
def _set_optimization_parameters(self, parameters):
|
||||||
|
super(SGD, self)._set_optimization_parameters(parameters)
|
||||||
|
parameters.stochastic_gradient_descent.SetInParent()
|
||||||
|
|
||||||
|
def _load(self):
|
||||||
|
return tpu_ops.load_tpu_embedding_stochastic_gradient_descent_parameters
|
||||||
|
|
||||||
|
def _retrieve(self):
|
||||||
|
return tpu_ops.retrieve_tpu_embedding_stochastic_gradient_descent_parameters
|
||||||
|
|
||||||
|
|
||||||
|
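As the docstring above notes, `learning_rate` may be a zero-argument callable rather than a constant. An illustrative sketch (not part of this commit) of wiring in a decaying schedule; `step` is a hypothetical counter maintained by the training loop:

```python
import tensorflow as tf

step = tf.Variable(0, trainable=False, dtype=tf.int64)

def decayed_lr():
  # Halve the base rate of 0.1 every 10000 steps.
  return 0.1 * (0.5 ** tf.cast(step // 10000, tf.float32))

sgd = tf.tpu.experimental.embedding.SGD(learning_rate=decayed_lr)
```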
@tf_export("tpu.experimental.embedding.Adagrad")
|
||||||
|
class Adagrad(_Optimizer):
|
||||||
|
"""Optimization parameters for Adagrad with TPU embeddings.
|
||||||
|
|
||||||
|
Pass this to `tf.tpu.experimental.embedding.TPUEmbedding` via the `optimizer`
|
||||||
|
argument to set the global optimizer and its parameters:
|
||||||
|
|
||||||
|
```python
|
||||||
|
embedding = tf.tpu.experimental.embedding.TPUEmbedding(
|
||||||
|
...
|
||||||
|
optimizer=tf.tpu.experimental.embedding.Adagrad(0.1))
|
||||||
|
```
|
||||||
|
|
||||||
|
This can also be used in a `tf.tpu.experimental.embedding.TableConfig` as the
|
||||||
|
optimizer parameter to set a table specific optimizer. This will override the
|
||||||
|
optimizer and parameters for global embedding optimizer defined above:
|
||||||
|
|
||||||
|
```python
|
||||||
|
table_one = tf.tpu.experimental.embedding.TableConfig(
|
||||||
|
vocabulary_size=...,
|
||||||
|
dim=...,
|
||||||
|
optimizer=tf.tpu.experimental.embedding.Adagrad(0.2))
|
||||||
|
table_two = tf.tpu.experimental.embedding.TableConfig(
|
||||||
|
vocabulary_size=...,
|
||||||
|
dim=...)
|
||||||
|
|
||||||
|
feature_config = (
|
||||||
|
tf.tpu.experimental.embedding.FeatureConfig(
|
||||||
|
table=table_one),
|
||||||
|
tf.tpu.experimental.embedding.FeatureConfig(
|
||||||
|
table=table_two))
|
||||||
|
|
||||||
|
embedding = tf.tpu.experimental.embedding.TPUEmbedding(
|
||||||
|
feature_config=feature_config,
|
||||||
|
batch_size=...
|
||||||
|
optimizer=tf.tpu.experimental.embedding.Adagrad(0.1))
|
||||||
|
```
|
||||||
|
|
||||||
|
In the above example, the first feature will be looked up in a table that has
|
||||||
|
a learning rate of 0.2 while the second feature will be looked up in a table
|
||||||
|
that has a learning rate of 0.1.
|
||||||
|
|
||||||
|
See 'tensorflow/core/protobuf/tpu/optimization_parameters.proto' for a
|
||||||
|
complete description of these parameters and their impacts on the optimizer
|
||||||
|
algorithm.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self,
|
||||||
|
learning_rate=0.001,
|
||||||
|
initial_accumulator_value=0.1,
|
||||||
|
use_gradient_accumulation=True,
|
||||||
|
clip_weight_min=None,
|
||||||
|
clip_weight_max=None,
|
||||||
|
weight_decay_factor=None,
|
||||||
|
multiply_weight_decay_factor_by_learning_rate=None,
|
||||||
|
slot_variable_creation_fn=None):
|
||||||
|
"""Optimization parameters for Adagrad.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
learning_rate: The learning rate. It should be a floating point value or a
|
||||||
|
callable taking no arguments for a dynamic learning rate.
|
||||||
|
initial_accumulator_value: initial accumulator for Adagrad.
|
||||||
|
use_gradient_accumulation: setting this to `False` makes embedding
|
||||||
|
gradients calculation less accurate but faster.
|
||||||
|
clip_weight_min: the minimum value to clip by; None means -infinity.
|
||||||
|
clip_weight_max: the maximum value to clip by; None means +infinity.
|
||||||
|
weight_decay_factor: amount of weight decay to apply; None means that the
|
||||||
|
weights are not decayed.
|
||||||
|
multiply_weight_decay_factor_by_learning_rate: if true,
|
||||||
|
`weight_decay_factor` is multiplied by the current learning rate.
|
||||||
|
slot_variable_creation_fn: Defaults to `None`. If you wish do directly
|
||||||
|
control the creation of the slot variables, set this to a callable
|
||||||
|
taking two parameters, a variable and a list of slot names to create for
|
||||||
|
it. This function should return a dict with the slot names as keys and
|
||||||
|
the created variables as values. When set to None (the default), uses
|
||||||
|
the built-in variable creation.
|
||||||
|
"""
|
||||||
|
super(Adagrad, self).__init__(
|
||||||
|
learning_rate, use_gradient_accumulation, clip_weight_min,
|
||||||
|
clip_weight_max, weight_decay_factor,
|
||||||
|
multiply_weight_decay_factor_by_learning_rate,
|
||||||
|
slot_variable_creation_fn)
|
||||||
|
if initial_accumulator_value <= 0:
|
||||||
|
raise ValueError("Adagrad initial_accumulator_value must be positive")
|
||||||
|
self.initial_accumulator_value = initial_accumulator_value
|
||||||
|
|
||||||
|
def _slot_names(self):
|
||||||
|
return ["accumulators"]
|
||||||
|
|
||||||
|
def _slot_initializers(self):
|
||||||
|
return [init_ops_v2.Constant(self.initial_accumulator_value)]
|
||||||
|
|
||||||
|
def _set_optimization_parameters(self, parameters):
|
||||||
|
super(Adagrad, self)._set_optimization_parameters(parameters)
|
||||||
|
parameters.adagrad.SetInParent()
|
||||||
|
|
||||||
|
def _load(self):
|
||||||
|
return tpu_ops.load_tpu_embedding_adagrad_parameters
|
||||||
|
|
||||||
|
def _retrieve(self):
|
||||||
|
return tpu_ops.retrieve_tpu_embedding_adagrad_parameters
|
||||||
|
|
||||||
|
|
||||||
|
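An illustrative sketch (not part of this commit) of the `slot_variable_creation_fn` hook described above: it receives the table variable and the list of slot names (`["accumulators"]` for Adagrad) and must return a dict mapping each slot name to a variable. `create_slots` is a hypothetical name:

```python
import tensorflow as tf

def create_slots(table, slot_names):
  slots = {}
  for name in slot_names:
    # Zero-initialized here for illustration; the built-in path instead uses
    # `initial_accumulator_value` (default 0.1) for Adagrad accumulators.
    slots[name] = tf.Variable(
        name=table.name + "/" + name,
        initial_value=tf.zeros(shape=table.shape, dtype=table.dtype),
        trainable=False)
  return slots

adagrad = tf.tpu.experimental.embedding.Adagrad(
    learning_rate=0.05, slot_variable_creation_fn=create_slots)
```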
@tf_export("tpu.experimental.embedding.Adam")
|
||||||
|
class Adam(_Optimizer):
|
||||||
|
"""Optimization parameters for Adam with TPU embeddings.
|
||||||
|
|
||||||
|
Pass this to `tf.tpu.experimental.embedding.TPUEmbedding` via the `optimizer`
|
||||||
|
argument to set the global optimizer and its parameters:
|
||||||
|
|
||||||
|
NOTE: By default this optimizer is lazy, i.e. it will not apply the gradient
|
||||||
|
update of zero to rows that were not looked up. You can change this behavior
|
||||||
|
by setting `lazy_adam` to `False`.
|
||||||
|
|
||||||
|
```python
|
||||||
|
embedding = tf.tpu.experimental.embedding.TPUEmbedding(
|
||||||
|
...
|
||||||
|
optimizer=tf.tpu.experimental.embedding.Adam(0.1))
|
||||||
|
```
|
||||||
|
|
||||||
|
This can also be used in a `tf.tpu.experimental.embedding.TableConfig` as the
|
||||||
|
optimizer parameter to set a table specific optimizer. This will override the
|
||||||
|
optimizer and parameters for global embedding optimizer defined above:
|
||||||
|
|
||||||
|
```python
|
||||||
|
table_one = tf.tpu.experimental.embedding.TableConfig(
|
||||||
|
vocabulary_size=...,
|
||||||
|
dim=...,
|
||||||
|
optimizer=tf.tpu.experimental.embedding.Adam(0.2))
|
||||||
|
table_two = tf.tpu.experimental.embedding.TableConfig(
|
||||||
|
vocabulary_size=...,
|
||||||
|
dim=...)
|
||||||
|
|
||||||
|
feature_config = (
|
||||||
|
tf.tpu.experimental.embedding.FeatureConfig(
|
||||||
|
table=table_one),
|
||||||
|
tf.tpu.experimental.embedding.FeatureConfig(
|
||||||
|
table=table_two))
|
||||||
|
|
||||||
|
embedding = tf.tpu.experimental.embedding.TPUEmbedding(
|
||||||
|
feature_config=feature_config,
|
||||||
|
batch_size=...
|
||||||
|
optimizer=tf.tpu.experimental.embedding.Adam(0.1))
|
||||||
|
```
|
||||||
|
|
||||||
|
In the above example, the first feature will be looked up in a table that has
|
||||||
|
a learning rate of 0.2 while the second feature will be looked up in a table
|
||||||
|
that has a learning rate of 0.1.
|
||||||
|
|
||||||
|
See 'tensorflow/core/protobuf/tpu/optimization_parameters.proto' for a
|
||||||
|
complete description of these parameters and their impacts on the optimizer
|
||||||
|
algorithm.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self,
|
||||||
|
learning_rate=0.001,
|
||||||
|
beta_1=0.9,
|
||||||
|
beta_2=0.999,
|
||||||
|
epsilon=1e-07,
|
||||||
|
lazy_adam=True,
|
||||||
|
sum_inside_sqrt=True,
|
||||||
|
use_gradient_accumulation=True,
|
||||||
|
clip_weight_min=None,
|
||||||
|
clip_weight_max=None,
|
||||||
|
weight_decay_factor=None,
|
||||||
|
multiply_weight_decay_factor_by_learning_rate=None,
|
||||||
|
slot_variable_creation_fn=None):
|
||||||
|
"""Optimization parameters for Adam.
|
||||||
|
|
||||||
|
See 'tensorflow/core/protobuf/tpu/optimization_parameters.proto' for a
|
||||||
|
complete description of these parameters and their impacts on the optimizer
|
||||||
|
algorithm.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
learning_rate: The learning rate. It should be a floating point value or a
|
||||||
|
callable taking no arguments for a dynamic learning rate.
|
||||||
|
beta_1: A float value.
|
||||||
|
The exponential decay rate for the 1st moment estimates.
|
||||||
|
beta_2: A float value.
|
||||||
|
The exponential decay rate for the 2nd moment estimates.
|
||||||
|
epsilon: A small constant for numerical stability.
|
||||||
|
lazy_adam: Use lazy Adam instead of Adam. Lazy Adam trains faster.
|
||||||
|
sum_inside_sqrt: When this is true, the Adam update formula is changed
|
||||||
|
from `m / (sqrt(v) + epsilon)` to `m / sqrt(v + epsilon**2)`. This
|
||||||
|
option improves the performance of TPU training and is not expected to
|
||||||
|
harm model quality.
|
||||||
|
use_gradient_accumulation: Setting this to `False` makes embedding
|
||||||
|
gradients calculation less accurate but faster.
|
||||||
|
clip_weight_min: the minimum value to clip by; None means -infinity.
|
||||||
|
clip_weight_max: the maximum value to clip by; None means +infinity.
|
||||||
|
weight_decay_factor: amount of weight decay to apply; None means that the
|
||||||
|
weights are not decayed.
|
||||||
|
multiply_weight_decay_factor_by_learning_rate: if true,
|
||||||
|
`weight_decay_factor` is multiplied by the current learning rate.
|
||||||
|
slot_variable_creation_fn: a callable taking two parameters, a variable
|
||||||
|
and a list of slot names to create for it. This function should return
|
||||||
|
a dict with the slot names as keys and the created variables as values.
|
||||||
|
When set to None (the default), uses the built-in variable creation.
|
||||||
|
"""
|
||||||
|
super(Adam, self).__init__(
|
||||||
|
learning_rate, use_gradient_accumulation, clip_weight_min,
|
||||||
|
clip_weight_max, weight_decay_factor,
|
||||||
|
multiply_weight_decay_factor_by_learning_rate,
|
||||||
|
slot_variable_creation_fn)
|
||||||
|
if beta_1 < 0. or beta_1 >= 1.:
|
||||||
|
raise ValueError("beta1 must be in the range [0, 1), but received {}."
|
||||||
|
.format(beta_1))
|
||||||
|
if beta_2 < 0. or beta_2 >= 1.:
|
||||||
|
raise ValueError("beta2 must be in the range [0, 1), but received {}."
|
||||||
|
.format(beta_2))
|
||||||
|
if epsilon <= 0.:
|
||||||
|
raise ValueError("epsilon must be positive; got {}.".format(epsilon))
|
||||||
|
if not use_gradient_accumulation and not lazy_adam:
|
||||||
|
raise ValueError(
|
||||||
|
"When disabling Lazy Adam, gradient accumulation must be used.")
|
||||||
|
|
||||||
|
self.beta_1 = beta_1
|
||||||
|
self.beta_2 = beta_2
|
||||||
|
self.epsilon = epsilon
|
||||||
|
self.lazy_adam = lazy_adam
|
||||||
|
self.sum_inside_sqrt = sum_inside_sqrt
|
||||||
|
|
||||||
|
def _slot_names(self):
|
||||||
|
return ["momenta", "velocities"]
|
||||||
|
|
||||||
|
def _slot_initializers(self):
|
||||||
|
return [init_ops_v2.Constant(), init_ops_v2.Constant()]
|
||||||
|
|
||||||
|
def _set_optimization_parameters(self, parameters):
|
||||||
|
super(Adam, self)._set_optimization_parameters(parameters)
|
||||||
|
parameters.adam.beta1 = self.beta_1
|
||||||
|
parameters.adam.beta2 = self.beta_2
|
||||||
|
parameters.adam.epsilon = self.epsilon
|
||||||
|
parameters.adam.use_non_lazy_adam = not self.lazy_adam
|
||||||
|
parameters.adam.use_sum_inside_sqrt = self.sum_inside_sqrt
|
||||||
|
|
||||||
|
def _load(self):
|
||||||
|
return tpu_ops.load_tpu_embedding_adam_parameters
|
||||||
|
|
||||||
|
def _retrieve(self):
|
||||||
|
return tpu_ops.retrieve_tpu_embedding_adam_parameters
|
||||||
|
|
||||||
|
|
||||||
|
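The validation above couples two of the flags: disabling `lazy_adam` requires gradient accumulation. A short illustration (not part of this commit) of the valid and invalid combinations:

```python
import tensorflow as tf

# Valid: non-lazy Adam with gradient accumulation (the default).
adam = tf.tpu.experimental.embedding.Adam(learning_rate=0.001, lazy_adam=False)

# Invalid: raises ValueError("When disabling Lazy Adam, gradient
# accumulation must be used.")
try:
  tf.tpu.experimental.embedding.Adam(
      lazy_adam=False, use_gradient_accumulation=False)
except ValueError as e:
  print(e)
```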
@tf_export("tpu.experimental.embedding.TableConfig")
|
||||||
|
class TableConfig(object):
|
||||||
|
"""Configuration data for one embedding table.
|
||||||
|
|
||||||
|
This class holds the configuration data for a single embedding table. It is
|
||||||
|
used as the `table` parameter of a
|
||||||
|
`tf.tpu.experimental.embedding.FeatureConfig`. Multiple
|
||||||
|
`tf.tpu.experimental.embedding.FeatureConfig` objects can use the same
|
||||||
|
`tf.tpu.experimental.embedding.TableConfig` object. In this case a shared
|
||||||
|
table will be created for those feature lookups.
|
||||||
|
|
||||||
|
```python
|
||||||
|
table_config_one = tf.tpu.experimental.embedding.TableConfig(
|
||||||
|
vocabulary_size=...,
|
||||||
|
dim=...)
|
||||||
|
table_config_two = tf.tpu.experimental.embedding.TableConfig(
|
||||||
|
vocabulary_size=...,
|
||||||
|
dim=...)
|
||||||
|
feature_config = {
|
||||||
|
'feature_one': tf.tpu.experimental.embedding.FeatureConfig(
|
||||||
|
table=table_config_one),
|
||||||
|
'feature_two': tf.tpu.experimental.embedding.FeatureConfig(
|
||||||
|
table=table_config_one),
|
||||||
|
'feature_three': tf.tpu.experimental.embedding.FeatureConfig(
|
||||||
|
table=table_config_two)}
|
||||||
|
embedding = tf.tpu.experimental.embedding.TPUEmbedding(
|
||||||
|
feature_config=feature_config,
|
||||||
|
batch_size=...
|
||||||
|
optimizer=tf.tpu.experimental.embedding.Adam(0.1))
|
||||||
|
```
|
||||||
|
|
||||||
|
The above configuration has 2 tables, and three features. The first two
|
||||||
|
features will be looked up in the first table and the third feature will be
|
||||||
|
looked up in the second table.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, vocabulary_size, dim, initializer, optimizer=None,
|
||||||
|
combiner="mean", name=None):
|
||||||
|
"""Embedding table configuration.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
vocabulary_size: Size of the table's vocabulary (number of rows).
|
||||||
|
dim: The embedding dimension (width) of the table.
|
||||||
|
initializer: A callable initializer taking one parameter, the shape of the
|
||||||
|
variable that will be initialized. Will be called once per task, to
|
||||||
|
initialize that task's shard of the embedding table. If not specified,
|
||||||
|
defaults to `truncated_normal_initializer` with mean `0.0` and standard
|
||||||
|
deviation `1/sqrt(dim)`.
|
||||||
|
optimizer: An optional instance of an optimizer parameters class, instance
|
||||||
|
of one of `tf.tpu.experimental.embedding.SGD`,
|
||||||
|
`tf.tpu.experimental.embedding.Adagrad` or
|
||||||
|
`tf.tpu.experimental.embedding.Adam`. It set will override the global
|
||||||
|
optimizer passed to `tf.tpu.experimental.embedding.TPUEmbedding`.
|
||||||
|
combiner: A string specifying how to reduce if there are multiple entries
|
||||||
|
in a single row. Currently 'mean', 'sqrtn', 'sum' are
|
||||||
|
supported, with 'mean' the default. 'sqrtn' often achieves good
|
||||||
|
accuracy, in particular with bag-of-words columns. For more information,
|
||||||
|
see `tf.nn.embedding_lookup_sparse`.
|
||||||
|
name: An optional string used to name the table. Useful for debugging.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
`TableConfig`.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: if `vocabulary_size` is not a positive integer.
|
||||||
|
ValueError: if `dim` is not a positive integer.
|
||||||
|
ValueError: if `initializer` is specified and is not callable.
|
||||||
|
ValueError: if `combiner` is not supported.
|
||||||
|
"""
|
||||||
|
if not isinstance(vocabulary_size, int) or vocabulary_size < 1:
|
||||||
|
raise ValueError("Invalid vocabulary_size {}.".format(vocabulary_size))
|
||||||
|
|
||||||
|
if not isinstance(dim, int) or dim < 1:
|
||||||
|
raise ValueError("Invalid dim {}.".format(dim))
|
||||||
|
|
||||||
|
if (initializer is not None) and (not callable(initializer)):
|
||||||
|
raise ValueError("initializer must be callable if specified.")
|
||||||
|
if initializer is None:
|
||||||
|
initializer = init_ops_v2.TruncatedNormal(mean=0.0,
|
||||||
|
stddev=1/math.sqrt(dim))
|
||||||
|
|
||||||
|
if combiner not in ("mean", "sum", "sqrtn"):
|
||||||
|
raise ValueError("Invalid combiner {}".format(combiner))
|
||||||
|
|
||||||
|
self.vocabulary_size = vocabulary_size
|
||||||
|
self.dim = dim
|
||||||
|
self.initializer = initializer
|
||||||
|
self.optimizer = optimizer
|
||||||
|
self.combiner = combiner
|
||||||
|
self.name = name
|
||||||
|
|
||||||
|
|
||||||
|
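A short illustration (not part of this commit) of the default-initializer branch above: `initializer` has no default value in the signature, but passing `None` selects a truncated normal initializer with stddev `1/sqrt(dim)`, so the two configs below should be equivalent:

```python
import math
import tensorflow as tf

dim = 64
table_a = tf.tpu.experimental.embedding.TableConfig(
    vocabulary_size=10000, dim=dim, initializer=None)
table_b = tf.tpu.experimental.embedding.TableConfig(
    vocabulary_size=10000,
    dim=dim,
    initializer=tf.initializers.TruncatedNormal(
        mean=0.0, stddev=1 / math.sqrt(dim)))
```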
@tf_export("tpu.experimental.embedding.FeatureConfig")
|
||||||
|
class FeatureConfig(object):
|
||||||
|
"""Configuration data for one embedding feature.
|
||||||
|
|
||||||
|
This class holds the configuration data for a single embedding feature. The
|
||||||
|
main use is to assign features to `tf.tpu.experimental.embedding.TableConfig`s
|
||||||
|
via the table parameter:
|
||||||
|
|
||||||
|
```python
|
||||||
|
table_config_one = tf.tpu.experimental.embedding.TableConfig(
|
||||||
|
vocabulary_size=...,
|
||||||
|
dim=...)
|
||||||
|
table_config_two = tf.tpu.experimental.embedding.TableConfig(
|
||||||
|
vocabulary_size=...,
|
||||||
|
dim=...)
|
||||||
|
feature_config = {
|
||||||
|
'feature_one': tf.tpu.experimental.embedding.FeatureConfig(
|
||||||
|
table=table_config_one),
|
||||||
|
'feature_two': tf.tpu.experimental.embedding.FeatureConfig(
|
||||||
|
table=table_config_one),
|
||||||
|
'feature_three': tf.tpu.experimental.embedding.FeatureConfig(
|
||||||
|
table=table_config_two)}
|
||||||
|
embedding = tf.tpu.experimental.embedding.TPUEmbedding(
|
||||||
|
feature_config=feature_config,
|
||||||
|
batch_size=...
|
||||||
|
optimizer=tf.tpu.experimental.embedding.Adam(0.1))
|
||||||
|
```
|
||||||
|
|
||||||
|
The above configuration has 2 tables, and three features. The first two
|
||||||
|
features will be looked up in the first table and the third feature will be
|
||||||
|
looked up in the second table.
|
||||||
|
|
||||||
|
When feeding features into `embedding.enqueue` they can be `tf.Tensor`s,
|
||||||
|
`tf.SparseTensor`s or `tf.RaggedTensor`s. When the argument
|
||||||
|
`max_sequence_length` is 0, the default, you should expect a output of
|
||||||
|
`embedding.dequeue` for this feature of shape `(batch_size, dim)`. If
|
||||||
|
`max_sequence_length` is greater than 0, the feature is embedded as a sequence
|
||||||
|
and padded up to the given length. The shape of the output for this feature
|
||||||
|
will be `(batch_size, max_sequence_length, dim)`.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, table, max_sequence_length=0, name=None):
|
||||||
|
"""Feature configuration.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
table: An instance of `tf.tpu.experimental.embedding.TableConfig`,
|
||||||
|
describing the table in which this feature should be looked up.
|
||||||
|
max_sequence_length: If positive, the feature is a sequence feature with
|
||||||
|
the corresponding maximum sequence length. If the sequence is longer
|
||||||
|
than this, it will be truncated. If 0, the feature is not a sequence
|
||||||
|
feature.
|
||||||
|
name: An optional name for the feature, useful for debugging.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
`FeatureConfig`.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: if `table` is not an instance of
|
||||||
|
`tf.tpu.experimental.embedding.TableConfig`.
|
||||||
|
ValueError: if `max_sequence_length` not an integer or is negative.
|
||||||
|
"""
|
||||||
|
if not isinstance(table, TableConfig):
|
||||||
|
raise ValueError("table is type {}, expected "
|
||||||
|
"`tf.tpu.experimental.embedding.TableConfig`".format(
|
||||||
|
type(table)))
|
||||||
|
|
||||||
|
if not isinstance(max_sequence_length, int) or max_sequence_length < 0:
|
||||||
|
raise ValueError("Invalid max_sequence_length {}.".format(
|
||||||
|
max_sequence_length))
|
||||||
|
|
||||||
|
self.table = table
|
||||||
|
self.max_sequence_length = max_sequence_length
|
||||||
|
self.name = name
|
|
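An illustrative sketch (not part of this commit) of the sequence-feature behavior described in the docstring: a positive `max_sequence_length` changes the shape dequeued for that feature. The feature names are hypothetical:

```python
import tensorflow as tf

table = tf.tpu.experimental.embedding.TableConfig(
    vocabulary_size=1000, dim=16, initializer=None)

feature_config = {
    # Dequeued activations: (batch_size, 16).
    'query_token': tf.tpu.experimental.embedding.FeatureConfig(table=table),
    # Dequeued activations: (batch_size, 10, 16); sequences longer than 10
    # are truncated, shorter ones padded.
    'history_tokens': tf.tpu.experimental.embedding.FeatureConfig(
        table=table, max_sequence_length=10),
}
```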
@@ -0,0 +1,10 @@
+path: "tensorflow.tpu.experimental.embedding.Adagrad"
+tf_class {
+  is_instance: "<class \'tensorflow.python.tpu.tpu_embedding_v2_utils.Adagrad\'>"
+  is_instance: "<class \'tensorflow.python.tpu.tpu_embedding_v2_utils._Optimizer\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'learning_rate\', \'initial_accumulator_value\', \'use_gradient_accumulation\', \'clip_weight_min\', \'clip_weight_max\', \'weight_decay_factor\', \'multiply_weight_decay_factor_by_learning_rate\', \'slot_variable_creation_fn\'], varargs=None, keywords=None, defaults=[\'0.001\', \'0.1\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+}
@@ -0,0 +1,10 @@
+path: "tensorflow.tpu.experimental.embedding.Adam"
+tf_class {
+  is_instance: "<class \'tensorflow.python.tpu.tpu_embedding_v2_utils.Adam\'>"
+  is_instance: "<class \'tensorflow.python.tpu.tpu_embedding_v2_utils._Optimizer\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'learning_rate\', \'beta_1\', \'beta_2\', \'epsilon\', \'lazy_adam\', \'sum_inside_sqrt\', \'use_gradient_accumulation\', \'clip_weight_min\', \'clip_weight_max\', \'weight_decay_factor\', \'multiply_weight_decay_factor_by_learning_rate\', \'slot_variable_creation_fn\'], varargs=None, keywords=None, defaults=[\'0.001\', \'0.9\', \'0.999\', \'1e-07\', \'True\', \'True\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+}
@@ -0,0 +1,9 @@
+path: "tensorflow.tpu.experimental.embedding.FeatureConfig"
+tf_class {
+  is_instance: "<class \'tensorflow.python.tpu.tpu_embedding_v2_utils.FeatureConfig\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'table\', \'max_sequence_length\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'None\'], "
+  }
+}
@@ -0,0 +1,10 @@
+path: "tensorflow.tpu.experimental.embedding.SGD"
+tf_class {
+  is_instance: "<class \'tensorflow.python.tpu.tpu_embedding_v2_utils.SGD\'>"
+  is_instance: "<class \'tensorflow.python.tpu.tpu_embedding_v2_utils._Optimizer\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'learning_rate\', \'clip_weight_min\', \'clip_weight_max\', \'weight_decay_factor\', \'multiply_weight_decay_factor_by_learning_rate\'], varargs=None, keywords=None, defaults=[\'0.01\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+}
@@ -0,0 +1,27 @@
+path: "tensorflow.tpu.experimental.embedding.TPUEmbedding"
+tf_class {
+  is_instance: "<class \'tensorflow.python.tpu.tpu_embedding_v2.TPUEmbedding\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.tracking.AutoTrackable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "embedding_tables"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'feature_config\', \'batch_size\', \'optimizer\', \'pipeline_execution_with_tensor_core\', \'initialize_tpu_embedding\'], varargs=None, keywords=None, defaults=[\'False\', \'True\'], "
+  }
+  member_method {
+    name: "apply_gradients"
+    argspec: "args=[\'self\', \'gradients\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue"
+    argspec: "args=[\'self\', \'features\', \'weights\', \'training\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'True\', \'None\'], "
+  }
+}
@@ -0,0 +1,9 @@
+path: "tensorflow.tpu.experimental.embedding.TableConfig"
+tf_class {
+  is_instance: "<class \'tensorflow.python.tpu.tpu_embedding_v2_utils.TableConfig\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'vocabulary_size\', \'dim\', \'initializer\', \'optimizer\', \'combiner\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'mean\', \'None\'], "
+  }
+}
@@ -0,0 +1,27 @@
+path: "tensorflow.tpu.experimental.embedding"
+tf_module {
+  member {
+    name: "Adagrad"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "Adam"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "FeatureConfig"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SGD"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "TPUEmbedding"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "TableConfig"
+    mtype: "<type \'type\'>"
+  }
+}
@@ -28,6 +28,10 @@ tf_module {
     name: "Topology"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "embedding"
+    mtype: "<type \'module\'>"
+  }
   member_method {
     name: "embedding_column"
     argspec: "args=[\'categorical_column\', \'dimension\', \'combiner\', \'initializer\', \'max_sequence_length\', \'learning_rate_fn\', \'embedding_lookup_device\', \'tensor_core_shape\', \'use_safe_embedding_lookup\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\', \'0\', \'None\', \'None\', \'None\', \'True\'], "
@@ -0,0 +1,10 @@
+path: "tensorflow.tpu.experimental.embedding.Adagrad"
+tf_class {
+  is_instance: "<class \'tensorflow.python.tpu.tpu_embedding_v2_utils.Adagrad\'>"
+  is_instance: "<class \'tensorflow.python.tpu.tpu_embedding_v2_utils._Optimizer\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'learning_rate\', \'initial_accumulator_value\', \'use_gradient_accumulation\', \'clip_weight_min\', \'clip_weight_max\', \'weight_decay_factor\', \'multiply_weight_decay_factor_by_learning_rate\', \'slot_variable_creation_fn\'], varargs=None, keywords=None, defaults=[\'0.001\', \'0.1\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+}
@@ -0,0 +1,10 @@
+path: "tensorflow.tpu.experimental.embedding.Adam"
+tf_class {
+  is_instance: "<class \'tensorflow.python.tpu.tpu_embedding_v2_utils.Adam\'>"
+  is_instance: "<class \'tensorflow.python.tpu.tpu_embedding_v2_utils._Optimizer\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'learning_rate\', \'beta_1\', \'beta_2\', \'epsilon\', \'lazy_adam\', \'sum_inside_sqrt\', \'use_gradient_accumulation\', \'clip_weight_min\', \'clip_weight_max\', \'weight_decay_factor\', \'multiply_weight_decay_factor_by_learning_rate\', \'slot_variable_creation_fn\'], varargs=None, keywords=None, defaults=[\'0.001\', \'0.9\', \'0.999\', \'1e-07\', \'True\', \'True\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+}
@@ -0,0 +1,9 @@
+path: "tensorflow.tpu.experimental.embedding.FeatureConfig"
+tf_class {
+  is_instance: "<class \'tensorflow.python.tpu.tpu_embedding_v2_utils.FeatureConfig\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'table\', \'max_sequence_length\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'None\'], "
+  }
+}
@@ -0,0 +1,10 @@
+path: "tensorflow.tpu.experimental.embedding.SGD"
+tf_class {
+  is_instance: "<class \'tensorflow.python.tpu.tpu_embedding_v2_utils.SGD\'>"
+  is_instance: "<class \'tensorflow.python.tpu.tpu_embedding_v2_utils._Optimizer\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'learning_rate\', \'clip_weight_min\', \'clip_weight_max\', \'weight_decay_factor\', \'multiply_weight_decay_factor_by_learning_rate\'], varargs=None, keywords=None, defaults=[\'0.01\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+}
@@ -0,0 +1,27 @@
+path: "tensorflow.tpu.experimental.embedding.TPUEmbedding"
+tf_class {
+  is_instance: "<class \'tensorflow.python.tpu.tpu_embedding_v2.TPUEmbedding\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.tracking.AutoTrackable\'>"
+  is_instance: "<class \'tensorflow.python.training.tracking.base.Trackable\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "embedding_tables"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'feature_config\', \'batch_size\', \'optimizer\', \'pipeline_execution_with_tensor_core\', \'initialize_tpu_embedding\'], varargs=None, keywords=None, defaults=[\'False\', \'True\'], "
+  }
+  member_method {
+    name: "apply_gradients"
+    argspec: "args=[\'self\', \'gradients\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue"
+    argspec: "args=[\'self\', \'features\', \'weights\', \'training\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'True\', \'None\'], "
+  }
+}
@@ -0,0 +1,9 @@
+path: "tensorflow.tpu.experimental.embedding.TableConfig"
+tf_class {
+  is_instance: "<class \'tensorflow.python.tpu.tpu_embedding_v2_utils.TableConfig\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'vocabulary_size\', \'dim\', \'initializer\', \'optimizer\', \'combiner\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'mean\', \'None\'], "
+  }
+}
@@ -0,0 +1,27 @@
+path: "tensorflow.tpu.experimental.embedding"
+tf_module {
+  member {
+    name: "Adagrad"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "Adam"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "FeatureConfig"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SGD"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "TPUEmbedding"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "TableConfig"
+    mtype: "<type \'type\'>"
+  }
+}
@@ -12,6 +12,10 @@ tf_module {
     name: "Topology"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "embedding"
+    mtype: "<type \'module\'>"
+  }
   member_method {
     name: "initialize_tpu_system"
     argspec: "args=[\'cluster_resolver\'], varargs=None, keywords=None, defaults=[\'None\'], "