Adds decoupled weight decay support (the weight_decay_factor and multiply_weight_decay_factor_by_learning_rate arguments) to the TPU embedding AdamParameters, making it possible to use the same optimizer as BERT (AdamW) for embeddings. See Decoupled Weight Decay Regularization (https://arxiv.org/abs/1711.05101).

PiperOrigin-RevId: 293474777
Change-Id: I6ca37d5699ed39e5983f82ce32cde910e0ada164
path: "tensorflow.tpu.experimental.AdamParameters"
|
|
tf_class {
|
|
is_instance: "<class \'tensorflow.python.tpu.tpu_embedding.AdamParameters\'>"
|
|
is_instance: "<class \'tensorflow.python.tpu.tpu_embedding._OptimizationParameters\'>"
|
|
is_instance: "<type \'object\'>"
|
|
member_method {
|
|
name: "__init__"
|
|
argspec: "args=[\'self\', \'learning_rate\', \'beta1\', \'beta2\', \'epsilon\', \'lazy_adam\', \'sum_inside_sqrt\', \'use_gradient_accumulation\', \'clip_weight_min\', \'clip_weight_max\', \'weight_decay_factor\', \'multiply_weight_decay_factor_by_learning_rate\'], varargs=None, keywords=None, defaults=[\'0.9\', \'0.999\', \'1e-08\', \'True\', \'True\', \'True\', \'None\', \'None\', \'None\', \'None\'], "
|
|
}
|
|
}
|
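
A minimal usage sketch of the constructor described by the argspec above, assuming the TF 1.x-era tf.tpu.experimental namespace named in the path; the hyperparameter values are illustrative, not taken from the change itself:

import tensorflow.compat.v1 as tf

# Adam for TPU embedding tables with decoupled weight decay
# (arXiv:1711.05101). weight_decay_factor and
# multiply_weight_decay_factor_by_learning_rate are the newly exposed
# arguments; the remaining arguments keep their documented defaults.
adam = tf.tpu.experimental.AdamParameters(
    learning_rate=1e-3,
    beta1=0.9,
    beta2=0.999,
    epsilon=1e-8,
    lazy_adam=True,
    sum_inside_sqrt=True,
    use_gradient_accumulation=True,
    weight_decay_factor=0.01,  # illustrative value
    multiply_weight_decay_factor_by_learning_rate=True)

With multiply_weight_decay_factor_by_learning_rate=True, the per-step decay presumably scales with the learning-rate schedule (w <- w - lr * weight_decay_factor * w), matching the decoupled formulation that BERT's AdamW uses.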