Makes it possible to use the same optimizer as BERT for embeddings. Also, see Decoupled Weight Decay Regularization (https://arxiv.org/abs/1711.05101). PiperOrigin-RevId: 293474777 Change-Id: I6ca37d5699ed39e5983f82ce32cde910e0ada164
11 lines
642 B
Plaintext
path: "tensorflow.tpu.experimental.AdagradParameters"
tf_class {
  is_instance: "<class \'tensorflow.python.tpu.tpu_embedding.AdagradParameters\'>"
  is_instance: "<class \'tensorflow.python.tpu.tpu_embedding._OptimizationParameters\'>"
  is_instance: "<type \'object\'>"
  member_method {
    name: "__init__"
    argspec: "args=[\'self\', \'learning_rate\', \'initial_accumulator\', \'use_gradient_accumulation\', \'clip_weight_min\', \'clip_weight_max\', \'weight_decay_factor\', \'multiply_weight_decay_factor_by_learning_rate\'], varargs=None, keywords=None, defaults=[\'0.1\', \'True\', \'None\', \'None\', \'None\', \'None\'], "
  }
}