From ff705f007e6a7aef3b1c050718153daad541d652 Mon Sep 17 00:00:00 2001
From: Chenkai Kuang
Date: Mon, 30 Nov 2020 11:51:15 -0800
Subject: [PATCH] Small test fix: avoid creating optimizer objects inside the
 `fn` passed to strategy.run. This ends up creating separate optimizer
 objects for each replica in MirroredStrategy.

PiperOrigin-RevId: 344857919
Change-Id: Iabf8d42143da6683612c8d26f172493c9e24f72c
---
 tensorflow/python/keras/mixed_precision/keras_test.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/keras/mixed_precision/keras_test.py b/tensorflow/python/keras/mixed_precision/keras_test.py
index 59c6b6dfdda..a8356f31568 100644
--- a/tensorflow/python/keras/mixed_precision/keras_test.py
+++ b/tensorflow/python/keras/mixed_precision/keras_test.py
@@ -253,6 +253,10 @@ class KerasLayerTest(keras_parameterized.TestCase):
     with strategy_fn().scope() as strategy:
       with policy.policy_scope('mixed_float16'):
         layer = mp_test_util.MultiplyLayer(assert_type=dtypes.float16)
+        # Learning rate is small enough that if applied to a float16 variable,
+        # the variable will not change. So this tests the learning rate is not
+        # applied to a float16 value, but instead the float32 variable.
+        opt = gradient_descent.SGD(2**-14)
 
         def run_fn():
           with backprop.GradientTape() as tape:
@@ -261,10 +265,6 @@ class KerasLayerTest(keras_parameterized.TestCase):
             # sum of each of the replica's losses.
             y /= strategy.num_replicas_in_sync
 
-          # Learning rate is small enough that if applied to a float16 variable,
-          # the variable will not change. So this tests the learning rate is not
-          # applied to a float16 value, but instead the float32 variable.
-          opt = gradient_descent.SGD(2**-14)
           grad = tape.gradient(y, layer.v)
           return opt.apply_gradients([(grad, layer.v)])
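
Note (illustrative, not part of the patch): a minimal sketch of the behavior the
commit message describes, written against the public TF API rather than the
test's internal modules. Under MirroredStrategy, `strategy.run` executes the
given function once per replica, so an optimizer constructed inside it is
duplicated per replica; building it once under the strategy scope keeps a single
shared optimizer, which is what this change does to the test. The variable `v`
and the toy loss below are assumptions for illustration only.

    import tensorflow as tf

    strategy = tf.distribute.MirroredStrategy()

    with strategy.scope():
      v = tf.Variable(1.0)
      # Fix: build the optimizer once, outside the per-replica function, so all
      # replicas share a single SGD object (and its associated variables).
      opt = tf.keras.optimizers.SGD(2**-14)

    def run_fn():
      with tf.GradientTape() as tape:
        y = v * 2.0
        # Divide by the replica count so the summed per-replica losses match
        # the single-replica loss.
        y /= strategy.num_replicas_in_sync
      grad = tape.gradient(y, v)
      # Anti-pattern the patch removes: constructing the optimizer here would
      # create a separate optimizer object on every replica.
      return opt.apply_gradients([(grad, v)])

    strategy.run(run_fn)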