diff --git a/tensorflow/compiler/tests/adagrad_da_test.py b/tensorflow/compiler/tests/adagrad_da_test.py
index 369d0097a0f..e08435b5713 100644
--- a/tensorflow/compiler/tests/adagrad_da_test.py
+++ b/tensorflow/compiler/tests/adagrad_da_test.py
@@ -56,9 +56,9 @@ class AdagradDAOptimizerTest(xla_test.XLATestCase):
         # Run a step of AdagradDA
         update.run()
 
-        # Let g to be gradient accumulator, gg to be gradient squared
-        # accumulator, T be the global step, lr is the learning rate, and k the
-        # initial gradient squared accumulator value.
+        # Let g be the gradient accumulator, gg be the gradient squared
+        # accumulator, T be the global step, lr be the learning rate,
+        # and k be the initial gradient squared accumulator value.
         # w = \dfrac{sign(-g)*lr*|g - l1*T|_{+}}{l2*T*lr + \sqrt{k+gg}}
         # For -1*3.0*(0.1 - 0)/(0 + sqrt(0.1 + 0.1*0.1)) = -0.904534
         # similarly for others.
diff --git a/tensorflow/python/training/adagrad_da_test.py b/tensorflow/python/training/adagrad_da_test.py
index aacfe6faf4e..0730618e31f 100644
--- a/tensorflow/python/training/adagrad_da_test.py
+++ b/tensorflow/python/training/adagrad_da_test.py
@@ -63,9 +63,9 @@ class AdagradDAOptimizerTest(test.TestCase):
         update.run()
 
         v0_val, v1_val = self.evaluate([var0, var1])
-        # Let g to be gradient accumulator, gg to be gradient squared
-        # accumulator, T be the global step, lr is the learning rate, and k the
-        # initial gradient squared accumulator value.
+        # Let g be the gradient accumulator, gg be the gradient squared
+        # accumulator, T be the global step, lr be the learning rate,
+        # and k be the initial gradient squared accumulator value.
         # w = \dfrac{sign(-g)*lr*|g - l1*T|_{+}}{l2*T*lr + \sqrt{k+gg}}
         # For -1*3.0*(0.1 - 0)/(0 + sqrt(0.1 + 0.1*0.1)) = -0.904534
         # similarly for others.
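
Note (not part of the patch): the commented closed-form update can be checked numerically. Below is a minimal sketch in plain Python; the helper name adagrad_da_weight is hypothetical, and it reads |g - l1*T|_{+} as the positive part max(|g| - l1*T, 0), using the first-step values from the comment (lr = 3.0, T = 1, k = 0.1, a single gradient of 0.1, l1 = l2 = 0).

import math

def adagrad_da_weight(g, gg, T, lr, k, l1=0.0, l2=0.0):
  # Closed-form AdagradDA weight from the comment:
  # w = sign(-g)*lr*|g - l1*T|_{+} / (l2*T*lr + sqrt(k + gg))
  numerator = math.copysign(1.0, -g) * lr * max(abs(g) - l1 * T, 0.0)
  denominator = l2 * T * lr + math.sqrt(k + gg)
  return numerator / denominator

# g = 0.1 (gradient accumulator), gg = 0.1*0.1 (squared accumulator).
print(round(adagrad_da_weight(g=0.1, gg=0.01, T=1, lr=3.0, k=0.1), 6))
# Prints -0.904534, matching the value in the comment.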