diff --git a/tensorflow/lite/micro/kernels/div.cc b/tensorflow/lite/micro/kernels/div.cc
new file mode 100644
index 00000000000..f87f87da45c
--- /dev/null
+++ b/tensorflow/lite/micro/kernels/div.cc
@@ -0,0 +1,266 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <stddef.h>
+#include <stdint.h>
+
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/compatibility.h"
+#include "tensorflow/lite/kernels/internal/optimized/cpu_check.h"
+#include "tensorflow/lite/kernels/internal/optimized/neon_check.h"
+#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/reference/process_broadcast_shapes.h"
+#include "tensorflow/lite/kernels/internal/reference/reference_ops.h"
+#include "tensorflow/lite/kernels/internal/tensor.h"
+#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+namespace div {
+
+// This file has three implementations of Div.
+enum KernelType {
+  kReference,
+  kGenericOptimized,  // Neon-free
+  kNeonOptimized,
+};
+
+constexpr int kInputTensor1 = 0;
+constexpr int kInputTensor2 = 1;
+constexpr int kOutputTensor = 0;
+
+struct OpData {
+  bool requires_broadcast;
+
+  // Parameters used in the quantized paths where the output is 8bit
+  int32 output_activation_min;
+  int32 output_activation_max;
+
+  // Parameters used in all quantized paths
+  int32_t output_multiplier;
+  int output_shift;
+};
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  auto* data = new OpData;
+  data->requires_broadcast = false;
+  return data;
+}
+
+void Free(TfLiteContext* context, void* buffer) {
+  delete reinterpret_cast<OpData*>(buffer);
+}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  auto* params = reinterpret_cast<TfLiteDivParams*>(node->builtin_data);
+  OpData* data = reinterpret_cast<OpData*>(node->user_data);
+
+  TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
+  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
+
+  const TfLiteTensor* input1;
+  TF_LITE_ENSURE_OK(context,
+                    GetInputSafe(context, node, kInputTensor1, &input1));
+  const TfLiteTensor* input2;
+  TF_LITE_ENSURE_OK(context,
+                    GetInputSafe(context, node, kInputTensor2, &input2));
+  TfLiteTensor* output;
+  TF_LITE_ENSURE_OK(context,
+                    GetOutputSafe(context, node, kOutputTensor, &output));
+
+  TF_LITE_ENSURE_TYPES_EQ(context, input1->type, input2->type);
+  output->type = input2->type;
+
+  data->requires_broadcast = !HaveSameShapes(input1, input2);
+
+  TfLiteIntArray* output_size = nullptr;
+  if (data->requires_broadcast) {
+    TF_LITE_ENSURE_OK(context, CalculateShapeForBroadcast(
+                                   context, input1, input2, &output_size));
+  } else {
+    output_size = TfLiteIntArrayCopy(input1->dims);
+  }
+
+  if (output->type == kTfLiteUInt8) {
+    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
+        context, params->activation, output, &data->output_activation_min,
+        &data->output_activation_max));
+    const double real_multiplier =
+        input1->params.scale / (input2->params.scale * output->params.scale);
+    QuantizeMultiplier(real_multiplier, &data->output_multiplier,
+                       &data->output_shift);
+  }
+
+  return context->ResizeTensor(context, output, output_size);
+}
+
+template <KernelType kernel_type>
+void EvalDiv(TfLiteContext* context, TfLiteNode* node, TfLiteDivParams* params,
+             const OpData* data, const TfLiteTensor* input1,
+             const TfLiteTensor* input2, TfLiteTensor* output) {
+#define TF_LITE_DIV(type, opname, data_type)                              \
+  tflite::ArithmeticParams op_params;                                     \
+  data_type output_activation_min, output_activation_max;                 \
+  CalculateActivationRange(params->activation, &output_activation_min,    \
+                           &output_activation_max);                       \
+  SetActivationParams(output_activation_min, output_activation_max,       \
+                      &op_params);                                        \
+  type::opname(op_params, GetTensorShape(input1),                         \
+               GetTensorData<data_type>(input1), GetTensorShape(input2),  \
+               GetTensorData<data_type>(input2), GetTensorShape(output),  \
+               GetTensorData<data_type>(output))
+  if (output->type == kTfLiteInt32) {
+    if (kernel_type == kReference) {
+      if (data->requires_broadcast) {
+        TF_LITE_DIV(reference_ops, BroadcastDivSlow, int32_t);
+      } else {
+        TF_LITE_DIV(reference_ops, Div, int32_t);
+      }
+    } else {
+      if (data->requires_broadcast) {
+        TF_LITE_DIV(optimized_ops, BroadcastDivSlow, int32_t);
+      } else {
+        TF_LITE_DIV(optimized_ops, Div, int32_t);
+      }
+    }
+  } else if (output->type == kTfLiteFloat32) {
+    if (kernel_type == kReference) {
+      if (data->requires_broadcast) {
+        TF_LITE_DIV(reference_ops, BroadcastDivSlow, float);
+      } else {
+        TF_LITE_DIV(reference_ops, Div, float);
+      }
+    } else {
+      if (data->requires_broadcast) {
+        TF_LITE_DIV(optimized_ops, BroadcastDivSlow, float);
+      } else {
+        TF_LITE_DIV(optimized_ops, Div, float);
+      }
+    }
+  }
+#undef TF_LITE_DIV
+}
+
+template <KernelType kernel_type>
+TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
+                           TfLiteDivParams* params, const OpData* data,
+                           const TfLiteTensor* input1,
+                           const TfLiteTensor* input2, TfLiteTensor* output) {
+  if (input1->type == kTfLiteUInt8 && input2->type == kTfLiteUInt8 &&
+      output->type == kTfLiteUInt8) {
+    tflite::ArithmeticParams op_params;
+    SetActivationParams(data->output_activation_min,
+                        data->output_activation_max, &op_params);
+    op_params.input1_offset = -input1->params.zero_point;
+    op_params.input2_offset = -input2->params.zero_point;
+    op_params.output_offset = output->params.zero_point;
+    op_params.output_multiplier = data->output_multiplier;
+    op_params.output_shift = data->output_shift;
+    bool need_broadcast = optimized_ops::ProcessBroadcastShapes(
+        GetTensorShape(input1), GetTensorShape(input2), &op_params);
+#define TF_LITE_DIV(type, opname, dtype)                              \
+  type::opname(op_params, GetTensorShape(input1),                     \
+               GetTensorData<dtype>(input1), GetTensorShape(input2),  \
+               GetTensorData<dtype>(input2), GetTensorShape(output),  \
+               GetTensorData<dtype>(output))
+    if (kernel_type == kReference) {
+      if (need_broadcast) {
+        TF_LITE_DIV(reference_ops, BroadcastDivSlow, uint8_t);
+      } else {
+        TF_LITE_DIV(reference_ops, Div, uint8_t);
+      }
+    } else {
+      if (need_broadcast) {
+        TF_LITE_DIV(optimized_ops, BroadcastDivSlow, uint8_t);
+      } else {
+        TF_LITE_DIV(optimized_ops, Div, uint8_t);
+      }
+    }
+#undef TF_LITE_DIV
+  } else {
+    TF_LITE_KERNEL_LOG(
+        context, "Unsupported combination of input and output types in Div.");
+    return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+template <KernelType kernel_type>
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  auto* params = reinterpret_cast<TfLiteDivParams*>(node->builtin_data);
+  OpData* data = reinterpret_cast<OpData*>(node->user_data);
+
+  const TfLiteTensor* input1;
+  TF_LITE_ENSURE_OK(context,
+                    GetInputSafe(context, node, kInputTensor1, &input1));
+  const TfLiteTensor* input2;
+  TF_LITE_ENSURE_OK(context,
+                    GetInputSafe(context, node, kInputTensor2, &input2));
+  TfLiteTensor* output;
+  TF_LITE_ENSURE_OK(context,
+                    GetOutputSafe(context, node, kOutputTensor, &output));
+
+  if (output->type == kTfLiteFloat32 || output->type == kTfLiteInt32) {
+    EvalDiv<kernel_type>(context, node, params, data, input1, input2, output);
+  } else if (output->type == kTfLiteUInt8) {
+    TF_LITE_ENSURE_OK(
+        context, EvalQuantized<kernel_type>(context, node, params, data,
+                                            input1, input2, output));
+  } else {
+    TF_LITE_KERNEL_LOG(
+        context,
+        "Div only supports FLOAT32, INT32 and quantized UINT8 now, got %d.",
+        output->type);
+    return kTfLiteError;
+  }
+
+  return kTfLiteOk;
+}
+
+}  // namespace div
+
+TfLiteRegistration* Register_DIV_REF() {
+  static TfLiteRegistration r = {div::Init, div::Free, div::Prepare,
+                                 div::Eval<div::kReference>};
+  return &r;
+}
+
+TfLiteRegistration* Register_DIV_GENERIC_OPT() {
+  static TfLiteRegistration r = {div::Init, div::Free, div::Prepare,
+                                 div::Eval<div::kGenericOptimized>};
+  return &r;
+}
+
+TfLiteRegistration* Register_DIV_NEON_OPT() {
+  static TfLiteRegistration r = {div::Init, div::Free, div::Prepare,
+                                 div::Eval<div::kNeonOptimized>};
+  return &r;
+}
+
+TfLiteRegistration* Register_DIV() {
+#ifdef USE_NEON
+  return Register_DIV_NEON_OPT();
+#else
+  return Register_DIV_GENERIC_OPT();
+#endif
+}
+
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/lite/micro/kernels/div_test.cc b/tensorflow/lite/micro/kernels/div_test.cc
new file mode 100644
index 00000000000..57836f9b180
--- /dev/null
+++ b/tensorflow/lite/micro/kernels/div_test.cc
@@ -0,0 +1,310 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <stdint.h>
+#include <initializer_list>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
+#include "tensorflow/lite/kernels/test_util.h"
+#include "tensorflow/lite/schema/schema_generated.h"
+
+namespace tflite {
+namespace {
+
+using ::testing::ElementsAreArray;
+
+class BaseDivOpModel : public SingleOpModel {
+ public:
+  BaseDivOpModel(const TensorData& input1, const TensorData& input2,
+                 const TensorData& output,
+                 ActivationFunctionType activation_type) {
+    input1_ = AddInput(input1);
+    input2_ = AddInput(input2);
+    output_ = AddOutput(output);
+    SetBuiltinOp(BuiltinOperator_DIV, BuiltinOptions_DivOptions,
+                 CreateDivOptions(builder_, activation_type).Union());
+    BuildInterpreter({GetShape(input1_), GetShape(input2_)});
+  }
+
+  int input1() { return input1_; }
+  int input2() { return input2_; }
+
+ protected:
+  int input1_;
+  int input2_;
+  int output_;
+};
+
+class FloatDivOpModel : public BaseDivOpModel {
+ public:
+  using BaseDivOpModel::BaseDivOpModel;
+
+  std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+};
+
+class IntegerDivOpModel : public BaseDivOpModel {
+ public:
+  using BaseDivOpModel::BaseDivOpModel;
+
+  std::vector<int32_t> GetOutput() { return ExtractVector<int32_t>(output_); }
+};
+
+class QuantizedDivOpModel : public BaseDivOpModel {
+ public:
+  using BaseDivOpModel::BaseDivOpModel;
+
+  template <typename integer_dtype>
+  std::vector<float> GetDequantizedOutput() {
+    return Dequantize<integer_dtype>(ExtractVector<integer_dtype>(output_),
+                                     GetScale(output_), GetZeroPoint(output_));
+  }
+};
+
+// For quantized Div, the error shouldn't exceed (2*step + step^2).
+inline float GetTolerance(int min, int max) {
+  const float kQuantizedStep = (max - min) / 255.0f;
+  const float kQuantizedTolerance =
+      2.0f * kQuantizedStep + kQuantizedStep * kQuantizedStep;
+  return kQuantizedTolerance;
+}
+
+TEST(FloatDivOpTest, NoActivation) {
+  FloatDivOpModel m({TensorType_FLOAT32, {1, 2, 2, 1}},
+                    {TensorType_FLOAT32, {1, 2, 2, 1}},
+                    {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE);
+  m.PopulateTensor<float>(m.input1(), {-0.2, 0.2, -1.2, 0.8});
+  m.PopulateTensor<float>(m.input2(), {0.5, 0.2, -1.5, 0.5});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(),
+              ElementsAreArray(ArrayFloatNear({-0.4, 1.0, 0.8, 1.6})));
+}
+
+TEST(FloatDivOpTest, ActivationRELU_N1_TO_1) {
+  FloatDivOpModel m(
+      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
+      {TensorType_FLOAT32, {}}, ActivationFunctionType_RELU_N1_TO_1);
+  m.PopulateTensor<float>(m.input1(), {-0.2, 0.2, -1.2, 0.8});
+  m.PopulateTensor<float>(m.input2(), {0.1, 0.2, -1.5, 0.5});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(),
+              ElementsAreArray(ArrayFloatNear({-1.0, 1.0, 0.8, 1.0})));
+}
+
+TEST(FloatDivOpTest, VariousInputShapes) {
+  std::vector<std::vector<int>> test_shapes = {
+      {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    FloatDivOpModel m({TensorType_FLOAT32, test_shapes[i]},
+                      {TensorType_FLOAT32, test_shapes[i]},
+                      {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE);
+    m.PopulateTensor<float>(m.input1(), {-2.0, 0.2, 0.3, 0.8, 1.1, -2.0});
+    m.PopulateTensor<float>(m.input2(), {0.1, 0.2, 0.6, 0.5, -1.1, -0.1});
+    m.Invoke();
+    EXPECT_THAT(
+        m.GetOutput(),
+        ElementsAreArray(ArrayFloatNear({-20.0, 1.0, 0.5, 1.6, -1.0, 20.0})))
+        << "With shape number " << i;
+  }
+}
+
+TEST(FloatDivOpTest, WithBroadcast) {
+  std::vector<std::vector<int>> test_shapes = {
+      {8}, {2, 4}, {2, 1, 4}, {1, 2, 2, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    FloatDivOpModel m({TensorType_FLOAT32, test_shapes[i]},
+                      {TensorType_FLOAT32, {}},  // always a scalar
+                      {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE);
+    m.PopulateTensor<float>(
+        m.input1(), {-0.2, 0.2, 0.07, 0.08, 0.11, -0.123, -0.32, 0.54});
+    m.PopulateTensor<float>(m.input2(), {0.1});
+    m.Invoke();
+    EXPECT_THAT(m.GetOutput(),
+                ElementsAreArray(ArrayFloatNear(
+                    {-2.0, 2.0, 0.7, 0.8, 1.1, -1.23, -3.2, 5.4})))
+        << "With shape number " << i;
+  }
+}
+
+TEST(FloatDivOpTest, WithBroadcast5D) {
+  std::vector<std::vector<int>> test_shapes = {{1, 2, 1, 2, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    FloatDivOpModel m({TensorType_FLOAT32, test_shapes[i]},
+                      {TensorType_FLOAT32, {}},  // always a scalar
+                      {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE);
+    m.PopulateTensor<float>(
+        m.input1(), {-0.2, 0.2, 0.07, 0.08, 0.11, -0.123, -0.32, 0.54});
+    m.PopulateTensor<float>(m.input2(), {0.1});
+    m.Invoke();
+    EXPECT_THAT(m.GetOutput(),
+                ElementsAreArray(ArrayFloatNear(
+                    {-2.0, 2.0, 0.7, 0.8, 1.1, -1.23, -3.2, 5.4})))
+        << "With shape number " << i;
+  }
+}
+
+TEST(IntegerDivOpTest, NoActivation) {
+  IntegerDivOpModel m({TensorType_INT32, {1, 2, 2, 1}},
+                      {TensorType_INT32, {1, 2, 2, 1}}, {TensorType_INT32, {}},
+                      ActivationFunctionType_NONE);
+  m.PopulateTensor<int32_t>(m.input1(), {-2, 2, -15, 8});
+  m.PopulateTensor<int32_t>(m.input2(), {5, -2, -3, 5});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({0, -1, 5, 1}));
+}
+
+TEST(IntegerDivOpTest, ActivationRELU_N1_TO_1) {
+  IntegerDivOpModel m({TensorType_INT32, {1, 2, 2, 1}},
+                      {TensorType_INT32, {1, 2, 2, 1}}, {TensorType_INT32, {}},
+                      ActivationFunctionType_RELU_N1_TO_1);
+  m.PopulateTensor<int32_t>(m.input1(), {-2, 2, -12, 8});
+  m.PopulateTensor<int32_t>(m.input2(), {1, 2, -15, 5});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1, 1, 0, 1}));
+}
+
+TEST(IntegerDivOpTest, VariousInputShapes) {
+  std::vector<std::vector<int>> test_shapes = {
+      {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    IntegerDivOpModel m({TensorType_INT32, test_shapes[i]},
+                        {TensorType_INT32, test_shapes[i]},
+                        {TensorType_INT32, {}}, ActivationFunctionType_NONE);
+    m.PopulateTensor<int32_t>(m.input1(), {-20, 2, 3, 8, 11, -20});
+    m.PopulateTensor<int32_t>(m.input2(), {1, 2, 6, 5, -11, -1});
+    m.Invoke();
+    EXPECT_THAT(m.GetOutput(), ElementsAreArray({-20, 1, 0, 1, -1, 20}))
+        << "With shape number " << i;
+  }
+}
+
+TEST(IntegerDivOpTest, WithBroadcast) {
+  std::vector<std::vector<int>> test_shapes = {
+      {8}, {2, 4}, {2, 1, 4}, {1, 4, 1, 2}, {1, 2, 1, 2, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    IntegerDivOpModel m({TensorType_INT32, test_shapes[i]},
+                        {TensorType_INT32, {}},  // always a scalar
+                        {TensorType_INT32, {}}, ActivationFunctionType_NONE);
+    m.PopulateTensor<int32_t>(m.input1(), {-20, 21, 7, 8, 11, -123, -42, -48});
+    m.PopulateTensor<int32_t>(m.input2(), {3});
+    m.Invoke();
+    EXPECT_THAT(m.GetOutput(),
+                ElementsAreArray({-6, 7, 2, 2, 3, -41, -14, -16}))
+        << "With shape number " << i;
+  }
+}
+
+template <TensorType tensor_type, typename integer_dtype>
+void QuantizedNoActivation() {
+  const float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+  QuantizedDivOpModel m({tensor_type, {1, 2, 2, 1}, -1.0, 1.0},
+                        {tensor_type, {1, 2, 2, 1}, -1.0, 1.0},
+                        {tensor_type, {}, -1.0, 1.0},
+                        ActivationFunctionType_NONE);
+  m.QuantizeAndPopulate<integer_dtype>(m.input1(), {-0.8, -0.2, 0.3, 0.7});
+  m.QuantizeAndPopulate<integer_dtype>(m.input2(), {-0.8, 0.4, 0.8, 1.0});
+  m.Invoke();
+  EXPECT_THAT(m.GetDequantizedOutput<integer_dtype>(),
+              ElementsAreArray(ArrayFloatNear({1.0, -0.5, 0.375, 0.7},
+                                              kQuantizedTolerance)));
+}
+
+TEST(QuantizedDivOpTest, QuantizedNoActivationUInt8) {
+  QuantizedNoActivation<TensorType_UINT8, uint8_t>();
+}
+
+template <TensorType tensor_type, typename integer_dtype>
+void QuantizedActivationRELU_N1_TO_1() {
+  const float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+  const std::vector<std::vector<float>> inputs1 = {{-0.8, 0.2, 0.9, 0.7},
+                                                   {-0.5, 0.2, 0.6, 0.3}};
+  const std::vector<std::vector<float>> inputs2 = {{0.6, 0.4, 0.9, -0.8},
+                                                   {0.6, 0.5, -0.8, 0.5}};
+  const std::vector<std::vector<float>> results = {{-1.0, 0.5, 1.0, -0.875},
+                                                   {-0.833, 0.4, -0.75, 0.6}};
+  for (int i = 0; i < inputs1.size(); ++i) {
+    QuantizedDivOpModel m({tensor_type, {1, 2, 2, 1}, -1.0, 1.0},
+                          {tensor_type, {1, 2, 2, 1}, -1.0, 1.0},
+                          {tensor_type, {}, -1.0, 1.0},
+                          ActivationFunctionType_RELU_N1_TO_1);
+    m.QuantizeAndPopulate<integer_dtype>(m.input1(), inputs1[i]);
+    m.QuantizeAndPopulate<integer_dtype>(m.input2(), inputs2[i]);
+    m.Invoke();
+    EXPECT_THAT(
+        m.GetDequantizedOutput<integer_dtype>(),
+        ElementsAreArray(ArrayFloatNear(results[i], kQuantizedTolerance)))
+        << "With test number " << i;
+  }
+}
+
+TEST(QuantizedDivOpTest, QuantizedActivationRELU_N1_TO_1UInt8) {
+  QuantizedActivationRELU_N1_TO_1<TensorType_UINT8, uint8_t>();
+}
+
+template <TensorType tensor_type, typename integer_dtype>
+void QuantizedVariousInputShapes() {
+  const float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
+  const std::vector<std::vector<int>> test_shapes = {
+      {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    QuantizedDivOpModel m({tensor_type, test_shapes[i], -3.0, 3.0},
+                          {tensor_type, test_shapes[i], -3.0, 3.0},
+                          {tensor_type, {}, -3.0, 3.0},
+                          ActivationFunctionType_NONE);
+    m.QuantizeAndPopulate<integer_dtype>(m.input1(),
+                                         {-2.0, 0.2, 1.7, 0.9, 0.4, 2.0});
+    m.QuantizeAndPopulate<integer_dtype>(m.input2(),
+                                         {1.3, 0.3, 1.1, 0.4, -1.1, 1.9});
+    m.Invoke();
+    EXPECT_THAT(
+        m.GetDequantizedOutput<integer_dtype>(),
+        ElementsAreArray(ArrayFloatNear(
+            {-1.538, 0.667, 1.545, 2.25, -0.364, 1.053}, kQuantizedTolerance)))
+        << "With shape number " << i;
+  }
+}
+
+TEST(QuantizedDivOpTest, QuantizedVariousInputShapesUInt8) {
+  QuantizedVariousInputShapes<TensorType_UINT8, uint8_t>();
+}
+
+template <TensorType tensor_type, typename integer_dtype>
+void QuantizedWithBroadcast() {
+  const float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
+  const std::vector<std::vector<int>> test_shapes = {
+      {8}, {2, 4}, {2, 1, 4}, {1, 4, 1, 2}, {1, 2, 1, 2, 2}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    QuantizedDivOpModel m(
+        {tensor_type, test_shapes[i], -3.0, 3.0}, {tensor_type, {}, -3.0, 3.0},
+        {tensor_type, {}, -3.0, 3.0}, ActivationFunctionType_NONE);
+    m.QuantizeAndPopulate<integer_dtype>(
+        m.input1(), {-2.0, 0.2, 0.7, 0.8, -0.5, 1.1, -1.3, 1.2});
+    m.QuantizeAndPopulate<integer_dtype>(m.input2(), {0.7});
+    m.Invoke();
+    EXPECT_THAT(m.GetDequantizedOutput<integer_dtype>(),
+                ElementsAreArray(ArrayFloatNear(
+                    {-2.857, 0.286, 1.0, 1.143, -0.714, 1.571, -1.857, 1.714},
+                    kQuantizedTolerance)))
+        << "With shape number " << i;
+  }
+}
+
+TEST(QuantizedDivOpTest, QuantizedWithBroadcastUInt8) {
+  QuantizedWithBroadcast<TensorType_UINT8, uint8_t>();
+}
+
+}  // namespace
+}  // namespace tflite