diff --git a/tensorflow/compiler/aot/benchmark.cc b/tensorflow/compiler/aot/benchmark.cc
index ff720382812..b1ded79d0ea 100644
--- a/tensorflow/compiler/aot/benchmark.cc
+++ b/tensorflow/compiler/aot/benchmark.cc
@@ -74,16 +74,16 @@ void DumpStatsToStdout(const Stats& stats) {
   const int kBufSize = 1000;
   char buf[kBufSize];
   snprintf(buf, kBufSize, "Mean with %2.0f%% trimmed:", trim_ratio * 100);
-  const string label_trimmed(buf);
+  std::string label_trimmed(buf);
   snprintf(buf, kBufSize, "Mean of %2.0f%% best:", best_ratio * 100);
-  const string label_best(buf);
-  std::vector<std::pair<string, double>> groups = {
+  std::string label_best(buf);
+  std::vector<std::pair<std::string, double>> groups = {
       {"Best:", sorted_us.front()},
       {"Worst:", sorted_us.back()},
       {"Median:", sorted_us[count_us / 2]},
       {"Mean:", sum_us / count_us},
-      {label_trimmed, sum_us_trimmed / count_us_trimmed},
-      {label_best, sum_us_best / count_us_best},
+      {std::move(label_trimmed), sum_us_trimmed / count_us_trimmed},
+      {std::move(label_best), sum_us_best / count_us_best},
   };
   int max_label_size = 0;
   double max_us = 0;
@@ -102,7 +102,7 @@ void DumpStatsToStdout(const Stats& stats) {
   }
   // Dump stats out.
   printf("Benchmark ran %zu iterations over %lld us\n", count_us,
-         stats.total_us);
+         static_cast<long long>(stats.total_us));  // NOLINT
   for (const auto& g : groups) {
     printf("  %-*s %*.3f us\n", max_label_size, g.first.c_str(),
            max_digits + 4, g.second);
@@ -114,7 +114,8 @@ void Benchmark(const Options& options, const BenchmarkFn& fn, Stats* stats) {
   const int64 max_us = (options.max_micros <= 0 && options.max_iters <= 0)
                            ? Options::kDefaultMicros
                            : options.max_micros;
-  printf("Running benchmark for %lld us\n", max_us);
+  // NOLINTNEXTLINE
+  printf("Running benchmark for %lld us\n", static_cast<long long>(max_us));
   const int64 start_us = NowMicros();
   int64 iters = 0;
   while (true) {
diff --git a/tensorflow/compiler/tf2xla/kernels/matrix_diag_ops.cc b/tensorflow/compiler/tf2xla/kernels/matrix_diag_ops.cc
index 7cf9da0c057..57e961917cc 100644
--- a/tensorflow/compiler/tf2xla/kernels/matrix_diag_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/matrix_diag_ops.cc
@@ -278,8 +278,10 @@ class MatrixDiagOp : public XlaOpKernel {
                 errors::InvalidArgument(
                     "The number of diagonals provided in the input does not "
                     "match the lower_diag_index and upper_diag_index range."));
-    const int64 min_num_rows = max_diag_len - std::min(upper_diag_index, 0LL);
-    const int64 min_num_cols = max_diag_len + std::max(lower_diag_index, 0LL);
+    const int64 min_num_rows =
+        max_diag_len - std::min(upper_diag_index, int64{0});
+    const int64 min_num_cols =
+        max_diag_len + std::max(lower_diag_index, int64{0});
     OP_REQUIRES(context, num_rows == -1 || num_rows >= min_num_rows,
                 errors::InvalidArgument("The number of rows is too small."));
     OP_REQUIRES(context, num_cols == -1 || num_cols >= min_num_cols,
@@ -387,8 +389,8 @@ class MatrixDiagPartOp : public XlaOpKernel {
     const int num_diags = upper_diag_index - lower_diag_index + 1;
     if (num_diags > 1) output_shape.AddDim(num_diags);
     const int32 max_diag_len =
-        std::min(num_rows + std::min(upper_diag_index, 0LL),
-                 num_cols - std::max(lower_diag_index, 0LL));
+        std::min(num_rows + std::min(upper_diag_index, int64{0}),
+                 num_cols - std::max(lower_diag_index, int64{0}));
     output_shape.AddDim(max_diag_len);

     // Computes output.
@@ -502,8 +504,8 @@ class MatrixSetDiagOp : public XlaOpKernel {
     expected_diag_shape.RemoveLastDims(2);
     if (num_diags > 1) expected_diag_shape.AddDim(num_diags);
     const int32 max_diag_len =
-        std::min(num_rows + std::min(upper_diag_index, 0LL),
-                 num_cols - std::max(lower_diag_index, 0LL));
+        std::min(num_rows + std::min(upper_diag_index, int64{0}),
+                 num_cols - std::max(lower_diag_index, int64{0}));
     expected_diag_shape.AddDim(max_diag_len);
     OP_REQUIRES(
         context, expected_diag_shape == diag_shape,
diff --git a/tensorflow/compiler/xla/client/lib/matrix.cc b/tensorflow/compiler/xla/client/lib/matrix.cc
index 3f4a63c31be..b7721f2bbc5 100644
--- a/tensorflow/compiler/xla/client/lib/matrix.cc
+++ b/tensorflow/compiler/xla/client/lib/matrix.cc
@@ -125,7 +125,7 @@ XlaOp GetMatrixDiagonalViaGather(XlaOp x, int k) {

   // Calculate the indices of diagonal part with offset k.
   const int64 diag_len =
-      std::max(std::min(m + std::min(k, 0), n - std::max(k, 0)), 0LL);
+      std::max(std::min(m + std::min(k, 0), n - std::max(k, 0)), int64{0});
   XlaOp diag_base_indices = BroadcastInDim(Iota(builder, S32, diag_len),
                                            {diag_len, num_index_dims}, {0});
   XlaOp diag_offset =
diff --git a/tensorflow/compiler/xla/client/padding.cc b/tensorflow/compiler/xla/client/padding.cc
index 992b13139c4..885327a5636 100644
--- a/tensorflow/compiler/xla/client/padding.cc
+++ b/tensorflow/compiler/xla/client/padding.cc
@@ -126,8 +126,8 @@ std::vector<std::pair<int64, int64>> MakePadding(
                                 window_dimension - input_dimension,
                             0);
         low_high_padding.emplace_back(
-            tensorflow::MathUtil::FloorOfRatio(padding_size, 2ll),
-            tensorflow::MathUtil::CeilOfRatio(padding_size, 2ll));
+            tensorflow::MathUtil::FloorOfRatio(padding_size, int64{2}),
+            tensorflow::MathUtil::CeilOfRatio(padding_size, int64{2}));
       }
       break;
     }
diff --git a/tensorflow/compiler/xla/python/bfloat16.cc b/tensorflow/compiler/xla/python/bfloat16.cc
index 692d71876f8..2f288094ecd 100644
--- a/tensorflow/compiler/xla/python/bfloat16.cc
+++ b/tensorflow/compiler/xla/python/bfloat16.cc
@@ -608,7 +608,7 @@ int NPyBfloat16_ArgMinFunc(void* data, npy_intp n, npy_intp* min_ind,

 // NumPy casts

-template <typename T>
+template <typename T, typename Enable = void>
 struct TypeDescriptor {
   // typedef ... T;  // Representation type in memory for NumPy values of type
   // static int Dtype() { return NPY_...; }  // Numpy type number for T.
@@ -638,9 +638,12 @@ struct TypeDescriptor<uint32> {
   static int Dtype() { return NPY_UINT32; }
 };

-template <>
-struct TypeDescriptor<uint64> {
-  typedef uint64 T;
+template <typename Uint64Type>
+struct TypeDescriptor<
+    Uint64Type, typename std::enable_if<std::is_integral<Uint64Type>::value &&
+                                        !std::is_signed<Uint64Type>::value &&
+                                        sizeof(Uint64Type) == 8>::type> {
+  typedef Uint64Type T;
   static int Dtype() { return NPY_UINT64; }
 };

@@ -662,9 +665,12 @@ struct TypeDescriptor<int32> {
   static int Dtype() { return NPY_INT32; }
 };

-template <>
-struct TypeDescriptor<int64> {
-  typedef int64 T;
+template <typename Int64Type>
+struct TypeDescriptor<
+    Int64Type, typename std::enable_if<std::is_integral<Int64Type>::value &&
+                                       std::is_signed<Int64Type>::value &&
+                                       sizeof(Int64Type) == 8>::type> {
+  typedef Int64Type T;
   static int Dtype() { return NPY_INT64; }
 };

diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index 24718e16e22..a7d0e0e066c 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -299,7 +299,7 @@ int IrEmitter::MinimumAlignmentForPrimitiveType(PrimitiveType primitive_type) {
   DCHECK_LE(byte_size, 16);

   // Allocations may be 8-byte aligned if part of a small block.
-  return std::min(8LL, byte_size);
+  return std::min(int64{8}, byte_size);
 }

 int64 IrEmitter::ByteSizeOf(const Shape& shape) const {
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc
index 3f34adaa973..f5d0c889fa3 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc
@@ -128,7 +128,7 @@ bool IsCublasGemm(const HloInstruction& hlo) {
 std::array<int64, 3> GetReductionTiling(
     const ReductionDimensions& reduction_dimensions) {
   if (reduction_dimensions.is_row_reduction) {
-    int64 tile_z = std::min(reduction_dimensions.dimensions[0], 8LL);
+    int64 tile_z = std::min(reduction_dimensions.dimensions[0], int64{8});
     if (reduction_dimensions.dimensions[1] == 1) {
       CHECK_EQ(reduction_dimensions.dimensions[0], 1);
       return {tile_z, 1, 16};
diff --git a/tensorflow/compiler/xla/service/gpu/partition_assignment.cc b/tensorflow/compiler/xla/service/gpu/partition_assignment.cc
index 2276807d74f..4d89e758049 100644
--- a/tensorflow/compiler/xla/service/gpu/partition_assignment.cc
+++ b/tensorflow/compiler/xla/service/gpu/partition_assignment.cc
@@ -86,7 +86,8 @@ LaunchDimensions CalculateLaunchDimensions(
   // need more registers to hold intermediate values. Reduce the number of
   // blocks per thread to increase the number of registers available to ptxas.
   // Make sure we still have a multiple of 32.
-  threads_per_block = RoundUpToNearest(threads_per_block / unroll_factor, 32LL);
+  threads_per_block =
+      RoundUpToNearest(threads_per_block / unroll_factor, int64{32});
   if (num_elements < threads_per_block) {
     threads_per_block = num_elements;
     VLOG(2) << "Update # of threads per block to the element count ("
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc
index b2435d3fdf3..7159e5bfdf6 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc
@@ -1769,7 +1769,7 @@ Status HloEvaluator::HandleGather(HloInstruction* gather) {
       //                       output_dim_size);
       input_index_clamped[i] =
           std::min(operand_shape.dimensions(i) - output_dim_size,
-                   std::max(0LL, input_gather_index[i]));
+                   std::max(int64{0}, input_gather_index[i]));
     }
     for (int i = 0, e = input_index.size(); i < e; i++) {
       input_index[i] = input_index_clamped[i] + input_window_index[i];
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 4322c26b2de..bdaf9850757 100755
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -496,9 +496,9 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
             proto.convolution_dimension_numbers());
       }
       custom_call_instr->set_feature_group_count(
-          std::max(static_cast<int64>(proto.feature_group_count()), 1LL));
+          std::max(static_cast<int64>(proto.feature_group_count()), int64{1}));
       custom_call_instr->set_batch_group_count(
-          std::max(static_cast<int64>(proto.batch_group_count()), 1LL));
+          std::max(static_cast<int64>(proto.batch_group_count()), int64{1}));
       custom_call_instr->set_custom_call_has_side_effect(
           proto.custom_call_has_side_effect());
       break;
diff --git a/tensorflow/compiler/xla/service/triangular_solve_expander.cc b/tensorflow/compiler/xla/service/triangular_solve_expander.cc
index 0a8e2c3849f..a19f17996be 100644
--- a/tensorflow/compiler/xla/service/triangular_solve_expander.cc
+++ b/tensorflow/compiler/xla/service/triangular_solve_expander.cc
@@ -313,7 +313,7 @@ XlaOp SolveWithInvertedDiagonalBlocks(XlaOp a, XlaOp b, XlaOp inv_diag_blocks,
       // (namely, X[i * block_size:] = 0), L[i, :i] @ X[:i]
       if (backward) {
         start = {j * block_size,
-                 std::max(0LL, (num_blocks - i) * block_size)};
+                 std::max(int64{0}, (num_blocks - i) * block_size)};
         end = {k, n};
       } else {
         start = {j * block_size, 0};
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index 146d03fa0c5..22ee5a16a30 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -1032,7 +1032,7 @@ ShapeUtil::InsertedOrDeleted1SizedDimensions(const Shape& shape_pre,
     // Check (modified) dimensions between unmodified_dims[i-1] and
     // unmodified_dims[i].
     auto prior_unmodified_dim_pair =
-        i > 0 ? unmodified_dims[i - 1] : std::make_pair(-1LL, -1LL);
+        i > 0 ? unmodified_dims[i - 1] : std::pair<int64, int64>(-1, -1);
     auto unmodified_dim_pair =
         i < unmodified_dims.size() ? unmodified_dims[i]
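
Note on the recurring pattern: every hunk above swaps an "LL"-suffixed literal (0LL, 2ll, 8LL, 32LL, 1LL) for a brace-initialized int64 (int64{0}, int64{2}, ...), and the printf call sites gain an explicit static_cast<long long> to match the %lld format. The standalone sketch below is not part of the patch; it assumes TF's int64 is an alias for int64_t, and the function names are illustrative only.

  #include <algorithm>
  #include <cstdint>
  #include <cstdio>

  int64_t ClampNonNegative(int64_t v) {
    // On LP64 platforms int64_t is `long`, while `0LL` is a `long long`, so
    // template argument deduction for std::max<T> sees two different types
    // and fails to compile:
    //   return std::max(v, 0LL);  // error: no matching max(long, long long)
    // Brace-initializing the literal through the alias always produces the
    // matching type, whatever int64_t expands to:
    return std::max(v, int64_t{0});
  }

  int main() {
    // %lld requires a long long argument; the explicit cast keeps the format
    // string correct regardless of how int64_t is defined.
    std::printf("%lld\n", static_cast<long long>(ClampNonNegative(-5)));
    return 0;
  }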