Automated rollback of commit b562be27f7

PiperOrigin-RevId: 236405963
Tim Shen authored on 2019-03-01 17:14:14 -08:00; committed by TensorFlower Gardener
commit fc33a70c90 (parent a3262818d9)
9 changed files with 66 additions and 83 deletions

tensorflow/compiler/xla/protobuf_util.h

@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_PROTOBUF_UTIL_H_
 #define TENSORFLOW_COMPILER_XLA_PROTOBUF_UTIL_H_
 
+#include "google/protobuf/duration.pb.h"
 #include "absl/time/time.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
@@ -44,6 +45,20 @@ Status DumpProtoToDirectory(const tensorflow::protobuf::Message& message,
 // dirpath along as-is.
 void RegisterDirectoryExpander(const std::function<string(string)>& expander);
+
+// Converts an absl::Duration to a google::protobuf::Duration.
+inline google::protobuf::Duration ToDurationProto(absl::Duration duration) {
+  google::protobuf::Duration proto;
+  proto.set_seconds(absl::IDivDuration(duration, absl::Seconds(1), &duration));
+  proto.set_nanos(
+      absl::IDivDuration(duration, absl::Nanoseconds(1), &duration));
+  return proto;
+}
+
+// Converts a google::protobuf::Duration to an absl::Duration.
+inline absl::Duration FromDurationProto(google::protobuf::Duration proto) {
+  return absl::Seconds(proto.seconds()) + absl::Nanoseconds(proto.nanos());
+}
 
 }  // namespace protobuf_util
 }  // namespace xla

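Note: a minimal usage sketch of the helpers restored above (not part of the commit). absl::IDivDuration returns the whole number of times the divisor fits, leaving the remainder in its third argument, so the first call extracts whole seconds and the second expresses the sub-second remainder as nanoseconds:

#include <cassert>
#include "absl/time/time.h"
#include "tensorflow/compiler/xla/protobuf_util.h"

int main() {
  absl::Duration d = absl::Milliseconds(1500);
  google::protobuf::Duration proto = xla::protobuf_util::ToDurationProto(d);
  assert(proto.seconds() == 1);        // whole seconds
  assert(proto.nanos() == 500000000);  // 0.5 s remainder, as nanoseconds
  // Round-trips losslessly at nanosecond granularity.
  assert(xla::protobuf_util::FromDurationProto(proto) == absl::Milliseconds(1500));
  return 0;
}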
tensorflow/compiler/xla/service/gpu/BUILD

@@ -440,14 +440,15 @@ cc_library(
     srcs = ["cudnn_conv_algorithm_picker.cc"],
     hdrs = ["cudnn_conv_algorithm_picker.h"],
     deps = [
+        ":autotuning_proto",
         ":backend_configs",
         ":buffer_comparator",
         ":cudnn_conv_runner",
-        ":gpu_autotuning_proto",
         ":gpu_executable",
         ":ir_emission_utils",
         ":scratch_allocator",
         "//tensorflow/compiler/xla:literal_util",
+        "//tensorflow/compiler/xla:protobuf_util",
         "//tensorflow/compiler/xla/service:compiler",
         "//tensorflow/compiler/xla/service:device_memory_allocator",
         "//tensorflow/compiler/xla/service:hlo",
@@ -455,9 +456,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo_pass",
         "//tensorflow/core:lib",
         "//tensorflow/core:logger",
-        "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:stream_executor_no_cuda",
-        "//tensorflow/core/util/proto:proto_utils",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
         "@com_google_absl//absl/time",
@@ -777,7 +776,6 @@ cc_library(
     hdrs = ["gpu_transfer_manager.h"],
     deps = [
         ":gpu_compiler",
-        ":infeed_manager",
         ":outfeed_manager",
         "//tensorflow/compiler/xla:literal",
         "//tensorflow/compiler/xla:literal_util",
@@ -790,6 +788,7 @@ cc_library(
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/compiler/xla/service:generic_transfer_manager",
         "//tensorflow/compiler/xla/service:transfer_manager",
+        "//tensorflow/compiler/xla/service/gpu:infeed_manager",
         "//tensorflow/core:lib",
         "//tensorflow/core:stream_executor_no_cuda",
         "@com_google_absl//absl/memory",
@@ -1138,8 +1137,8 @@ tf_cc_test(
     srcs = ["cudnn_fused_conv_rewriter_test.cc"],
     tags = tf_cuda_tests_tags(),
     deps = [
-        ":ir_emission_utils",
         "//tensorflow/compiler/xla/service:hlo_parser",
+        "//tensorflow/compiler/xla/service/gpu:ir_emission_utils",
         "//tensorflow/compiler/xla/service/gpu/tests:gpu_codegen_test",
         "//tensorflow/compiler/xla/tests:hlo_test_base",
         "//tensorflow/core:test",
@@ -1184,11 +1183,10 @@ tf_cc_test(
 )
 
 xla_proto_library(
-    name = "gpu_autotuning_proto",
-    srcs = ["gpu_autotuning.proto"],
+    name = "autotuning_proto",
+    srcs = ["autotuning.proto"],
     deps = [
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/compiler/xla/service:hlo_proto",
-        "//tensorflow/core:protos_all_cc",
     ],
 )

tensorflow/core/protobuf/autotuning.proto → tensorflow/compiler/xla/service/gpu/autotuning.proto

@@ -1,14 +1,15 @@
-// This file defines protos that store the results of autotuning various
+// This file defines protos that store the results of autotuning XLA:GPU
 // operations.
 //
 // They are in proto format because we want to log them structured. They offer
 // tremendous statistical, testing, and debugging value.
 syntax = "proto3";
 
-package tensorflow;
+package xla.gpu;
 
-import "google/protobuf/any.proto";
 import "google/protobuf/duration.proto";
+import "tensorflow/compiler/xla/xla_data.proto";
+import "tensorflow/compiler/xla/service/hlo.proto";
 
 message CudnnVersion {
   int32 major = 1;
@@ -62,12 +63,19 @@ message AutotuneResult {
   }
 }
 
-message AutotuningLog {
-  google.protobuf.Any instr = 1;
+message AutotuneLog {
+  message Instruction {
+    xla.HloInstructionProto instruction = 1;
+    repeated xla.ShapeProto operand_shapes = 2;
+  }
+
+  oneof instr_oneof {
+    Instruction instr = 1;
+  }
 
   // Records all auto-tuning results per algorithm.
-  repeated AutotuneResult results = 2;
+  repeated AutotuneResult results = 3;
 
-  CudnnVersion cudnn_version = 3;
-  ComputeCapability compute_capability = 4;
+  CudnnVersion cudnn_version = 4;
+  ComputeCapability compute_capability = 5;
 }

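Note: a hedged sketch (not in the diff) of how a consumer might read the new AutotuneLog message, relying on the case accessor that standard protobuf C++ codegen generates for the instr_oneof oneof:

#include <cstdio>
#include "tensorflow/compiler/xla/service/gpu/autotuning.pb.h"

void PrintLoggedInstruction(const xla::gpu::AutotuneLog& log) {
  // A oneof gets a *_case() accessor; kInstr corresponds to the instr field.
  if (log.instr_oneof_case() == xla::gpu::AutotuneLog::kInstr) {
    const xla::gpu::AutotuneLog::Instruction& instr = log.instr();
    std::printf("autotuned instruction with %d operand shapes, %d results\n",
                instr.operand_shapes_size(), log.results_size());
  }
}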
tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.cc

@@ -14,23 +14,21 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h"
 
-#include "google/protobuf/any.pb.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
 #include "absl/time/time.h"
 #include "absl/types/optional.h"
 #include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/protobuf_util.h"
 #include "tensorflow/compiler/xla/service/gpu/backend_configs.pb.h"
 #include "tensorflow/compiler/xla/service/gpu/buffer_comparator.h"
 #include "tensorflow/compiler/xla/service/gpu/convolution_thunk.h"
-#include "tensorflow/compiler/xla/service/gpu/gpu_autotuning.pb.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
 #include "tensorflow/compiler/xla/service/gpu/scratch_allocator.h"
 #include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/platform/logger.h"
 #include "tensorflow/core/platform/mutex.h"
-#include "tensorflow/core/util/proto/proto_utils.h"
 
 namespace xla {
 namespace gpu {
@@ -39,7 +37,6 @@ namespace {
 using absl::optional;
 using se::DeviceMemoryBase;
 using se::dnn::AlgorithmDesc;
-using tensorflow::AutotuneResult;
 
 std::vector<AlgorithmDesc> GetAlgorithms(CudnnConvKind kind,
                                          se::StreamExecutor* stream_exec) {
@@ -97,8 +94,8 @@ tensorflow::mutex_lock LockGpu(const se::StreamExecutor* stream_exec) {
   return tensorflow::mutex_lock{it->second};
 }
 
-tensorflow::CudnnVersion GetCudnnVersion(se::StreamExecutor* stream_executor) {
-  tensorflow::CudnnVersion cudnn_version;
+xla::gpu::CudnnVersion GetCudnnVersion(se::StreamExecutor* stream_executor) {
+  xla::gpu::CudnnVersion cudnn_version;
   if (auto* dnn = stream_executor->AsDnn()) {
     StatusOr<se::dnn::VersionInfo> version_or = dnn->GetVersion();
     if (version_or.ok()) {
@@ -111,9 +108,9 @@ tensorflow::CudnnVersion GetCudnnVersion(se::StreamExecutor* stream_executor) {
   return cudnn_version;
 }
 
-tensorflow::ComputeCapability GetComputeCapability(
+xla::gpu::ComputeCapability GetComputeCapability(
     se::StreamExecutor* stream_executor) {
-  tensorflow::ComputeCapability cc;
+  xla::gpu::ComputeCapability cc;
   int cc_major, cc_minor;
   stream_executor->GetDeviceDescription().cuda_compute_capability(&cc_major,
                                                                   &cc_minor);
@@ -246,23 +243,25 @@ StatusOr<AutotuneResult> CudnnConvAlgorithmPicker::PickBestAlgorithm(
         RunCudnnConv(instr, absl::MakeSpan(operand_buffers), result_buffer,
                      &scratch_allocator, &stream, options);
 
-    if (!launch_status.ok()) {
-      continue;
-    }
-
-    if (!profile_result.is_valid()) {
-      continue;
-    }
 
     profile_results.emplace_back();
     AutotuneResult& result = profile_results.back();
     result.mutable_conv()->set_algorithm(alg.algo_id());
     result.mutable_conv()->set_tensor_ops_enabled(alg.tensor_ops_enabled());
 
+    if (!launch_status.ok()) {
+      result.set_error_string(launch_status.error_message());
+      continue;
+    }
+
+    if (!profile_result.is_valid()) {
+      result.set_error_string("Invalid profile result");
+      continue;
+    }
+
     int64 scratch_bytes_used = scratch_allocator.TotalAllocatedBytes();
     result.mutable_success()->set_scratch_bytes(scratch_bytes_used);
     *result.mutable_success()->mutable_run_time() =
-        tensorflow::proto_utils::ToDurationProto(
+        protobuf_util::ToDurationProto(
             absl::Milliseconds(profile_result.elapsed_time_in_ms()));
 
     const bool crash_on_checking_failure =
@@ -309,14 +308,10 @@ StatusOr<AutotuneResult> CudnnConvAlgorithmPicker::PickBestAlgorithm(
   // Log the autotuning result.
   {
-    tensorflow::AutotuningLog log;
-    {
-      ConvInstructionLog instr_log;
-      *instr_log.mutable_instruction() = instr->ToProto();
-      for (const auto* op : instr->operands()) {
-        *instr_log.add_operand_shapes() = op->shape().ToProto();
-      }
-      log.mutable_instr()->PackFrom(instr_log);
-    }
+    AutotuneLog log;
+    *log.mutable_instr()->mutable_instruction() = instr->ToProto();
+    for (const auto* op : instr->operands()) {
+      *log.mutable_instr()->add_operand_shapes() = op->shape().ToProto();
+    }
     for (const auto& profile : profile_results) {
       *log.add_results() = profile;
@@ -335,12 +330,11 @@ StatusOr<AutotuneResult> CudnnConvAlgorithmPicker::PickBestAlgorithm(
           // The successful one should have a smaller key, since we are doing
           // min_element. If they are both unsuccessful, keep the earlier one in
           // the vector by comparing pointers.
-          return std::make_tuple(!lhs.has_success(),
-                                 tensorflow::proto_utils::FromDurationProto(
-                                     lhs.success().run_time()),
-                                 &lhs) <
-                 std::make_tuple(!rhs.has_success(),
-                                 tensorflow::proto_utils::FromDurationProto(
-                                     rhs.success().run_time()),
-                                 &rhs);
+          return std::make_tuple(
+                     !lhs.has_success(),
+                     protobuf_util::FromDurationProto(lhs.success().run_time()),
+                     &lhs) < std::make_tuple(!rhs.has_success(),
+                                             protobuf_util::FromDurationProto(
+                                                 rhs.success().run_time()),
+                                             &rhs);
         });

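Note: the min_element comparator above leans on std::tuple's lexicographic operator<: a failed result (!has_success() == true) sorts after any success, faster run times sort first among successes, and the element's address breaks ties in favor of the earlier entry. A standalone sketch of the idiom with a hypothetical Result type (not the commit's code):

#include <algorithm>
#include <tuple>
#include <vector>

struct Result {
  bool success;
  double run_time_ms;  // only meaningful when success is true
};

// Returns the best result: prefer successes, then lower run time,
// then the earliest element. Assumes results is non-empty.
const Result& PickBest(const std::vector<Result>& results) {
  return *std::min_element(
      results.begin(), results.end(),
      [](const Result& lhs, const Result& rhs) {
        // Tuples compare element by element, left to right.
        return std::make_tuple(!lhs.success, lhs.run_time_ms, &lhs) <
               std::make_tuple(!rhs.success, rhs.run_time_ms, &rhs);
      });
}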
tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h

@@ -20,12 +20,12 @@ limitations under the License.
 #include "absl/types/optional.h"
 #include "tensorflow/compiler/xla/service/compiler.h"
 #include "tensorflow/compiler/xla/service/device_memory_allocator.h"
+#include "tensorflow/compiler/xla/service/gpu/autotuning.pb.h"
 #include "tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h"
 #include "tensorflow/compiler/xla/service/hlo_instructions.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
-#include "tensorflow/core/protobuf/autotuning.pb.h"
 
 namespace xla {
 namespace gpu {
@@ -50,7 +50,7 @@ class CudnnConvAlgorithmPicker : public HloModulePass {
  private:
   StatusOr<bool> RunOnComputation(HloComputation* computation);
   StatusOr<bool> RunOnInstruction(HloInstruction* instr);
-  StatusOr<tensorflow::AutotuneResult> PickBestAlgorithm(
+  StatusOr<AutotuneResult> PickBestAlgorithm(
       const HloCustomCallInstruction* instr);
 
   se::StreamExecutor* stream_exec_;  // never null

tensorflow/compiler/xla/service/gpu/gpu_autotuning.proto (deleted)

@@ -1,13 +0,0 @@
-// This is used for convolution logging. Also see
-// tensorflow/core/protobuf/autotuing.h
-syntax = "proto3";
-
-package xla.gpu;
-
-import "tensorflow/compiler/xla/service/hlo.proto";
-import "tensorflow/compiler/xla/xla_data.proto";
-
-message ConvInstructionLog {
-  xla.HloInstructionProto instruction = 1;
-  repeated xla.ShapeProto operand_shapes = 2;
-}

tensorflow/core/BUILD

@@ -233,7 +233,6 @@ CORE_PROTO_SRCS = COMMON_PROTO_SRCS + ERROR_CODES_PROTO_SRCS
 ADDITIONAL_CORE_PROTO_SRCS = [
     "example/example_parser_configuration.proto",
     "protobuf/trackable_object_graph.proto",
-    "protobuf/autotuning.proto",
     "protobuf/control_flow.proto",
     # TODO(ebrevdo): Re-enable once CriticalSection is in core.
     # "protobuf/critical_section.proto",

tensorflow/core/util/proto/BUILD

@@ -70,8 +70,6 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:platform_base",
         "@com_google_absl//absl/strings",
-        "@com_google_absl//absl/time",
-        "@protobuf_archive//:protobuf_headers",
     ],
 )

tensorflow/core/util/proto/proto_utils.h

@@ -16,9 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_UTIL_PROTO_PROTO_UTILS_H_
 #define TENSORFLOW_CORE_UTIL_PROTO_PROTO_UTILS_H_
 
-#include "google/protobuf/duration.pb.h"
 #include "absl/strings/string_view.h"
-#include "absl/time/time.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/protobuf.h"
@@ -60,20 +58,6 @@ class StringErrorCollector : public protobuf::io::ErrorCollector {
   const int index_offset_;
 };
 
-// Converts an absl::Duration to a google::protobuf::Duration.
-inline google::protobuf::Duration ToDurationProto(absl::Duration duration) {
-  google::protobuf::Duration proto;
-  proto.set_seconds(absl::IDivDuration(duration, absl::Seconds(1), &duration));
-  proto.set_nanos(
-      absl::IDivDuration(duration, absl::Nanoseconds(1), &duration));
-  return proto;
-}
-
-// Converts a google::protobuf::Duration to an absl::Duration.
-inline absl::Duration FromDurationProto(google::protobuf::Duration proto) {
-  return absl::Seconds(proto.seconds()) + absl::Nanoseconds(proto.nanos());
-}
-
 }  // namespace proto_utils
 }  // namespace tensorflow