Automated rollback of commit b562be27f7

PiperOrigin-RevId: 236405963
Tim Shen authored on 2019-03-01 17:14:14 -08:00; committed by TensorFlower Gardener
commit fc33a70c90 (parent a3262818d9)
9 changed files with 66 additions and 83 deletions

tensorflow/compiler/xla/protobuf_util.h

@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_PROTOBUF_UTIL_H_
 #define TENSORFLOW_COMPILER_XLA_PROTOBUF_UTIL_H_
 
+#include "google/protobuf/duration.pb.h"
 #include "absl/time/time.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
@@ -44,6 +45,20 @@ Status DumpProtoToDirectory(const tensorflow::protobuf::Message& message,
 // dirpath along as-is.
 void RegisterDirectoryExpander(const std::function<string(string)>& expander);
+
+// Converts an absl::Duration to a google::protobuf::Duration.
+inline google::protobuf::Duration ToDurationProto(absl::Duration duration) {
+  google::protobuf::Duration proto;
+  proto.set_seconds(absl::IDivDuration(duration, absl::Seconds(1), &duration));
+  proto.set_nanos(
+      absl::IDivDuration(duration, absl::Nanoseconds(1), &duration));
+  return proto;
+}
+
+// Converts a google::protobuf::Duration to an absl::Duration.
+inline absl::Duration FromDurationProto(google::protobuf::Duration proto) {
+  return absl::Seconds(proto.seconds()) + absl::Nanoseconds(proto.nanos());
+}
 
 }  // namespace protobuf_util
 }  // namespace xla

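Note: a minimal usage sketch of the helpers restored above (not part of the commit). absl::IDivDuration returns the whole number of times the divisor fits, leaving the remainder in its third argument, so the first call extracts whole seconds and the second expresses the sub-second remainder as nanoseconds:

#include <cassert>
#include "absl/time/time.h"
#include "tensorflow/compiler/xla/protobuf_util.h"

int main() {
  absl::Duration d = absl::Milliseconds(1500);
  google::protobuf::Duration proto = xla::protobuf_util::ToDurationProto(d);
  assert(proto.seconds() == 1);        // whole seconds
  assert(proto.nanos() == 500000000);  // 0.5 s remainder, as nanoseconds
  // Round-trips losslessly at nanosecond granularity.
  assert(xla::protobuf_util::FromDurationProto(proto) == absl::Milliseconds(1500));
  return 0;
}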
tensorflow/compiler/xla/service/gpu/BUILD

@@ -440,14 +440,15 @@ cc_library(
     srcs = ["cudnn_conv_algorithm_picker.cc"],
     hdrs = ["cudnn_conv_algorithm_picker.h"],
     deps = [
+        ":autotuning_proto",
         ":backend_configs",
         ":buffer_comparator",
         ":cudnn_conv_runner",
-        ":gpu_autotuning_proto",
         ":gpu_executable",
         ":ir_emission_utils",
         ":scratch_allocator",
         "//tensorflow/compiler/xla:literal_util",
+        "//tensorflow/compiler/xla:protobuf_util",
         "//tensorflow/compiler/xla/service:compiler",
         "//tensorflow/compiler/xla/service:device_memory_allocator",
         "//tensorflow/compiler/xla/service:hlo",
@@ -455,9 +456,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo_pass",
         "//tensorflow/core:lib",
         "//tensorflow/core:logger",
-        "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:stream_executor_no_cuda",
-        "//tensorflow/core/util/proto:proto_utils",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
         "@com_google_absl//absl/time",
@@ -777,7 +776,6 @@ cc_library(
     hdrs = ["gpu_transfer_manager.h"],
     deps = [
         ":gpu_compiler",
-        ":infeed_manager",
         ":outfeed_manager",
         "//tensorflow/compiler/xla:literal",
         "//tensorflow/compiler/xla:literal_util",
@@ -790,6 +788,7 @@ cc_library(
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/compiler/xla/service:generic_transfer_manager",
         "//tensorflow/compiler/xla/service:transfer_manager",
+        "//tensorflow/compiler/xla/service/gpu:infeed_manager",
         "//tensorflow/core:lib",
         "//tensorflow/core:stream_executor_no_cuda",
         "@com_google_absl//absl/memory",
@@ -1138,8 +1137,8 @@ tf_cc_test(
     srcs = ["cudnn_fused_conv_rewriter_test.cc"],
     tags = tf_cuda_tests_tags(),
     deps = [
-        ":ir_emission_utils",
         "//tensorflow/compiler/xla/service:hlo_parser",
+        "//tensorflow/compiler/xla/service/gpu:ir_emission_utils",
         "//tensorflow/compiler/xla/service/gpu/tests:gpu_codegen_test",
         "//tensorflow/compiler/xla/tests:hlo_test_base",
         "//tensorflow/core:test",
@@ -1184,11 +1183,10 @@ tf_cc_test(
 )
 
 xla_proto_library(
-    name = "gpu_autotuning_proto",
-    srcs = ["gpu_autotuning.proto"],
+    name = "autotuning_proto",
+    srcs = ["autotuning.proto"],
     deps = [
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/compiler/xla/service:hlo_proto",
-        "//tensorflow/core:protos_all_cc",
     ],
 )

tensorflow/core/protobuf/autotuning.proto → tensorflow/compiler/xla/service/gpu/autotuning.proto

@@ -1,14 +1,15 @@
-// This file defines protos that store the results of autotuning various
+// This file defines protos that store the results of autotuning XLA:GPU
 // operations.
 //
 // They are in proto format because we want to log them structured. They offer
 // tremendous statistical, testing, and debugging value.
 syntax = "proto3";
 
-package tensorflow;
+package xla.gpu;
 
-import "google/protobuf/any.proto";
 import "google/protobuf/duration.proto";
+import "tensorflow/compiler/xla/xla_data.proto";
+import "tensorflow/compiler/xla/service/hlo.proto";
 
 message CudnnVersion {
   int32 major = 1;
@@ -62,12 +63,19 @@ message AutotuneResult {
   }
 }
 
-message AutotuningLog {
-  google.protobuf.Any instr = 1;
+message AutotuneLog {
+  message Instruction {
+    xla.HloInstructionProto instruction = 1;
+    repeated xla.ShapeProto operand_shapes = 2;
+  }
+
+  oneof instr_oneof {
+    Instruction instr = 1;
+  }
 
   // Records all auto-tuning results per algorithm.
-  repeated AutotuneResult results = 2;
+  repeated AutotuneResult results = 3;
 
-  CudnnVersion cudnn_version = 3;
-  ComputeCapability compute_capability = 4;
+  CudnnVersion cudnn_version = 4;
+  ComputeCapability compute_capability = 5;
 }

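Note: a hedged sketch (not in the diff) of how a consumer might read the new AutotuneLog message, relying on the case accessor that standard protobuf C++ codegen generates for the instr_oneof oneof:

#include <cstdio>
#include "tensorflow/compiler/xla/service/gpu/autotuning.pb.h"

void PrintLoggedInstruction(const xla::gpu::AutotuneLog& log) {
  // A oneof gets a *_case() accessor; kInstr corresponds to the instr field.
  if (log.instr_oneof_case() == xla::gpu::AutotuneLog::kInstr) {
    const xla::gpu::AutotuneLog::Instruction& instr = log.instr();
    std::printf("autotuned instruction with %d operand shapes, %d results\n",
                instr.operand_shapes_size(), log.results_size());
  }
}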
tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.cc

@@ -14,23 +14,21 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h"
 
-#include "google/protobuf/any.pb.h"
 #include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
 #include "absl/time/time.h"
 #include "absl/types/optional.h"
 #include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/protobuf_util.h"
 #include "tensorflow/compiler/xla/service/gpu/backend_configs.pb.h"
 #include "tensorflow/compiler/xla/service/gpu/buffer_comparator.h"
 #include "tensorflow/compiler/xla/service/gpu/convolution_thunk.h"
-#include "tensorflow/compiler/xla/service/gpu/gpu_autotuning.pb.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
 #include "tensorflow/compiler/xla/service/gpu/scratch_allocator.h"
 #include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/platform/logger.h"
 #include "tensorflow/core/platform/mutex.h"
-#include "tensorflow/core/util/proto/proto_utils.h"
 
 namespace xla {
 namespace gpu {
@@ -39,7 +37,6 @@ namespace {
 using absl::optional;
 using se::DeviceMemoryBase;
 using se::dnn::AlgorithmDesc;
-using tensorflow::AutotuneResult;
 
 std::vector<AlgorithmDesc> GetAlgorithms(CudnnConvKind kind,
                                          se::StreamExecutor* stream_exec) {
@@ -97,8 +94,8 @@ tensorflow::mutex_lock LockGpu(const se::StreamExecutor* stream_exec) {
   return tensorflow::mutex_lock{it->second};
 }
 
-tensorflow::CudnnVersion GetCudnnVersion(se::StreamExecutor* stream_executor) {
-  tensorflow::CudnnVersion cudnn_version;
+xla::gpu::CudnnVersion GetCudnnVersion(se::StreamExecutor* stream_executor) {
+  xla::gpu::CudnnVersion cudnn_version;
   if (auto* dnn = stream_executor->AsDnn()) {
     StatusOr<se::dnn::VersionInfo> version_or = dnn->GetVersion();
     if (version_or.ok()) {
@@ -111,9 +108,9 @@ tensorflow::CudnnVersion GetCudnnVersion(se::StreamExecutor* stream_executor) {
   return cudnn_version;
 }
 
-tensorflow::ComputeCapability GetComputeCapability(
+xla::gpu::ComputeCapability GetComputeCapability(
     se::StreamExecutor* stream_executor) {
-  tensorflow::ComputeCapability cc;
+  xla::gpu::ComputeCapability cc;
   int cc_major, cc_minor;
   stream_executor->GetDeviceDescription().cuda_compute_capability(&cc_major,
                                                                   &cc_minor);
@@ -246,23 +243,25 @@ StatusOr<AutotuneResult> CudnnConvAlgorithmPicker::PickBestAlgorithm(
         RunCudnnConv(instr, absl::MakeSpan(operand_buffers), result_buffer,
                      &scratch_allocator, &stream, options);
 
-    if (!launch_status.ok()) {
-      continue;
-    }
-
-    if (!profile_result.is_valid()) {
-      continue;
-    }
 
     profile_results.emplace_back();
     AutotuneResult& result = profile_results.back();
     result.mutable_conv()->set_algorithm(alg.algo_id());
     result.mutable_conv()->set_tensor_ops_enabled(alg.tensor_ops_enabled());
 
+    if (!launch_status.ok()) {
+      result.set_error_string(launch_status.error_message());
+      continue;
+    }
+
+    if (!profile_result.is_valid()) {
+      result.set_error_string("Invalid profile result");
+      continue;
+    }
+
     int64 scratch_bytes_used = scratch_allocator.TotalAllocatedBytes();
     result.mutable_success()->set_scratch_bytes(scratch_bytes_used);
     *result.mutable_success()->mutable_run_time() =
-        tensorflow::proto_utils::ToDurationProto(
+        protobuf_util::ToDurationProto(
             absl::Milliseconds(profile_result.elapsed_time_in_ms()));
 
     const bool crash_on_checking_failure =
@@ -309,14 +308,10 @@ StatusOr<AutotuneResult> CudnnConvAlgorithmPicker::PickBestAlgorithm(
   // Log the autotuning result.
   {
-    tensorflow::AutotuningLog log;
-    {
-      ConvInstructionLog instr_log;
-      *instr_log.mutable_instruction() = instr->ToProto();
-      for (const auto* op : instr->operands()) {
-        *instr_log.add_operand_shapes() = op->shape().ToProto();
-      }
-      log.mutable_instr()->PackFrom(instr_log);
-    }
+    AutotuneLog log;
+    *log.mutable_instr()->mutable_instruction() = instr->ToProto();
+    for (const auto* op : instr->operands()) {
+      *log.mutable_instr()->add_operand_shapes() = op->shape().ToProto();
+    }
     for (const auto& profile : profile_results) {
       *log.add_results() = profile;
@@ -335,12 +330,11 @@ StatusOr<AutotuneResult> CudnnConvAlgorithmPicker::PickBestAlgorithm(
           // The successful one should have a smaller key, since we are doing
           // min_element. If they are both unsuccessful, keep the earlier one in
           // the vector by comparing pointers.
-          return std::make_tuple(!lhs.has_success(),
-                                 tensorflow::proto_utils::FromDurationProto(
-                                     lhs.success().run_time()),
-                                 &lhs) <
-                 std::make_tuple(!rhs.has_success(),
-                                 tensorflow::proto_utils::FromDurationProto(
-                                     rhs.success().run_time()),
-                                 &rhs);
+          return std::make_tuple(
+                     !lhs.has_success(),
+                     protobuf_util::FromDurationProto(lhs.success().run_time()),
+                     &lhs) < std::make_tuple(!rhs.has_success(),
+                                             protobuf_util::FromDurationProto(
+                                                 rhs.success().run_time()),
+                                             &rhs);
         });

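Note: the min_element comparator above leans on std::tuple's lexicographic operator<: a failed result (!has_success() == true) sorts after any success, faster run times sort first among successes, and the element's address breaks ties in favor of the earlier entry. A standalone sketch of the idiom with a hypothetical Result type (not the commit's code):

#include <algorithm>
#include <tuple>
#include <vector>

struct Result {
  bool success;
  double run_time_ms;  // only meaningful when success is true
};

// Returns the best result: prefer successes, then lower run time,
// then the earliest element. Assumes results is non-empty.
const Result& PickBest(const std::vector<Result>& results) {
  return *std::min_element(
      results.begin(), results.end(),
      [](const Result& lhs, const Result& rhs) {
        // Tuples compare element by element, left to right.
        return std::make_tuple(!lhs.success, lhs.run_time_ms, &lhs) <
               std::make_tuple(!rhs.success, rhs.run_time_ms, &rhs);
      });
}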
tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h

@@ -20,12 +20,12 @@ limitations under the License.
 #include "absl/types/optional.h"
 #include "tensorflow/compiler/xla/service/compiler.h"
 #include "tensorflow/compiler/xla/service/device_memory_allocator.h"
+#include "tensorflow/compiler/xla/service/gpu/autotuning.pb.h"
 #include "tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h"
 #include "tensorflow/compiler/xla/service/hlo_instructions.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
-#include "tensorflow/core/protobuf/autotuning.pb.h"
 
 namespace xla {
 namespace gpu {
@@ -50,7 +50,7 @@ class CudnnConvAlgorithmPicker : public HloModulePass {
  private:
   StatusOr<bool> RunOnComputation(HloComputation* computation);
   StatusOr<bool> RunOnInstruction(HloInstruction* instr);
-  StatusOr<tensorflow::AutotuneResult> PickBestAlgorithm(
+  StatusOr<AutotuneResult> PickBestAlgorithm(
       const HloCustomCallInstruction* instr);
 
   se::StreamExecutor* stream_exec_;  // never null

tensorflow/compiler/xla/service/gpu/gpu_autotuning.proto (deleted)

@@ -1,13 +0,0 @@
-// This is used for convolution logging. Also see
-// tensorflow/core/protobuf/autotuing.h
-syntax = "proto3";
-
-package xla.gpu;
-
-import "tensorflow/compiler/xla/service/hlo.proto";
-import "tensorflow/compiler/xla/xla_data.proto";
-
-message ConvInstructionLog {
-  xla.HloInstructionProto instruction = 1;
-  repeated xla.ShapeProto operand_shapes = 2;
-}

tensorflow/core/BUILD

@@ -233,7 +233,6 @@ CORE_PROTO_SRCS = COMMON_PROTO_SRCS + ERROR_CODES_PROTO_SRCS
 ADDITIONAL_CORE_PROTO_SRCS = [
     "example/example_parser_configuration.proto",
     "protobuf/trackable_object_graph.proto",
-    "protobuf/autotuning.proto",
     "protobuf/control_flow.proto",
     # TODO(ebrevdo): Re-enable once CriticalSection is in core.
     # "protobuf/critical_section.proto",

tensorflow/core/util/proto/BUILD

@@ -70,8 +70,6 @@ cc_library(
         "//tensorflow/core:lib",
         "//tensorflow/core:platform_base",
         "@com_google_absl//absl/strings",
-        "@com_google_absl//absl/time",
-        "@protobuf_archive//:protobuf_headers",
     ],
 )

tensorflow/core/util/proto/proto_utils.h

@@ -16,9 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_UTIL_PROTO_PROTO_UTILS_H_
 #define TENSORFLOW_CORE_UTIL_PROTO_PROTO_UTILS_H_
 
-#include "google/protobuf/duration.pb.h"
 #include "absl/strings/string_view.h"
-#include "absl/time/time.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/protobuf.h"
@@ -60,20 +58,6 @@ class StringErrorCollector : public protobuf::io::ErrorCollector {
   const int index_offset_;
 };
 
-// Converts an absl::Duration to a google::protobuf::Duration.
-inline google::protobuf::Duration ToDurationProto(absl::Duration duration) {
-  google::protobuf::Duration proto;
-  proto.set_seconds(absl::IDivDuration(duration, absl::Seconds(1), &duration));
-  proto.set_nanos(
-      absl::IDivDuration(duration, absl::Nanoseconds(1), &duration));
-  return proto;
-}
-
-// Converts a google::protobuf::Duration to an absl::Duration.
-inline absl::Duration FromDurationProto(google::protobuf::Duration proto) {
-  return absl::Seconds(proto.seconds()) + absl::Nanoseconds(proto.nanos());
-}
-
 }  // namespace proto_utils
 }  // namespace tensorflow