Move autotuning.proto to tensorflow/core/protobuf. This is in preparation for the next patch, which will use tensorflow::Logger::LogProto() for TF convolutions.

This patch also contains one functional change: an AutotuneResult is no longer logged when the profile result is invalid. Such results are usually just unsupported configurations. Ideally we would log the exact error message for unsupported configs, but StreamExecutor does not currently propagate it.
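Sketched below is the new control flow with stand-in types (the real change is in the cudnn_conv_algorithm_picker.cc hunk further down):

#include <vector>

struct ProfileResult { bool valid; };  // stand-in for se::dnn::ProfileResult
struct AutotuneResult {};              // stand-in for tensorflow::AutotuneResult

std::vector<AutotuneResult> CollectValidResults(
    const std::vector<ProfileResult>& candidates) {
  std::vector<AutotuneResult> profile_results;
  for (const ProfileResult& profile_result : candidates) {
    if (!profile_result.valid) {
      continue;  // New behavior: invalid results are skipped, not logged.
    }
    profile_results.emplace_back();  // Only valid results reach the log.
  }
  return profile_results;
}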

PiperOrigin-RevId: 236378741
Tim Shen 2019-03-01 14:28:03 -08:00 committed by TensorFlower Gardener
parent 4a07787d7d
commit b562be27f7
9 changed files with 83 additions and 66 deletions

tensorflow/compiler/xla/protobuf_util.h

@@ -16,7 +16,6 @@ limitations under the License.
#ifndef TENSORFLOW_COMPILER_XLA_PROTOBUF_UTIL_H_
#define TENSORFLOW_COMPILER_XLA_PROTOBUF_UTIL_H_
#include "google/protobuf/duration.pb.h"
#include "absl/time/time.h"
#include "tensorflow/compiler/xla/statusor.h"
#include "tensorflow/compiler/xla/types.h"
@@ -45,20 +44,6 @@ Status DumpProtoToDirectory(const tensorflow::protobuf::Message& message,
// dirpath along as-is.
void RegisterDirectoryExpander(const std::function<string(string)>& expander);
// Converts an absl::Duration to a google::protobuf::Duration.
inline google::protobuf::Duration ToDurationProto(absl::Duration duration) {
google::protobuf::Duration proto;
proto.set_seconds(absl::IDivDuration(duration, absl::Seconds(1), &duration));
proto.set_nanos(
absl::IDivDuration(duration, absl::Nanoseconds(1), &duration));
return proto;
}
// Converts a google::protobuf::Duration to an absl::Duration.
inline absl::Duration FromDurationProto(google::protobuf::Duration proto) {
return absl::Seconds(proto.seconds()) + absl::Nanoseconds(proto.nanos());
}
} // namespace protobuf_util
} // namespace xla
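The two duration helpers removed here are not deleted outright; they move verbatim into tensorflow::proto_utils (see the proto_utils.h hunk at the end), so a hypothetical XLA call site migrates with only an include and namespace change:

// Before: xla::protobuf_util::ToDurationProto(absl::Milliseconds(42));
// After (hypothetical call site):
#include "absl/time/time.h"
#include "tensorflow/core/util/proto/proto_utils.h"

google::protobuf::Duration run_time =
    tensorflow::proto_utils::ToDurationProto(absl::Milliseconds(42));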

tensorflow/compiler/xla/service/gpu/BUILD

@@ -440,15 +440,14 @@ cc_library(
srcs = ["cudnn_conv_algorithm_picker.cc"],
hdrs = ["cudnn_conv_algorithm_picker.h"],
deps = [
":autotuning_proto",
":backend_configs",
":buffer_comparator",
":cudnn_conv_runner",
":gpu_autotuning_proto",
":gpu_executable",
":ir_emission_utils",
":scratch_allocator",
"//tensorflow/compiler/xla:literal_util",
"//tensorflow/compiler/xla:protobuf_util",
"//tensorflow/compiler/xla/service:compiler",
"//tensorflow/compiler/xla/service:device_memory_allocator",
"//tensorflow/compiler/xla/service:hlo",
@@ -456,7 +455,9 @@ cc_library(
"//tensorflow/compiler/xla/service:hlo_pass",
"//tensorflow/core:lib",
"//tensorflow/core:logger",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:stream_executor_no_cuda",
"//tensorflow/core/util/proto:proto_utils",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:str_format",
"@com_google_absl//absl/time",
@@ -776,6 +777,7 @@ cc_library(
hdrs = ["gpu_transfer_manager.h"],
deps = [
":gpu_compiler",
":infeed_manager",
":outfeed_manager",
"//tensorflow/compiler/xla:literal",
"//tensorflow/compiler/xla:literal_util",
@@ -788,7 +790,6 @@ cc_library(
"//tensorflow/compiler/xla:xla_data_proto",
"//tensorflow/compiler/xla/service:generic_transfer_manager",
"//tensorflow/compiler/xla/service:transfer_manager",
"//tensorflow/compiler/xla/service/gpu:infeed_manager",
"//tensorflow/core:lib",
"//tensorflow/core:stream_executor_no_cuda",
"@com_google_absl//absl/memory",
@@ -1137,8 +1138,8 @@ tf_cc_test(
srcs = ["cudnn_fused_conv_rewriter_test.cc"],
tags = tf_cuda_tests_tags(),
deps = [
":ir_emission_utils",
"//tensorflow/compiler/xla/service:hlo_parser",
"//tensorflow/compiler/xla/service/gpu:ir_emission_utils",
"//tensorflow/compiler/xla/service/gpu/tests:gpu_codegen_test",
"//tensorflow/compiler/xla/tests:hlo_test_base",
"//tensorflow/core:test",
@@ -1183,10 +1184,11 @@ tf_cc_test(
)
xla_proto_library(
name = "autotuning_proto",
srcs = ["autotuning.proto"],
name = "gpu_autotuning_proto",
srcs = ["gpu_autotuning.proto"],
deps = [
"//tensorflow/compiler/xla:xla_data_proto",
"//tensorflow/compiler/xla/service:hlo_proto",
"//tensorflow/core:protos_all_cc",
],
)

tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.cc

@@ -14,21 +14,23 @@ limitations under the License.
==============================================================================*/
#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h"
#include "google/protobuf/any.pb.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/time/time.h"
#include "absl/types/optional.h"
#include "tensorflow/compiler/xla/literal_util.h"
#include "tensorflow/compiler/xla/protobuf_util.h"
#include "tensorflow/compiler/xla/service/gpu/backend_configs.pb.h"
#include "tensorflow/compiler/xla/service/gpu/buffer_comparator.h"
#include "tensorflow/compiler/xla/service/gpu/convolution_thunk.h"
#include "tensorflow/compiler/xla/service/gpu/gpu_autotuning.pb.h"
#include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h"
#include "tensorflow/compiler/xla/service/gpu/scratch_allocator.h"
#include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/platform/logger.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/util/proto/proto_utils.h"
namespace xla {
namespace gpu {
@@ -37,6 +39,7 @@ namespace {
using absl::optional;
using se::DeviceMemoryBase;
using se::dnn::AlgorithmDesc;
using tensorflow::AutotuneResult;
std::vector<AlgorithmDesc> GetAlgorithms(CudnnConvKind kind,
se::StreamExecutor* stream_exec) {
@@ -94,8 +97,8 @@ tensorflow::mutex_lock LockGpu(const se::StreamExecutor* stream_exec) {
return tensorflow::mutex_lock{it->second};
}
xla::gpu::CudnnVersion GetCudnnVersion(se::StreamExecutor* stream_executor) {
xla::gpu::CudnnVersion cudnn_version;
tensorflow::CudnnVersion GetCudnnVersion(se::StreamExecutor* stream_executor) {
tensorflow::CudnnVersion cudnn_version;
if (auto* dnn = stream_executor->AsDnn()) {
StatusOr<se::dnn::VersionInfo> version_or = dnn->GetVersion();
if (version_or.ok()) {
@@ -108,9 +111,9 @@ xla::gpu::CudnnVersion GetCudnnVersion(se::StreamExecutor* stream_executor) {
return cudnn_version;
}
xla::gpu::ComputeCapability GetComputeCapability(
tensorflow::ComputeCapability GetComputeCapability(
se::StreamExecutor* stream_executor) {
xla::gpu::ComputeCapability cc;
tensorflow::ComputeCapability cc;
int cc_major, cc_minor;
stream_executor->GetDeviceDescription().cuda_compute_capability(&cc_major,
&cc_minor);
@@ -243,25 +246,23 @@ StatusOr<AutotuneResult> CudnnConvAlgorithmPicker::PickBestAlgorithm(
RunCudnnConv(instr, absl::MakeSpan(operand_buffers), result_buffer,
&scratch_allocator, &stream, options);
if (!launch_status.ok()) {
continue;
}
if (!profile_result.is_valid()) {
continue;
}
profile_results.emplace_back();
AutotuneResult& result = profile_results.back();
result.mutable_conv()->set_algorithm(alg.algo_id());
result.mutable_conv()->set_tensor_ops_enabled(alg.tensor_ops_enabled());
if (!launch_status.ok()) {
result.set_error_string(launch_status.error_message());
continue;
}
if (!profile_result.is_valid()) {
result.set_error_string("Invalid profile result");
continue;
}
int64 scratch_bytes_used = scratch_allocator.TotalAllocatedBytes();
result.mutable_success()->set_scratch_bytes(scratch_bytes_used);
*result.mutable_success()->mutable_run_time() =
protobuf_util::ToDurationProto(
tensorflow::proto_utils::ToDurationProto(
absl::Milliseconds(profile_result.elapsed_time_in_ms()));
const bool crash_on_checking_failure =
@@ -308,10 +309,14 @@ StatusOr<AutotuneResult> CudnnConvAlgorithmPicker::PickBestAlgorithm(
// Log the autotuning result.
{
AutotuneLog log;
*log.mutable_instr()->mutable_instruction() = instr->ToProto();
for (const auto* op : instr->operands()) {
*log.mutable_instr()->add_operand_shapes() = op->shape().ToProto();
tensorflow::AutotuningLog log;
{
ConvInstructionLog instr_log;
*instr_log.mutable_instruction() = instr->ToProto();
for (const auto* op : instr->operands()) {
*instr_log.add_operand_shapes() = op->shape().ToProto();
}
log.mutable_instr()->PackFrom(instr_log);
}
for (const auto& profile : profile_results) {
*log.add_results() = profile;
@@ -330,13 +335,14 @@ StatusOr<AutotuneResult> CudnnConvAlgorithmPicker::PickBestAlgorithm(
// The successful one should have a smaller key, since we are doing
// min_element. If they are both unsuccessful, keep the earlier one in
// the vector by comparing pointers.
return std::make_tuple(
!lhs.has_success(),
protobuf_util::FromDurationProto(lhs.success().run_time()),
&lhs) < std::make_tuple(!rhs.has_success(),
protobuf_util::FromDurationProto(
rhs.success().run_time()),
&rhs);
return std::make_tuple(!lhs.has_success(),
tensorflow::proto_utils::FromDurationProto(
lhs.success().run_time()),
&lhs) <
std::make_tuple(!rhs.has_success(),
tensorflow::proto_utils::FromDurationProto(
rhs.success().run_time()),
&rhs);
});
if (best_result != profile_results_end && best_result->has_success()) {

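The tuples above order candidates by (failed?, run time, address), so min_element yields the fastest successful result, with the pointer only breaking ties among equally unsuccessful entries. The same idiom in isolation:

#include <cassert>
#include <tuple>

int main() {
  double fast = 3.0, slow = 9.0;
  // false sorts before true, so successes beat failures; among successes,
  // the shorter run time wins.
  assert(std::make_tuple(false, fast, &fast) <
         std::make_tuple(false, slow, &slow));
  assert(std::make_tuple(false, slow, &slow) <
         std::make_tuple(true, fast, &fast));
  return 0;
}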
tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h

@@ -20,12 +20,12 @@ limitations under the License.
#include "absl/types/optional.h"
#include "tensorflow/compiler/xla/service/compiler.h"
#include "tensorflow/compiler/xla/service/device_memory_allocator.h"
#include "tensorflow/compiler/xla/service/gpu/autotuning.pb.h"
#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h"
#include "tensorflow/compiler/xla/service/hlo_instructions.h"
#include "tensorflow/compiler/xla/service/hlo_module.h"
#include "tensorflow/compiler/xla/service/hlo_pass_interface.h"
#include "tensorflow/core/platform/stream_executor_no_cuda.h"
#include "tensorflow/core/protobuf/autotuning.pb.h"
namespace xla {
namespace gpu {
@@ -50,7 +50,7 @@ class CudnnConvAlgorithmPicker : public HloModulePass {
private:
StatusOr<bool> RunOnComputation(HloComputation* computation);
StatusOr<bool> RunOnInstruction(HloInstruction* instr);
StatusOr<AutotuneResult> PickBestAlgorithm(
StatusOr<tensorflow::AutotuneResult> PickBestAlgorithm(
const HloCustomCallInstruction* instr);
se::StreamExecutor* stream_exec_; // never null

tensorflow/compiler/xla/service/gpu/gpu_autotuning.proto

@@ -0,0 +1,13 @@
// This is used for convolution logging. Also see
// tensorflow/core/protobuf/autotuning.proto
syntax = "proto3";
package xla.gpu;
import "tensorflow/compiler/xla/service/hlo.proto";
import "tensorflow/compiler/xla/xla_data.proto";
message ConvInstructionLog {
xla.HloInstructionProto instruction = 1;
repeated xla.ShapeProto operand_shapes = 2;
}
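Since AutotuningLog (next file) now carries the instruction as a google.protobuf.Any, a consumer of the log has to unpack it back into ConvInstructionLog. A minimal sketch, assuming the generated headers for both protos:

#include "tensorflow/compiler/xla/service/gpu/gpu_autotuning.pb.h"
#include "tensorflow/core/protobuf/autotuning.pb.h"

void ReadConvLog(const tensorflow::AutotuningLog& log) {
  xla::gpu::ConvInstructionLog instr_log;
  if (log.instr().UnpackTo(&instr_log)) {
    // instr_log.instruction() is the xla.HloInstructionProto packed by
    // CudnnConvAlgorithmPicker above.
  }
}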

tensorflow/core/BUILD

@@ -233,6 +233,7 @@ CORE_PROTO_SRCS = COMMON_PROTO_SRCS + ERROR_CODES_PROTO_SRCS
ADDITIONAL_CORE_PROTO_SRCS = [
"example/example_parser_configuration.proto",
"protobuf/trackable_object_graph.proto",
"protobuf/autotuning.proto",
"protobuf/control_flow.proto",
# TODO(ebrevdo): Re-enable once CriticalSection is in core.
# "protobuf/critical_section.proto",

tensorflow/compiler/xla/service/gpu/autotuning.proto → tensorflow/core/protobuf/autotuning.proto

@@ -1,15 +1,14 @@
// This file defines protos that store the results of autotuning XLA:GPU
// This file defines protos that store the results of autotuning various
// operations.
//
// They are in proto format because we want to log them structured. They offer
// tremendous statistical, testing, and debugging value.
syntax = "proto3";
package xla.gpu;
package tensorflow;
import "google/protobuf/any.proto";
import "google/protobuf/duration.proto";
import "tensorflow/compiler/xla/xla_data.proto";
import "tensorflow/compiler/xla/service/hlo.proto";
message CudnnVersion {
int32 major = 1;
@@ -63,19 +62,12 @@ message AutotuneResult {
}
}
message AutotuneLog {
message Instruction {
xla.HloInstructionProto instruction = 1;
repeated xla.ShapeProto operand_shapes = 2;
}
oneof instr_oneof {
Instruction instr = 1;
}
message AutotuningLog {
google.protobuf.Any instr = 1;
// Records all auto-tuning results per algorithm.
repeated AutotuneResult results = 3;
repeated AutotuneResult results = 2;
CudnnVersion cudnn_version = 4;
ComputeCapability compute_capability = 5;
CudnnVersion cudnn_version = 3;
ComputeCapability compute_capability = 4;
}
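With the package now tensorflow and the instruction hidden behind Any, core TF code can build one of these logs without depending on XLA. A sketch with hypothetical values (the hand-off to tensorflow::Logger is the follow-up patch's stated intent, not part of this commit):

#include "tensorflow/core/protobuf/autotuning.pb.h"

tensorflow::AutotuningLog MakeLog() {
  tensorflow::AutotuningLog log;
  tensorflow::AutotuneResult* result = log.add_results();
  result->mutable_conv()->set_algorithm(7);   // hypothetical algorithm id
  result->mutable_conv()->set_tensor_ops_enabled(true);
  log.mutable_cudnn_version()->set_major(7);  // hypothetical cuDNN version
  return log;
}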

tensorflow/core/util/proto/BUILD

@@ -70,6 +70,8 @@ cc_library(
"//tensorflow/core:lib",
"//tensorflow/core:platform_base",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/time",
"@protobuf_archive//:protobuf_headers",
],
)

tensorflow/core/util/proto/proto_utils.h

@@ -16,7 +16,9 @@ limitations under the License.
#ifndef TENSORFLOW_CORE_UTIL_PROTO_PROTO_UTILS_H_
#define TENSORFLOW_CORE_UTIL_PROTO_PROTO_UTILS_H_
#include "google/protobuf/duration.pb.h"
#include "absl/strings/string_view.h"
#include "absl/time/time.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/protobuf.h"
@@ -58,6 +60,20 @@ class StringErrorCollector : public protobuf::io::ErrorCollector {
const int index_offset_;
};
// Converts an absl::Duration to a google::protobuf::Duration.
inline google::protobuf::Duration ToDurationProto(absl::Duration duration) {
google::protobuf::Duration proto;
proto.set_seconds(absl::IDivDuration(duration, absl::Seconds(1), &duration));
proto.set_nanos(
absl::IDivDuration(duration, absl::Nanoseconds(1), &duration));
return proto;
}
// Converts a google::protobuf::Duration to an absl::Duration.
inline absl::Duration FromDurationProto(google::protobuf::Duration proto) {
return absl::Seconds(proto.seconds()) + absl::Nanoseconds(proto.nanos());
}
} // namespace proto_utils
} // namespace tensorflow
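A minimal round-trip sketch for the two helpers above (hypothetical caller):

#include "absl/time/time.h"
#include "tensorflow/core/util/proto/proto_utils.h"

void DurationRoundTrip() {
  absl::Duration d = absl::Milliseconds(1234);
  google::protobuf::Duration proto =
      tensorflow::proto_utils::ToDurationProto(d);
  // proto.seconds() == 1 and proto.nanos() == 234000000.
  absl::Duration back = tensorflow::proto_utils::FromDurationProto(proto);
  // back == d: the round trip is lossless at nanosecond granularity.
}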