Enable tracing for nested thunks.

- Moves the annotation cache to the thunk itself.
- Iterates over the whole executable, not just the top-level thunks, when constructing annotations.
- Constructs a ScopedAnnotation anywhere an atomic thunk might be executed (currently in GpuExecutable and SequentialThunk); see the sketch below.

PiperOrigin-RevId: 305075290
Change-Id: Ice38b1a5abd88899bbf7ca1f5e3d94f09a5d1573
Jiho Choi authored 2020-04-06 11:21:48 -07:00, committed by TensorFlower Gardener
parent da2d11092e
commit 20bacc0d35
11 changed files with 49 additions and 17 deletions
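
For orientation, the pattern this change introduces looks roughly like the sketch below. This is a minimal standalone illustration with stand-in types, not the actual XLA/TensorFlow classes: the real code derives annotations from HloInstruction names with absl::StrFormat and uses tensorflow::profiler::ScopedAnnotation, whereas here plain strings and a mock RAII annotation are used, and the op names are made up.

// Minimal sketch of the nested-thunk annotation pattern (stand-in types only).
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

// Stand-in for tensorflow::profiler::ScopedAnnotation: it takes a name
// generator and only invokes it when annotation is enabled.
struct ScopedAnnotation {
  template <typename Generator>
  explicit ScopedAnnotation(Generator&& gen) {
    if (enabled) std::cout << "annotate: " << gen() << "\n";
  }
  static bool enabled;
};
bool ScopedAnnotation::enabled = true;

class Thunk {
 public:
  explicit Thunk(std::string op_name) : op_name_(std::move(op_name)) {}
  virtual ~Thunk() = default;

  const std::string& profile_annotation() const { return profile_annotation_; }

  // Builds and caches the annotation for this thunk; composite thunks
  // override this and recurse into their children as well.
  virtual void ComputeAnnotations() {
    profile_annotation_ = "Thunk:#hlo_op=" + op_name_ + "#";
  }

  virtual void ExecuteOnStream() = 0;

 private:
  std::string op_name_;
  std::string profile_annotation_;
};

// An "atomic" thunk, e.g. a kernel launch.
class KernelThunk : public Thunk {
 public:
  using Thunk::Thunk;
  void ExecuteOnStream() override { /* launch the kernel */ }
};

// A nested thunk that owns a sequence of child thunks.
class SequentialThunk : public Thunk {
 public:
  SequentialThunk(std::string op_name,
                  std::vector<std::unique_ptr<Thunk>> thunks)
      : Thunk(std::move(op_name)), thunks_(std::move(thunks)) {}

  void ComputeAnnotations() override {
    Thunk::ComputeAnnotations();
    for (auto& thunk : thunks_) thunk->ComputeAnnotations();
  }

  // Every child gets its own annotation scope, so nested thunks show up in
  // the trace instead of being folded into the top-level op.
  void ExecuteOnStream() override {
    for (auto& thunk : thunks_) {
      ScopedAnnotation annotation([&] { return thunk->profile_annotation(); });
      thunk->ExecuteOnStream();
    }
  }

 private:
  std::vector<std::unique_ptr<Thunk>> thunks_;
};

int main() {
  std::vector<std::unique_ptr<Thunk>> body;
  body.push_back(std::make_unique<KernelThunk>("fusion.1"));
  body.push_back(std::make_unique<KernelThunk>("copy.2"));
  SequentialThunk top("while_body.0", std::move(body));

  top.ComputeAnnotations();  // one recursive pass before execution
  ScopedAnnotation annotation([&] { return top.profile_annotation(); });
  top.ExecuteOnStream();
}

Caching the string on each thunk and handing ScopedAnnotation a generator means the formatting work happens once up front, nested thunks get their own annotation scopes during execution, and the generator is only invoked when tracing is actually enabled.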

tensorflow/compiler/xla/service/gpu/conditional_thunk.cc

@@ -43,6 +43,13 @@ ConditionalThunk::ConditionalThunk(
   }
 }
 
+void ConditionalThunk::ComputeAnnotations() {
+  Thunk::ComputeAnnotations();
+  for (auto& branch_thunk : branch_thunks_) {
+    branch_thunk->ComputeAnnotations();
+  }
+}
+
 Status ConditionalThunk::Initialize(const GpuExecutable& executable,
                                     se::StreamExecutor* executor) {
   if (branch_index_is_bool_) {

tensorflow/compiler/xla/service/gpu/conditional_thunk.h

@@ -51,6 +51,7 @@ class ConditionalThunk : public Thunk {
   ConditionalThunk(const ConditionalThunk&) = delete;
   ConditionalThunk& operator=(const ConditionalThunk&) = delete;
 
+  void ComputeAnnotations() override;
   Status Initialize(const GpuExecutable& executable,
                     se::StreamExecutor* executor) override;
   Status ExecuteOnStream(const ExecuteParams& params) override;

tensorflow/compiler/xla/service/gpu/for_thunk.cc

@@ -34,6 +34,11 @@ ForThunk::ForThunk(const int64 loop_limit,
           // this ForThunk, and shouldn't be profiled separately from it.
           std::move(*body_thunk_sequence), nullptr)) {}
 
+void ForThunk::ComputeAnnotations() {
+  Thunk::ComputeAnnotations();
+  body_thunk_sequence_->ComputeAnnotations();
+}
+
 Status ForThunk::Initialize(const GpuExecutable& executable,
                             se::StreamExecutor* executor) {
   TF_RETURN_IF_ERROR(body_thunk_sequence_->Initialize(executable, executor));

tensorflow/compiler/xla/service/gpu/for_thunk.h

@@ -37,6 +37,7 @@ class ForThunk : public Thunk {
   ForThunk(const ForThunk&) = delete;
   ForThunk& operator=(const ForThunk&) = delete;
 
+  void ComputeAnnotations() override;
   Status Initialize(const GpuExecutable& executable,
                     se::StreamExecutor* executor) override;
   Status ExecuteOnStream(const ExecuteParams& params) override;

tensorflow/compiler/xla/service/gpu/gpu_executable.cc

@@ -45,7 +45,7 @@ namespace xla {
 namespace gpu {
 namespace {
 
-using tensorflow::profiler::ScopedAnnotation;
+using ::tensorflow::profiler::ScopedAnnotation;
 
 }  // namespace
@@ -91,13 +91,8 @@ GpuExecutable::~GpuExecutable() {
 }
 
 void GpuExecutable::ComputeThunkAnnotations() {
-  CanonicalNameMap canonical_name_map;
   for (Thunk* thunk : thunk_schedule_->TotalOrder()) {
-    const HloInstruction* hlo = thunk->hlo_instruction();
-    CHECK(hlo);
-    thunk_annotations_[thunk] =
-        absl::StrFormat("Thunk#hlo_op=%s,hlo_module=%s#", hlo->name(),
-                        hlo->GetModule()->name());
+    thunk->ComputeAnnotations();
   }
 }
@@ -173,17 +168,13 @@ Status GpuExecutable::ExecuteThunks(
       tensorflow::profiler::TraceMeLevel::kInfo);
 
   std::map<const Thunk*, std::unique_ptr<se::Event>> thunk_to_finish_event;
-  bool scoped_annotation_enabled = ScopedAnnotation::IsEnabled();
   std::vector<std::function<void()>> deferred_host_callbacks;
   for (Thunk* thunk : thunk_schedule_->TotalOrder()) {
+    CHECK(thunk->hlo_instruction());
     // Annotate execution of this op if tracing was enabled when we started
     // running this module. If tracing is enabled *while* we're running the
     // module, we won't get any data, but that's probably an OK trade-off.
-    absl::optional<ScopedAnnotation> op_annotation;
-    CHECK(thunk->hlo_instruction());
-    if (scoped_annotation_enabled) {
-      op_annotation.emplace(FindOrDie(thunk_annotations_, thunk));
-    }
+    ScopedAnnotation annotation([&] { return thunk->profile_annotation(); });
     TF_RETURN_IF_ERROR(thunk->Initialize(*this, executor));
     int32 stream_no =
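
A note on the ExecuteThunks change above: the deleted code sampled ScopedAnnotation::IsEnabled() once and then conditionally emplaced an absl::optional<ScopedAnnotation> per thunk, while the replacement passes a generator lambda and lets the annotation object decide whether to materialize the cached string. The snippet below is a rough, self-contained mock of that behavior; the real check lives inside tensorflow::profiler::ScopedAnnotation, and TracingEnabled is an assumed stand-in.

#include <iostream>
#include <string>

bool TracingEnabled() { return false; }  // assumed stand-in for the profiler's state

// Mock of a generator-taking annotation: the callback runs only when tracing
// is on, so the disabled path does no string copying or formatting at all.
class LazyAnnotation {
 public:
  template <typename NameGenerator>
  explicit LazyAnnotation(NameGenerator&& gen) {
    if (TracingEnabled()) std::cout << "annotate: " << gen() << "\n";
  }
};

int main() {
  int generator_calls = 0;
  LazyAnnotation annotation([&] {
    ++generator_calls;
    return std::string("Thunk:#hlo_op=fusion.1,hlo_module=cluster_0#");
  });
  std::cout << "generator calls: " << generator_calls << "\n";  // prints 0
}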

tensorflow/compiler/xla/service/gpu/gpu_executable.h

@@ -151,10 +151,6 @@ class GpuExecutable : public Executable {
   // memory for every output/temp buffers.
   const std::shared_ptr<const BufferAssignment> assignment_;
 
-  // Maps a thunk to a string describing the thunk. This is useful when
-  // constructing ScopeAnnotation objects.
-  absl::flat_hash_map<Thunk*, string> thunk_annotations_;
-
   // Cache of module handles and constant buffer allocation maps used by
   // `ResolveConstantGlobals`.
   tensorflow::mutex module_handle_mutex_;

tensorflow/compiler/xla/service/gpu/sequential_thunk.cc

@@ -17,14 +17,23 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/gpu/hlo_execution_profiler.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/profiler/lib/scoped_annotation.h"
 
 namespace xla {
 namespace gpu {
 
+using ::tensorflow::profiler::ScopedAnnotation;
+
 SequentialThunk::SequentialThunk(std::vector<std::unique_ptr<Thunk>> thunks,
                                  const HloInstruction* hlo)
     : Thunk(Kind::kSequential, hlo), thunks_(std::move(thunks)) {}
 
+void SequentialThunk::ComputeAnnotations() {
+  for (const auto& thunk : thunks_) {
+    thunk->ComputeAnnotations();
+  }
+}
+
 Status SequentialThunk::Initialize(const GpuExecutable& executable,
                                    se::StreamExecutor* executor) {
   for (auto& thunk : thunks_) {
@@ -37,6 +46,7 @@ Status SequentialThunk::ExecuteOnStream(const ExecuteParams& params) {
   auto op_profiler =
       params.profiler->MakeScopedInstructionProfiler(hlo_instruction());
   for (const auto& thunk : thunks_) {
+    ScopedAnnotation annotation([&] { return thunk->profile_annotation(); });
     TF_RETURN_IF_ERROR(thunk->ExecuteOnStream(params));
   }
   return Status::OK();

tensorflow/compiler/xla/service/gpu/sequential_thunk.h

@@ -39,6 +39,7 @@ class SequentialThunk : public Thunk {
   const std::vector<std::unique_ptr<Thunk>>& thunks() const { return thunks_; }
 
+  void ComputeAnnotations() override;
   Status Initialize(const GpuExecutable& executable,
                     se::StreamExecutor* executor) override;
   Status ExecuteOnStream(const ExecuteParams& params) override;

tensorflow/compiler/xla/service/gpu/thunk.h

@@ -79,6 +79,18 @@ class Thunk {
   Kind kind() const { return kind_; }
   const HloInstruction* hlo_instruction() const { return hlo_instruction_; }
+  string profile_annotation() const { return profile_annotation_; }
+
+  // Constructs and caches the profile annotation string for this thunk and
+  // any child thunks.
+  virtual void ComputeAnnotations() {
+    const HloInstruction* hlo = hlo_instruction();
+    if (hlo) {
+      profile_annotation_ =
+          absl::StrFormat("Thunk:#hlo_op=%s,hlo_module=%s#", hlo->name(),
+                          hlo->GetModule()->name());
+    }
+  }
 
   // Prepares the thunk for execution on the given StreamExecutor.
   //
@@ -130,6 +142,7 @@ class Thunk {
  private:
   Kind kind_;
   const HloInstruction* hlo_instruction_;
+  string profile_annotation_;
 };
 
 // A sequence of thunks.
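
For concreteness, the default Thunk::ComputeAnnotations above produces strings of the form Thunk:#hlo_op=<op>,hlo_module=<module>#. A small standalone example with hypothetical op and module names (the real code pulls these from the HloInstruction):

#include <iostream>
#include <string>

#include "absl/strings/str_format.h"

int main() {
  // Hypothetical values standing in for hlo->name() and hlo->GetModule()->name().
  const std::string hlo_op = "fusion.1";
  const std::string hlo_module = "cluster_0";
  const std::string annotation =
      absl::StrFormat("Thunk:#hlo_op=%s,hlo_module=%s#", hlo_op, hlo_module);
  std::cout << annotation << "\n";
  // Prints: Thunk:#hlo_op=fusion.1,hlo_module=cluster_0#
}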

tensorflow/compiler/xla/service/gpu/while_thunk.cc

@@ -39,6 +39,12 @@ WhileThunk::WhileThunk(
       body_thunk_sequence_(absl::make_unique<SequentialThunk>(
           std::move(*body_thunk_sequence), nullptr)) {}
 
+void WhileThunk::ComputeAnnotations() {
+  Thunk::ComputeAnnotations();
+  condition_thunk_sequence_->ComputeAnnotations();
+  body_thunk_sequence_->ComputeAnnotations();
+}
+
 Status WhileThunk::Initialize(const GpuExecutable& executable,
                               se::StreamExecutor* executor) {
   TF_RETURN_IF_ERROR(

tensorflow/compiler/xla/service/gpu/while_thunk.h

@@ -46,6 +46,7 @@ class WhileThunk : public Thunk {
   WhileThunk(const WhileThunk&) = delete;
   WhileThunk& operator=(const WhileThunk&) = delete;
 
+  void ComputeAnnotations() override;
   Status Initialize(const GpuExecutable& executable,
                     se::StreamExecutor* executor) override;
   Status ExecuteOnStream(const ExecuteParams& params) override;