Add initial TpuTracer.

PiperOrigin-RevId: 347408830
Change-Id: I729103e841c33d155e7e1e9dafac6c20707f877e
This commit is contained in:
Michael Banfield 2020-12-14 10:01:39 -08:00 committed by TensorFlower Gardener
parent cd052fa5f0
commit 3acbbe9df6
6 changed files with 182 additions and 0 deletions

View File

@ -0,0 +1,29 @@
load("//tensorflow/core/platform:rules_cc.bzl", "cc_library")
load("//tensorflow/core/profiler/builds:build_config.bzl", "tf_profiler_copts")
# Package-wide defaults: targets here are visible to //tensorflow:internal
# unless a rule sets its own visibility.
package(
default_visibility = ["//tensorflow:internal"],
licenses = ["notice"], # Apache 2.0
)
# Library containing the TPU profiler (tpu_tracer.cc), which drives the
# TpuProfiler_* entry points of the TPU ops C API.
cc_library(
name = "tpu_tracer",
srcs = ["tpu_tracer.cc"],
copts = tf_profiler_copts(),
deps = [
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core/profiler:profiler_options_proto_cc",
"//tensorflow/core/profiler/lib:profiler_factory",
"//tensorflow/core/profiler/lib:profiler_interface",
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
"//tensorflow/core/profiler/utils:time_utils",
"//tensorflow/core/profiler/utils:xplane_schema",
"//tensorflow/core/profiler/utils:xplane_utils",
"//tensorflow/core/tpu:tpu_api",
"//tensorflow/core/tpu:tpu_ops_c_api_hdrs",
"//tensorflow/stream_executor/tpu:status_helper",
"@com_google_absl//absl/strings",
],
# The source file registers its factory from a namespace-scope initializer and
# exports nothing that other code references directly, so the object file must
# be force-linked or the registration would be dropped.
alwayslink = True,
)

View File

@ -0,0 +1,120 @@
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "absl/strings/str_split.h"
#include "absl/strings/string_view.h"
#include "tensorflow/core/framework/step_stats.pb.h"
#include "tensorflow/core/platform/errors.h"
#include "tensorflow/core/platform/status.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/lib/profiler_factory.h"
#include "tensorflow/core/profiler/lib/profiler_interface.h"
#include "tensorflow/core/profiler/profiler_options.pb.h"
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
#include "tensorflow/core/profiler/utils/xplane_schema.h"
#include "tensorflow/core/tpu/tpu_api.h"
#include "tensorflow/core/tpu/tpu_ops_c_api.h"
#include "tensorflow/stream_executor/tpu/status_helper.h"
namespace tensorflow {
namespace profiler {
namespace {
// TPU implementation of ProfilerInterface.
//
// Owns an opaque TpuProfiler handle obtained through the TPU ops C API
// (tpu::OpsApiFn()). The handle is created in the constructor and handed back
// to the C API in the destructor. Start(), Stop() and CollectData(XSpace*)
// forward to the matching TpuProfiler_* entry points and return the status
// they report.
//
// Thread-safety: This class is go/thread-compatible.
class TpuTracer : public ProfilerInterface {
 public:
  explicit TpuTracer();
  ~TpuTracer() override;

  // The destructor frees the handle exactly once, so a copy of this object
  // would free the same handle twice. Copying (and, implicitly, moving) is
  // therefore disabled.
  TpuTracer(const TpuTracer&) = delete;
  TpuTracer& operator=(const TpuTracer&) = delete;

  // Starts TPU profiling; returns the status reported by the C API.
  Status Start() override;

  // Stops TPU profiling; returns the status reported by the C API.
  Status Stop() override;

  // Unsupported: leaves `run_metadata` untouched and returns OK.
  Status CollectData(RunMetadata* run_metadata) override;

  // Passes `space` to the C API so it can be filled with the collected data;
  // returns the status reported by the C API.
  Status CollectData(XSpace* space) override;

 private:
  // Opaque profiler handle owned by this object. Default-initialized so it
  // never holds an indeterminate value.
  TpuProfiler* tpu_profiler_ = nullptr;
};
// Obtains the profiler handle from the TPU ops C API.
TpuTracer::TpuTracer()
    : tpu_profiler_(tpu::OpsApiFn()->TpuProfiler_CreateFn()) {}
// Hands the profiler handle back to the TPU ops C API.
TpuTracer::~TpuTracer() {
  auto* ops_api = tpu::OpsApiFn();
  ops_api->TpuProfiler_FreeFn(tpu_profiler_);
}
// Forwards the start request to the TPU ops C API. On failure the event is
// logged at VLOG level 1 and the reported status is returned.
Status TpuTracer::Start() {
  StatusHelper start_result;
  tpu::OpsApiFn()->TpuProfiler_StartFn(tpu_profiler_, start_result.c_status);
  if (start_result.ok()) {
    return Status::OK();
  }
  VLOG(1) << "Run Start failed.";
  return start_result.status();
}
// Forwards the stop request to the TPU ops C API. On failure the event is
// logged at VLOG level 1 and the reported status is returned.
Status TpuTracer::Stop() {
  StatusHelper stop_result;
  tpu::OpsApiFn()->TpuProfiler_StopFn(tpu_profiler_, stop_result.c_status);
  if (stop_result.ok()) {
    return Status::OK();
  }
  VLOG(1) << "Run Stop failed.";
  return stop_result.status();
}
// RunMetadata collection is not supported by this tracer: the argument is
// ignored and OK is returned.
Status TpuTracer::CollectData(RunMetadata* /*run_metadata*/) {
  return Status::OK();
}
// Passes `space` to the TPU ops C API for data collection. On failure the
// event is logged at VLOG level 1 and the reported status is returned.
Status TpuTracer::CollectData(XSpace* space) {
  StatusHelper collect_result;
  tpu::OpsApiFn()->TpuProfiler_CollectDataFn(tpu_profiler_,
                                             collect_result.c_status, space);
  if (collect_result.ok()) {
    return Status::OK();
  }
  VLOG(1) << "Run CollectData failed.";
  return collect_result.status();
}
} // namespace
// Not in anonymous namespace for testing purposes.
std::unique_ptr<ProfilerInterface> CreateTpuTracer(
const ProfileOptions& options) {
if (options.device_type() != ProfileOptions::TPU &&
options.device_type() != ProfileOptions::UNSPECIFIED) {
return nullptr;
}
return absl::make_unique<TpuTracer>();
}
// Registers CreateTpuTracer with the profiler factory during static
// initialization. The variable exists only for the side effect of its
// initializer. It is named after the TPU tracer so that this
// external-linkage symbol describes what it registers and cannot collide
// with another tracer's registration variable in the same namespace.
auto register_tpu_tracer_factory = [] {
  RegisterProfilerFactory(&CreateTpuTracer);
  return 0;
}();
} // namespace profiler
} // namespace tensorflow

View File

@ -116,6 +116,7 @@ cc_library(
name = "tpu_api",
srcs = ["tpu_api.cc"],
hdrs = ["tpu_api.h"],
visibility = ["//visibility:public"],
deps = [
":libtftpu_header",
":tpu_executor_api",
@ -344,6 +345,7 @@ cc_library(
visibility = ["//visibility:public"],
deps = [
":libtftpu_header",
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
"//tensorflow/stream_executor/tpu:c_api_decl",
"//tensorflow/stream_executor/tpu:proto_helper",
],

View File

@ -70,6 +70,12 @@ tensorflow::Status SetTpuOpsStructFns(void* library_handle) {
TFTPU_SET_FN(ops_api_fn, TpuCompile_CreateCompilationCacheKey);
TFTPU_SET_FN(ops_api_fn, TpuCompile_DestroyCompilationCacheKey);
TFTPU_SET_FN(ops_api_fn, TpuCompile_CreateGuaranteedConstFingerprint);
TFTPU_SET_FN(ops_api_fn, TpuProfiler_Create);
TFTPU_SET_FN(ops_api_fn, TpuProfiler_Free);
TFTPU_SET_FN(ops_api_fn, TpuProfiler_Start);
TFTPU_SET_FN(ops_api_fn, TpuProfiler_Stop);
TFTPU_SET_FN(ops_api_fn, TpuProfiler_CollectData);
return tensorflow::Status::OK();
}

View File

@ -19,6 +19,7 @@ limitations under the License.
#include <cstdint>
#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
#include "tensorflow/core/tpu/libtftpu.h"
#include "tensorflow/stream_executor/tpu/c_api_decl.h"
#include "tensorflow/stream_executor/tpu/proto_helper.h"
@ -53,6 +54,8 @@ struct HostComputeMetadataSerializedProto {
typedef struct XLA_TpuMeshState XLA_TpuMeshState;
typedef struct TpuProfiler TpuProfiler;
typedef struct XLA_DeviceAssignment {
const char* bytes;
size_t size;
@ -103,6 +106,21 @@ TFTPU_CAPI_EXPORT void TpuCompile_XrtCompileAndBuild(
TpuSerializedProto xrt_computation, const XLA_TpuMeshState* mesh_state,
XLA_TpuProgram** tpu_programs[], size_t* count, TF_Status* status);
// Creates a new TPU profiler object.
TFTPU_CAPI_EXPORT TpuProfiler* TpuProfiler_Create();
// Presumably releases a profiler object obtained from TpuProfiler_Create.
// NOTE(review): the TpuProfiler* return type looks copied from
// TpuProfiler_Create; a "free" function would normally return void. Confirm
// against the library-side definition before changing it.
TFTPU_CAPI_EXPORT TpuProfiler* TpuProfiler_Free(TpuProfiler* tpu_profiler);
// Presumably starts profiling on `tpu_profiler`; the outcome is reported
// through `status`.
TFTPU_CAPI_EXPORT void TpuProfiler_Start(TpuProfiler* tpu_profiler,
TF_Status* status);
// Presumably stops profiling on `tpu_profiler`; the outcome is reported
// through `status`.
TFTPU_CAPI_EXPORT void TpuProfiler_Stop(TpuProfiler* tpu_profiler,
TF_Status* status);
// Presumably writes the collected profile into `space`; the outcome is
// reported through `status`.
// NOTE(review): `space` is a C++ protobuf object crossing a C API boundary;
// confirm both sides are built against the same protobuf definition.
TFTPU_CAPI_EXPORT void TpuProfiler_CollectData(
TpuProfiler* tpu_profiler, TF_Status* status,
tensorflow::profiler::XSpace* space);
// Creates a new TPU mesh state object.
TFTPU_CAPI_EXPORT XLA_TpuMeshState* TpuMeshState_Create();
@ -397,6 +415,12 @@ struct TfTpu_OpsApiFn {
TFTPU_ADD_FN_IN_STRUCT(TpuMeshState_Free);
TFTPU_ADD_FN_IN_STRUCT(TpuMeshState_MeshCommonState);
TFTPU_ADD_FN_IN_STRUCT(TpuProfiler_Create);
TFTPU_ADD_FN_IN_STRUCT(TpuProfiler_Free);
TFTPU_ADD_FN_IN_STRUCT(TpuProfiler_Start);
TFTPU_ADD_FN_IN_STRUCT(TpuProfiler_Stop);
TFTPU_ADD_FN_IN_STRUCT(TpuProfiler_CollectData);
TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_LoadProgramAndEnqueueToStream);
TFTPU_ADD_FN_IN_STRUCT(HardwareLayout_HostShapeToDeviceShape);
TFTPU_ADD_FN_IN_STRUCT(HardwareLayout_ShapeSize);

View File

@ -5,6 +5,7 @@ load("//tensorflow/core/platform:rules_cc.bzl", "cc_library")
package(
default_visibility = [
"//learning/brain/experimental/dtensor:__subpackages__",
"//tensorflow/core/profiler/internal/tpu:__subpackages__",
"//tensorflow/core/tpu:__subpackages__",
],
licenses = ["notice"], # Apache 2.0