Add profiler service to eager context so that it is started by default.

PiperOrigin-RevId: 294348740
Change-Id: I99eb430c8a5c1c35ad442987a7c50af3f1f92e29
This commit is contained in:
A. Unique TensorFlower 2020-02-10 18:06:41 -08:00 committed by TensorFlower Gardener
parent 114762ab9e
commit 1a287cbaee
7 changed files with 61 additions and 9 deletions

View File

@ -48,9 +48,11 @@ void TFE_OpConsumeInput(TFE_Op* op, TFE_TensorHandle* h, TF_Status* status) {
}
// Starts the gRPC profiler service on `port`.
//
// NOTE(review): the stripped diff fused the removed implementation
// (`tensorflow::StartProfilerServer(port).release()`) with its replacement;
// only the ProfilerServer-based version below exists after this change — the
// old free function was deleted, so keeping both lines would not compile.
void TFE_StartProfilerServer(int port) {
  auto profiler_server = absl::make_unique<tensorflow::ProfilerServer>();
  profiler_server->StartProfilerServer(port);
  // Release child server thread intentionally. The child thread can be
  // terminated when the main program exits.
  profiler_server.release();
}
void TFE_ContextEnableGraphCollection(TFE_Context* ctx) {

View File

@ -77,6 +77,7 @@ tf_cuda_library(
"//tensorflow/core/distributed_runtime:server_lib",
"//tensorflow/core/distributed_runtime:worker_session",
"//tensorflow/core/distributed_runtime/eager:eager_client",
"//tensorflow/core/profiler/rpc:profiler_server",
],
}),
)

View File

@ -45,6 +45,7 @@ limitations under the License.
#include "tensorflow/core/distributed_runtime/collective_param_resolver_distributed.h"
#include "tensorflow/core/distributed_runtime/device_resolver_distributed.h"
#include "tensorflow/core/distributed_runtime/rpc_collective_executor_mgr.h"
#include "tensorflow/core/profiler/rpc/profiler_server.h"
#endif // !IS_MOBILE_PLATFORM
#include "tensorflow/core/framework/resource_mgr.h"
#include "tensorflow/core/lib/core/blocking_counter.h"
@ -110,6 +111,8 @@ EagerContext::EagerContext(
#if !defined(IS_MOBILE_PLATFORM)
context_id_ = kInvalidContextId;
profiler_server_ = absl::make_unique<ProfilerServer>();
profiler_server_->MaybeStartProfilerServer();
#endif // IS_MOBILE_PLATFORM
std::unique_ptr<DeviceResolverInterface> drl(

View File

@ -73,6 +73,8 @@ namespace eager {
class RemoteMgr;
} // namespace eager
class ProfilerServer;
// LINT.IfChange
// Note: Keep in sync with exported copy of enum in eager/c_api.h.
enum ContextDevicePlacementPolicy {
@ -599,6 +601,9 @@ class EagerContext : public core::RefCounted {
std::shared_ptr<WorkerSession> worker_session_;
std::unique_ptr<eager::EagerClientCache> remote_eager_workers_;
// Starts a thread for profiling service.
std::unique_ptr<ProfilerServer> profiler_server_;
mutex remote_state_mu_;
uint64 context_id_ GUARDED_BY(remote_state_mu_);

View File

@ -30,6 +30,7 @@ cc_library(
"//tensorflow:grpc++",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"//tensorflow/core/profiler:profiler_service_proto_cc",
"@com_google_absl//absl/strings",
],

View File

@ -23,13 +23,14 @@ limitations under the License.
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/profiler/profiler_service.grpc.pb.h"
#include "tensorflow/core/profiler/rpc/profiler_service_impl.h"
#include "tensorflow/core/util/env_var.h"
#include "tensorflow/core/util/ptr_util.h"
namespace tensorflow {
std::unique_ptr<Thread> StartProfilerServer(int32 port) {
void ProfilerServer::StartProfilerServer(int32 port) {
Env* env = Env::Default();
return WrapUnique(env->StartThread({}, "profiler server", [port]() {
auto start_server = [port, this]() {
string server_address = absl::StrCat("0.0.0.0:", port);
std::unique_ptr<grpc::ProfilerService::Service> service =
CreateProfilerService();
@ -37,10 +38,36 @@ std::unique_ptr<Thread> StartProfilerServer(int32 port) {
builder.AddListeningPort(server_address,
::grpc::InsecureServerCredentials());
builder.RegisterService(service.get());
std::unique_ptr<::grpc::Server> server(builder.BuildAndStart());
server_ = builder.BuildAndStart();
LOG(INFO) << "Profiling Server listening on " << server_address;
server->Wait();
}));
server_->Wait();
};
server_thread_ =
WrapUnique(env->StartThread({}, "ProfilerServer", start_server));
}
// Starts the profiler server iff the TF_PROFILER_PORT environment variable is
// set to a port in the registered-port range [1024, 49151]; otherwise logs a
// warning (silently no-ops for the unset/-1 default) and returns.
void ProfilerServer::MaybeStartProfilerServer() {
  int64 profiler_port;
  // ReadInt64FromEnvVar guarantees that the output argument is set to the
  // default value (-1) on failure, so profiler_port is always initialized.
  Status s = ReadInt64FromEnvVar("TF_PROFILER_PORT", -1, &profiler_port);
  if (!s.ok()) {
    LOG(WARNING) << "StartProfilerServer: " << s.error_message();
  }
  if (profiler_port < 1024 || profiler_port > 49151) {
    // Suppress the log message when profiler_port is -1 (the unset default)
    // to avoid spamming the terminal for TF users who never set a port.
    if (profiler_port == -1) return;
    LOG(WARNING)
        << "Profiler server not started. TF_PROFILER_PORT: " << profiler_port
        << " is out of the valid registered port range (1024 to 49151).";
    return;
  }
  StartProfilerServer(profiler_port);
}
// Shuts down the gRPC server (if one was ever started) on destruction.
ProfilerServer::~ProfilerServer() {
  if (server_ != nullptr) {
    server_->Shutdown();
  }
}
} // namespace tensorflow

View File

@ -17,13 +17,26 @@ limitations under the License.
#include <memory>
#include "grpcpp/grpcpp.h"
#include "tensorflow/core/platform/types.h"
namespace tensorflow {
class Thread;
std::unique_ptr<Thread> StartProfilerServer(int32 port);
// Owns a gRPC profiler service: the server object plus the thread that runs
// its blocking Wait() loop. Shutdown() is issued from the destructor.
class ProfilerServer {
 public:
  ~ProfilerServer();
  // If TF_PROFILER_PORT is defined, starts a profiler server with the
  // specified port. Otherwise, don't start a profiler server.
  void MaybeStartProfilerServer();
  // Starts a profiler server with a given port.
  void StartProfilerServer(int32 port);

 private:
  // gRPC server instance; null until StartProfilerServer has run.
  std::unique_ptr<::grpc::Server> server_;
  // Thread executing the server's blocking event loop.
  std::unique_ptr<Thread> server_thread_;
};
} // namespace tensorflow