Add profiler service to eager context so that it is started by default.
PiperOrigin-RevId: 294348740
Change-Id: I99eb430c8a5c1c35ad442987a7c50af3f1f92e29
parent 114762ab9e
commit 1a287cbaee
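With this change, EagerContext constructs a ProfilerServer and calls MaybeStartProfilerServer() in its constructor, so setting the TF_PROFILER_PORT environment variable before the context is created is enough to get a listening profiler endpoint. A minimal sketch of that flow, assuming a standalone driver program (the port value and the explicit setenv call are illustrative, not part of this commit):

    #include <cstdlib>

    #include "absl/memory/memory.h"
    #include "tensorflow/core/profiler/rpc/profiler_server.h"

    int main() {
      // Illustrative only: in practice the user exports TF_PROFILER_PORT
      // before launching the program.
      setenv("TF_PROFILER_PORT", "6009", /*overwrite=*/1);

      // Mirrors what EagerContext now does in its constructor: the server is
      // only started if TF_PROFILER_PORT holds a valid registered port
      // (1024 to 49151).
      auto profiler_server = absl::make_unique<tensorflow::ProfilerServer>();
      profiler_server->MaybeStartProfilerServer();

      // ... run the rest of the program; when profiler_server is destroyed,
      // its destructor shuts the gRPC server down.
      return 0;
    }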
@@ -48,9 +48,11 @@ void TFE_OpConsumeInput(TFE_Op* op, TFE_TensorHandle* h, TF_Status* status) {
 }
 
 void TFE_StartProfilerServer(int port) {
-  // Release child thread intentionally. The child thread can be terminated by
-  // terminating the main thread.
-  tensorflow::StartProfilerServer(port).release();
+  auto profiler_server = absl::make_unique<tensorflow::ProfilerServer>();
+  profiler_server->StartProfilerServer(port);
+  // Release child server thread intentionally. The child thread can be
+  // terminated when the main program exits.
+  profiler_server.release();
 }
 
 void TFE_ContextEnableGraphCollection(TFE_Context* ctx) {
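For comparison, the C API entry point above still lets a client start the server explicitly at a chosen port. A hedged usage sketch, assuming the declaration lives in tensorflow/c/eager/c_api_experimental.h alongside the definition shown above (the port number is illustrative):

    #include "tensorflow/c/eager/c_api_experimental.h"

    void StartProfilingEndpoint() {
      // Starts the profiler gRPC service on port 6009. The backing server
      // object is intentionally released, so it lives until the process exits.
      TFE_StartProfilerServer(6009);
    }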
@@ -77,6 +77,7 @@ tf_cuda_library(
         "//tensorflow/core/distributed_runtime:server_lib",
         "//tensorflow/core/distributed_runtime:worker_session",
         "//tensorflow/core/distributed_runtime/eager:eager_client",
+        "//tensorflow/core/profiler/rpc:profiler_server",
     ],
     }),
 )
@@ -45,6 +45,7 @@ limitations under the License.
 #include "tensorflow/core/distributed_runtime/collective_param_resolver_distributed.h"
 #include "tensorflow/core/distributed_runtime/device_resolver_distributed.h"
 #include "tensorflow/core/distributed_runtime/rpc_collective_executor_mgr.h"
+#include "tensorflow/core/profiler/rpc/profiler_server.h"
 #endif  // !IS_MOBILE_PLATFORM
 #include "tensorflow/core/framework/resource_mgr.h"
 #include "tensorflow/core/lib/core/blocking_counter.h"
@@ -110,6 +111,8 @@ EagerContext::EagerContext(
 
 #if !defined(IS_MOBILE_PLATFORM)
   context_id_ = kInvalidContextId;
+  profiler_server_ = absl::make_unique<ProfilerServer>();
+  profiler_server_->MaybeStartProfilerServer();
 #endif  // IS_MOBILE_PLATFORM
 
   std::unique_ptr<DeviceResolverInterface> drl(
@@ -73,6 +73,8 @@ namespace eager {
 class RemoteMgr;
 }  // namespace eager
 
+class ProfilerServer;
+
 // LINT.IfChange
 // Note: Keep in sync with exported copy of enum in eager/c_api.h.
 enum ContextDevicePlacementPolicy {
@@ -599,6 +601,9 @@ class EagerContext : public core::RefCounted {
   std::shared_ptr<WorkerSession> worker_session_;
   std::unique_ptr<eager::EagerClientCache> remote_eager_workers_;
 
+  // Starts a thread for profiling service.
+  std::unique_ptr<ProfilerServer> profiler_server_;
+
   mutex remote_state_mu_;
 
   uint64 context_id_ GUARDED_BY(remote_state_mu_);
@@ -30,6 +30,7 @@ cc_library(
         "//tensorflow:grpc++",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
         "//tensorflow/core/profiler:profiler_service_proto_cc",
         "@com_google_absl//absl/strings",
     ],
@@ -23,13 +23,14 @@ limitations under the License.
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/profiler/profiler_service.grpc.pb.h"
 #include "tensorflow/core/profiler/rpc/profiler_service_impl.h"
+#include "tensorflow/core/util/env_var.h"
 #include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 
-std::unique_ptr<Thread> StartProfilerServer(int32 port) {
+void ProfilerServer::StartProfilerServer(int32 port) {
   Env* env = Env::Default();
-  return WrapUnique(env->StartThread({}, "profiler server", [port]() {
+  auto start_server = [port, this]() {
     string server_address = absl::StrCat("0.0.0.0:", port);
     std::unique_ptr<grpc::ProfilerService::Service> service =
         CreateProfilerService();
@@ -37,10 +38,36 @@ std::unique_ptr<Thread> StartProfilerServer(int32 port) {
     builder.AddListeningPort(server_address,
                              ::grpc::InsecureServerCredentials());
     builder.RegisterService(service.get());
-    std::unique_ptr<::grpc::Server> server(builder.BuildAndStart());
+    server_ = builder.BuildAndStart();
     LOG(INFO) << "Profiling Server listening on " << server_address;
-    server->Wait();
-  }));
+    server_->Wait();
+  };
+  server_thread_ =
+      WrapUnique(env->StartThread({}, "ProfilerServer", start_server));
 }
 
+void ProfilerServer::MaybeStartProfilerServer() {
+  int64 profiler_port;
+  // The implementation of ReadInt64FromEnvVar guarantees that the output
+  // argument is set to the default value on failure.
+  Status s = ReadInt64FromEnvVar("TF_PROFILER_PORT", -1, &profiler_port);
+  if (!s.ok()) {
+    LOG(WARNING) << "StartProfilerServer: " << s.error_message();
+  }
+  if (profiler_port < 1024 || profiler_port > 49151) {
+    // Skip the log message if profiler_port is -1, to avoid spamming the
+    // terminal for TF users who don't set a profiler port.
+    if (profiler_port == -1) return;
+    LOG(WARNING)
+        << "Profiler server not started. TF_PROFILER_PORT: " << profiler_port
+        << " is out of the valid registered port range (1024 to 49151).";
+    return;
+  }
+  StartProfilerServer(profiler_port);
+}
+
+ProfilerServer::~ProfilerServer() {
+  if (server_) server_->Shutdown();
+}
+
 }  // namespace tensorflow
@@ -17,13 +17,26 @@ limitations under the License.
 
 #include <memory>
 
+#include "grpcpp/grpcpp.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 
 class Thread;
 
-std::unique_ptr<Thread> StartProfilerServer(int32 port);
+class ProfilerServer {
+ public:
+  ~ProfilerServer();
+  // If TF_PROFILER_PORT is defined, starts a profiler server with the
+  // specified port. Otherwise, does not start a profiler server.
+  void MaybeStartProfilerServer();
+  // Starts a profiler server with a given port.
+  void StartProfilerServer(int32 port);
+
+ private:
+  std::unique_ptr<::grpc::Server> server_;
+  std::unique_ptr<Thread> server_thread_;
+};
 
 }  // namespace tensorflow