Remove implicit mirroring toggle
Implicit mirroring is set to true by default already and is essential for eager performance. This CL just removes dead code since there is no API to disable mirroring for tensors. We also shouldn't have this in the TensorHandleInterface class since mirroring is a runtime-specific implementation detail. PiperOrigin-RevId: 304421014 Change-Id: I383fa24da08a86028cabb3a4b1c5f2612d57336d
This commit is contained in:
parent
8381274593
commit
907a55ebad
@ -580,12 +580,6 @@ void TFE_HostAddressSpace(TFE_Context* ctx, TF_Buffer* buf) {
|
||||
};
|
||||
}
|
||||
|
||||
void TFE_TensorHandleEnableImplicitMirroring(TFE_TensorHandle* h,
|
||||
TF_Status* status) {
|
||||
h->handle->EnableImplicitMirroring();
|
||||
status->status = tensorflow::Status::OK();
|
||||
}
|
||||
|
||||
void TFE_ContextGetFunctionDef(TFE_Context* ctx, const char* function_name,
|
||||
TF_Buffer* buf, TF_Status* status) {
|
||||
tensorflow::EagerContext* context =
|
||||
|
@ -392,12 +392,6 @@ TF_CAPI_EXPORT extern bool TFE_ContextCheckAlive(TFE_Context* ctx,
|
||||
TF_CAPI_EXPORT extern void TFE_ContextAsyncWait(TFE_Context* ctx,
|
||||
TF_Status* status);
|
||||
|
||||
// If the TensorHandle is copied to another device as part of an op execution,
|
||||
// the copy is destroyed after the op has executed. Enabling implicit mirroring
|
||||
// causes the copy to be held as a mirror for the lifetime of the TensorHandle.
|
||||
TF_CAPI_EXPORT extern void TFE_TensorHandleEnableImplicitMirroring(
|
||||
TFE_TensorHandle*, TF_Status*);
|
||||
|
||||
// This function will block till the operation that produces `h` has
|
||||
// completed. This is only valid on local TFE_TensorHandles. The pointer
|
||||
// returned will be on the device in which the TFE_TensorHandle resides (so e.g.
|
||||
|
@ -168,8 +168,6 @@ void TestRemoteExecuteSilentCopies(bool async, bool remote) {
|
||||
auto* h1_task2 =
|
||||
TFE_TensorHandleCopyToDevice(h1_task0, ctx, task2_name, status);
|
||||
ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
|
||||
TFE_TensorHandleEnableImplicitMirroring(h1_task2, status);
|
||||
EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
|
||||
|
||||
// Handles are on task0 (local), and task2, but op is on task1.
|
||||
TFE_Op* matmul = MatMulOp(ctx, h0_task0, h1_task2);
|
||||
|
@ -594,7 +594,6 @@ void ExecuteAdd(bool async, bool forward_input) {
|
||||
TFE_TensorHandle* n_gpu =
|
||||
TFE_TensorHandleCopyToDevice(n, ctx, gpu_device_name.c_str(), status);
|
||||
EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
|
||||
TFE_TensorHandleEnableImplicitMirroring(n_gpu, status);
|
||||
TFE_DeleteTensorHandle(n);
|
||||
n = n_gpu;
|
||||
}
|
||||
|
@ -59,14 +59,6 @@ class AbstractTensorHandleInterface {
|
||||
// Return a copy of the handle.
|
||||
virtual AbstractTensorHandleInterface* Copy() = 0;
|
||||
|
||||
// Maintain mirror tensors for any implicit copies to local devices. This
|
||||
// setting is offered on a per tensor handle basis to avoid potential memory
|
||||
// over utilization due to holding on to mirrors as well as the original
|
||||
// tensor. Note this setting overrides the context mirroring policy whereby if
|
||||
// the mirroring policy is MIRRORING_NONE, we will still continue to mirror
|
||||
// this tensor.
|
||||
virtual void EnableImplicitMirroring() = 0;
|
||||
|
||||
protected:
|
||||
virtual ~AbstractTensorHandleInterface() {}
|
||||
};
|
||||
|
@ -83,13 +83,12 @@ AbstractTensorInterface* TensorHandle::Resolve(Status* status) {
|
||||
} else {
|
||||
*status = CopyToDevice(*ctx_, ctx_->HostCPU(), &tensor);
|
||||
if (!status->ok()) return nullptr;
|
||||
if (ImplicitMirroring()) {
|
||||
*status = AddEmptyLocalMirror(nullptr);
|
||||
if (!status->ok()) return nullptr;
|
||||
tensorflow::Tensor mirror = tensor;
|
||||
*status = SetTensor(std::move(mirror), nullptr);
|
||||
if (!status->ok()) return nullptr;
|
||||
}
|
||||
|
||||
*status = AddEmptyLocalMirror(nullptr);
|
||||
if (!status->ok()) return nullptr;
|
||||
tensorflow::Tensor mirror = tensor;
|
||||
*status = SetTensor(std::move(mirror), nullptr);
|
||||
if (!status->ok()) return nullptr;
|
||||
}
|
||||
// TODO(b/153052876): Change TF_TensorFromTensor to just return an
|
||||
// AbstractTensorInterface
|
||||
|
@ -157,9 +157,9 @@ Status CopyInputToExpectedDevice(EagerContext* ctx, EagerOperation* op,
|
||||
" to ", expected_input_device->name());
|
||||
},
|
||||
profiler::TraceMeLevel::kInfo);
|
||||
Status status = EagerCopyToDevice(
|
||||
handle, ctx, &op->Executor(), expected_input_device,
|
||||
handle->ImplicitMirroring() || ctx->MirrorTensors(), &result_handle);
|
||||
Status status =
|
||||
EagerCopyToDevice(handle, ctx, &op->Executor(), expected_input_device,
|
||||
/* mirror= */ true, &result_handle);
|
||||
activity.Stop();
|
||||
if (!status.ok()) {
|
||||
return errors::Internal("Failed copying input tensor from ",
|
||||
@ -416,7 +416,7 @@ Status EagerLocalExecute(EagerOperation* op, TensorHandle** retvals,
|
||||
TensorHandle* handle = nullptr;
|
||||
TF_RETURN_IF_ERROR(EagerCopyToDevice(
|
||||
input, &ctx, &executor, device == nullptr ? ctx.HostCPU() : device,
|
||||
input->ImplicitMirroring() || ctx.MirrorTensors(), &handle));
|
||||
/* mirror= */ true, &handle));
|
||||
op->UpdateInput(i, handle);
|
||||
// Unref handle since it has a ref as an input now
|
||||
handle->Unref();
|
||||
|
@ -137,7 +137,6 @@ TensorHandle::TensorHandle(tensorflow::Tensor&& t, Device* d, Device* op_device,
|
||||
op_device_(op_device),
|
||||
resource_device_(resource_device),
|
||||
ctx_(ctx),
|
||||
implicit_mirroring_(true),
|
||||
data_(absl::in_place_type<LocalTensorHandleData>, std::move(t)) {
|
||||
DVLOG(3) << "Creating Local TensorHandle: " << this
|
||||
<< " device: " << VariantDeviceDebugString(device_)
|
||||
@ -152,7 +151,6 @@ TensorHandle::TensorHandle(tensorflow::Tensor&& t, Device* d, Device* op_device,
|
||||
resource_device_(
|
||||
GetResourceDevice(t.flat<class ResourceHandle>()(0), ctx)),
|
||||
ctx_(ctx),
|
||||
implicit_mirroring_(true),
|
||||
resource_handle_info_(
|
||||
{t.flat<class ResourceHandle>()(0).dtypes_and_shapes(),
|
||||
t.flat<class ResourceHandle>()(0).allowed_devices()}),
|
||||
@ -169,7 +167,6 @@ TensorHandle::TensorHandle(tensorflow::Tensor&& t, CustomDevice* d,
|
||||
op_device_(nullptr),
|
||||
resource_device_(nullptr),
|
||||
ctx_(ctx),
|
||||
implicit_mirroring_(true),
|
||||
data_(absl::in_place_type<LocalTensorHandleData>, std::move(t)) {
|
||||
// TODO(allenl): Figure out a better op_device story for custom devices,
|
||||
// since always setting it to CPU=nullptr doesn't make much sense.
|
||||
@ -193,7 +190,6 @@ TensorHandle::TensorHandle(Device* d, Device* op_device,
|
||||
op_device_(op_device),
|
||||
resource_device_(resource_device),
|
||||
ctx_(ctx),
|
||||
implicit_mirroring_(true),
|
||||
data_(absl::in_place_type<LocalTensorHandleData>) {
|
||||
DVLOG(3) << "Creating empty Local TensorHandle: " << this
|
||||
<< " device: " << VariantDeviceDebugString(device_);
|
||||
@ -215,7 +211,6 @@ TensorHandle::TensorHandle(int64 op_id, int32 output_num,
|
||||
op_device_(d),
|
||||
resource_device_(dtype == DT_RESOURCE ? d : nullptr),
|
||||
ctx_(ctx),
|
||||
implicit_mirroring_(true),
|
||||
data_(absl::in_place_type<RemoteTensorHandleData>, op_id, output_num,
|
||||
remote_task, ctx) {
|
||||
DVLOG(3) << "Creating Unshaped Remote TensorHandle: " << this
|
||||
@ -238,7 +233,6 @@ TensorHandle::TensorHandle(int64 op_id, int32 output_num,
|
||||
op_device_(d),
|
||||
resource_device_(dtype == DT_RESOURCE ? d : nullptr),
|
||||
ctx_(ctx),
|
||||
implicit_mirroring_(true),
|
||||
data_(absl::in_place_type<RemoteTensorHandleData>, op_id, output_num,
|
||||
ctx->GetContextViewId()) {
|
||||
DVLOG(3) << "Creating Lazy Remote TensorHandle: " << this
|
||||
|
@ -115,8 +115,6 @@ class TensorHandle : public AbstractTensorHandleInterface,
|
||||
|
||||
AbstractTensorHandleInterface* Copy() override;
|
||||
|
||||
void EnableImplicitMirroring() override { implicit_mirroring_ = true; }
|
||||
|
||||
// Return the Tensor from the default device.
|
||||
Status Tensor(const tensorflow::Tensor** t) const;
|
||||
// Return the Tensor from the specified device which could be either the
|
||||
@ -207,7 +205,6 @@ class TensorHandle : public AbstractTensorHandleInterface,
|
||||
const tensorflow::DataType dtype;
|
||||
|
||||
bool IsRemote() const;
|
||||
bool ImplicitMirroring() const { return implicit_mirroring_; }
|
||||
|
||||
string DebugString() const;
|
||||
|
||||
@ -276,7 +273,6 @@ class TensorHandle : public AbstractTensorHandleInterface,
|
||||
// Does not need synchronization because it can be accessed only after
|
||||
// WaitReady() has returned. At that point, is_poisoned_ is immutable.
|
||||
Status is_poisoned_;
|
||||
bool implicit_mirroring_;
|
||||
|
||||
// If this TensorHandle 1) is a local tensor, and 2) is a resource handle or
|
||||
// refers to a remote resource handle, we store data types, shapes and allowed
|
||||
|
@ -292,15 +292,6 @@ TFE_TensorHandle* ConvertToEagerTensorUncached(TFE_Context* ctx,
|
||||
}
|
||||
}
|
||||
|
||||
// We always enable implicit mirroring for constants. Without this, code
|
||||
// written previously under the assumption that
|
||||
//
|
||||
// with tf.device('GPU:0'): x = tf.constant(1.0)
|
||||
//
|
||||
// will be placed in the GPU will suffer a non-trivial performance regression
|
||||
// (measured at ~20% for certain benchmarks).
|
||||
handle->handle->EnableImplicitMirroring();
|
||||
|
||||
return handle.release();
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user