diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index c34a84fcfee..9f09ad1fc30 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -148,6 +148,62 @@ tf_cuda_cc_test( ], ) +tf_cuda_library( + name = "c_api_experimental", + srcs = [ + "c_api_experimental.cc", + ], + hdrs = ["c_api_experimental.h"], + copts = tf_copts() + tfe_xla_copts(), + visibility = ["//visibility:public"], + deps = select({ + "//tensorflow:android": [ + "//tensorflow/core:android_tensorflow_lib_lite", + ], + "//conditions:default": [ + ":c_api", + ":c_api_internal", + "//tensorflow/c:c_api", + "//tensorflow/c:c_api_internal", + "//tensorflow/core:core_cpu", + "//tensorflow/core/common_runtime/eager:attr_builder", + "//tensorflow/core/common_runtime/eager:context", + "//tensorflow/core/common_runtime/eager:eager_executor", + "//tensorflow/core/common_runtime/eager:execute", + "//tensorflow/core/common_runtime/eager:kernel_and_device", + "//tensorflow/core/common_runtime/eager:tensor_handle", + "//tensorflow/core/common_runtime/eager:copy_to_device_node", + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", + ], + }) + select({ + "//tensorflow:with_xla_support": [ + "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/jit", + "//tensorflow/compiler/jit:xla_device", + ], + "//conditions:default": [], + }) + [ + "@com_google_absl//absl/memory", + "//tensorflow/core/common_runtime/eager:eager_operation", + "//tensorflow/core/distributed_runtime/eager:eager_client", + "//tensorflow/core/distributed_runtime/rpc/eager:grpc_eager_client", + "//tensorflow/core/distributed_runtime/rpc:grpc_channel", + "//tensorflow/core/distributed_runtime/rpc:grpc_server_lib", + "//tensorflow/core/distributed_runtime/rpc:grpc_worker_cache", + "//tensorflow/core/distributed_runtime/rpc:grpc_worker_service", + 
"//tensorflow/core/distributed_runtime/rpc:rpc_rendezvous_mgr", + "//tensorflow/core/distributed_runtime:remote_device", + "//tensorflow/core/distributed_runtime:server_lib", + "//tensorflow/core/distributed_runtime:worker_env", + "//tensorflow/core:gpu_runtime", + ], +) + cc_library( name = "tape", hdrs = ["tape.h"], diff --git a/tensorflow/c/eager/c_api_experimental.cc b/tensorflow/c/eager/c_api_experimental.cc new file mode 100644 index 00000000000..3461d81b935 --- /dev/null +++ b/tensorflow/c/eager/c_api_experimental.cc @@ -0,0 +1,23 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/c/eager/c_api_experimental.h" + +#include "tensorflow/c/c_api.h" +#include "tensorflow/c/eager/c_api_internal.h" + +void TFE_OpConsumeInput(TFE_Op* op, TFE_TensorHandle* h, TF_Status* status) { + op->operation.ConsumeInput(h->handle); +} diff --git a/tensorflow/c/eager/c_api_experimental.h b/tensorflow/c/eager/c_api_experimental.h new file mode 100644 index 00000000000..4ee6c066eef --- /dev/null +++ b/tensorflow/c/eager/c_api_experimental.h @@ -0,0 +1,32 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_C_EAGER_C_API_EXPERIMENTAL_H_ +#define TENSORFLOW_C_EAGER_C_API_EXPERIMENTAL_H_ + +#include "tensorflow/c/c_api.h" +#include "tensorflow/c/eager/c_api.h" + +#ifdef __cplusplus +extern "C" { +#endif + +TF_CAPI_EXPORT extern void TFE_OpConsumeInput(TFE_Op* op, TFE_TensorHandle* h, + TF_Status* status); + +#ifdef __cplusplus +} /* end extern "C" */ +#endif + +#endif  // TENSORFLOW_C_EAGER_C_API_EXPERIMENTAL_H_ diff --git a/tensorflow/core/common_runtime/eager/eager_operation.cc b/tensorflow/core/common_runtime/eager/eager_operation.cc index 381b05ada85..0718e689323 100644 --- a/tensorflow/core/common_runtime/eager/eager_operation.cc +++ b/tensorflow/core/common_runtime/eager/eager_operation.cc @@ -30,4 +30,9 @@ void EagerOperation::AddInput(tensorflow::TensorHandle* h) { inputs_.push_back(h); attrs_.NumInputs(static_cast<int>(inputs_.size())); } + +void EagerOperation::ConsumeInput(tensorflow::TensorHandle* h) { + inputs_.push_back(h); + attrs_.NumInputs(static_cast<int>(inputs_.size())); +} } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/eager/eager_operation.h b/tensorflow/core/common_runtime/eager/eager_operation.h index 935ca7f9aa7..5a9e1f0292e 100644 --- a/tensorflow/core/common_runtime/eager/eager_operation.h +++ b/tensorflow/core/common_runtime/eager/eager_operation.h @@ -53,6 +53,7 @@ class EagerOperation { return &inputs_; } void AddInput(tensorflow::TensorHandle* h); + void ConsumeInput(tensorflow::TensorHandle* h); const tensorflow::string& Name() const
{ return name_; } const tensorflow::AttrTypeMap* AttrTypes() const { return attr_types_; } diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc index 783baa96c92..79806c3c732 100644 --- a/tensorflow/core/common_runtime/eager/execute.cc +++ b/tensorflow/core/common_runtime/eager/execute.cc @@ -712,22 +712,37 @@ Status EagerExecute(EagerContext* ctx, Device* device, std::vector<Tensor> outputs(1); const MemoryTypeVector* output_memory_types = nullptr; output_memory_types = &kernel->kernel()->output_memory_types(); - std::vector<Tensor> inputs(op_inputs.size()); + + // If there are multiple references to a TensorHandle in 'op_inputs' we must + // increment the reference count of the corresponding Tensor or risk it being + // overwritten during kernel execution. The reference count is incremented + // below when we insert a copy of the Tensor into protected_tensors, and will + // be decremented once execution is complete. + std::vector<Tensor> protected_tensors; for (int i = 0; i < op_inputs.size(); ++i) { - const Tensor* input_tensor = nullptr; - TF_RETURN_IF_ERROR(op_inputs[i]->Tensor(&input_tensor)); - inputs[i] = *input_tensor; + if (!op_inputs[i]->RefCountIsOne()) { + const Tensor* input_tensor = nullptr; + TF_RETURN_IF_ERROR(op_inputs[i]->Tensor(&input_tensor)); + protected_tensors.push_back(*input_tensor); + } } + + gtl::InlinedVector<TensorValue, 4> input_vector(op_inputs.size()); + for (int i = 0; i < op_inputs.size(); ++i) { + TF_RETURN_IF_ERROR(op_inputs[i]->TensorValue(&input_vector[i])); + } + + // TODO(apassos) figure out how to record stats for ops which are a part of // functions. // TODO(agarwal): change Run to take vector of handles ?
ScopedStepContainer* container = ctx->StepContainer(); if (container == nullptr) { - TF_RETURN_IF_ERROR(kernel->Run(&inputs, &outputs, maybe_stats, + TF_RETURN_IF_ERROR(kernel->Run(input_vector, &outputs, maybe_stats, maybe_step_stats, graph_collector)); } else { - TF_RETURN_IF_ERROR(kernel->Run(container, &inputs, &outputs, maybe_stats, - maybe_step_stats, graph_collector)); + TF_RETURN_IF_ERROR(kernel->Run(container, input_vector, &outputs, + maybe_stats, maybe_step_stats, + graph_collector)); } if (maybe_stats != nullptr) { int64 nanos = Env::Default()->NowNanos(); diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device.cc b/tensorflow/core/common_runtime/eager/kernel_and_device.cc index 317e9a16074..354f96c440c 100644 --- a/tensorflow/core/common_runtime/eager/kernel_and_device.cc +++ b/tensorflow/core/common_runtime/eager/kernel_and_device.cc @@ -57,7 +57,7 @@ Status KernelAndDevice::Init(const NodeDef& ndef, FunctionLibraryRuntime* flr, return OutputTypesForNode(ndef, *op_def, &out->output_dtypes_); } -Status KernelAndDevice::Run(std::vector<Tensor>* inputs, +Status KernelAndDevice::Run(const gtl::InlinedVector<TensorValue, 4>& inputs, std::vector<Tensor>* outputs, NodeExecStats* stats, StepStats* step_stats, GraphCollector* graph_collector) { @@ -69,15 +69,10 @@ Status KernelAndDevice::Run(std::vector<Tensor>* inputs, } Status KernelAndDevice::Run(ScopedStepContainer* step_container, - std::vector<Tensor>* inputs, + const gtl::InlinedVector<TensorValue, 4>& inputs, std::vector<Tensor>* outputs, NodeExecStats* stats, StepStats* step_stats, GraphCollector* graph_collector) { - gtl::InlinedVector<TensorValue, 4> input_vector; - for (Tensor& t : *inputs) { - input_vector.push_back(TensorValue(&t)); - } - std::vector<AllocatorAttributes> out_attrs(kernel_->num_outputs()); for (size_t i = 0; i < out_attrs.size(); ++i) { out_attrs[i].set_on_host(kernel_->output_memory_types()[i] == @@ -85,7 +80,7 @@ Status KernelAndDevice::Run(ScopedStepContainer* step_container, } gtl::InlinedVector<DeviceContext*, 4> input_device_contexts; - for (int i = 0; i < inputs->size(); i++) { + for
(int i = 0; i < inputs.size(); i++) { DeviceContext* device_context = nullptr; if (device_->tensorflow_gpu_device_info() != nullptr) { device_context = device_->tensorflow_gpu_device_info()->default_context; @@ -96,7 +91,7 @@ Status KernelAndDevice::Run(ScopedStepContainer* step_container, OpKernelContext::Params params; params.device = device_; params.frame_iter = FrameAndIter(0, 0); - params.inputs = &input_vector; + params.inputs = &inputs; params.op_kernel = kernel_.get(); params.resource_manager = device_->resource_manager(); params.output_attr_array = gtl::vector_as_array(&out_attrs); diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device.h b/tensorflow/core/common_runtime/eager/kernel_and_device.h index ee430b7fc70..f7a5b76518d 100644 --- a/tensorflow/core/common_runtime/eager/kernel_and_device.h +++ b/tensorflow/core/common_runtime/eager/kernel_and_device.h @@ -68,11 +68,12 @@ class KernelAndDevice { collective_executor_(std::move(collective_executor)) {} // TODO(ashankar): Handle list-valued inputs. 
- Status Run(std::vector<Tensor>* inputs, std::vector<Tensor>* outputs, - NodeExecStats* stats, StepStats* step_stats, - GraphCollector* graph_collector); + Status Run(const gtl::InlinedVector<TensorValue, 4>& inputs, + std::vector<Tensor>* outputs, NodeExecStats* stats, + StepStats* step_stats, GraphCollector* graph_collector); - Status Run(ScopedStepContainer* step_container, std::vector<Tensor>* inputs, + Status Run(ScopedStepContainer* step_container, + const gtl::InlinedVector<TensorValue, 4>& inputs, std::vector<Tensor>* outputs, NodeExecStats* stats, StepStats* step_stats, GraphCollector* graph_collector); diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device_test.cc b/tensorflow/core/common_runtime/eager/kernel_and_device_test.cc index 3ffed3ce321..8d6db967798 100644 --- a/tensorflow/core/common_runtime/eager/kernel_and_device_test.cc +++ b/tensorflow/core/common_runtime/eager/kernel_and_device_test.cc @@ -118,9 +118,9 @@ BENCHMARK(BM_KernelAndDeviceInit); void BM_KernelAndDeviceRun(int iters) { tensorflow::testing::StopTiming(); Tensor t(Input({{1.0f, 2.0f}, {3.0f, 4.0f}}).tensor()); - std::vector<Tensor> inputs; - inputs.push_back(t); - inputs.push_back(t); + gtl::InlinedVector<TensorValue, 4> inputs; + inputs.push_back(TensorValue(&t)); + inputs.push_back(TensorValue(&t)); std::vector<Tensor> outputs; NodeDef ndef(AttrBuilder("MatMul") .Set("T", DT_FLOAT) @@ -134,7 +134,7 @@ void BM_KernelAndDeviceRun(int iters) { nullptr, &kernel)); tensorflow::testing::StartTiming(); for (int i = 0; i < iters; ++i) { - TF_CHECK_OK(kernel.Run(&inputs, &outputs, nullptr, nullptr, nullptr)); + TF_CHECK_OK(kernel.Run(inputs, &outputs, nullptr, nullptr, nullptr)); } } BENCHMARK(BM_KernelAndDeviceRun); diff --git a/tensorflow/core/common_runtime/eager/tensor_handle.cc b/tensorflow/core/common_runtime/eager/tensor_handle.cc index 0acd1609361..47a856a36e7 100644 --- a/tensorflow/core/common_runtime/eager/tensor_handle.cc +++ b/tensorflow/core/common_runtime/eager/tensor_handle.cc @@ -27,7 +27,6 @@ limitations under the License.
#include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/eager/context.h" #include "tensorflow/core/common_runtime/eager/eager_executor.h" -#include "tensorflow/core/common_runtime/eager/kernel_and_device.h" #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" #include "tensorflow/core/framework/rendezvous.h" @@ -79,6 +78,13 @@ Status TensorHandle::Tensor(const tensorflow::Tensor** t) { return Status::OK(); } +Status TensorHandle::TensorValue(tensorflow::TensorValue* t) { + TF_RETURN_IF_ERROR(WaitReady()); + DCHECK(IsReady()); + *t = tensorflow::TensorValue(&tensor_); + return Status::OK(); +} + Status TensorHandle::TensorAndDevice(const tensorflow::Tensor** tensor, tensorflow::Device** device, tensorflow::Device** op_device) { diff --git a/tensorflow/core/common_runtime/eager/tensor_handle.h b/tensorflow/core/common_runtime/eager/tensor_handle.h index 0fdd31ab5fc..1c81087f1f3 100644 --- a/tensorflow/core/common_runtime/eager/tensor_handle.h +++ b/tensorflow/core/common_runtime/eager/tensor_handle.h @@ -27,7 +27,6 @@ limitations under the License. #include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/eager/context.h" #include "tensorflow/core/common_runtime/eager/eager_executor.h" -#include "tensorflow/core/common_runtime/eager/kernel_and_device.h" #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" #include "tensorflow/core/framework/rendezvous.h" @@ -102,6 +101,8 @@ class TensorHandle : public core::RefCounted { Status Tensor(const tensorflow::Tensor** t); + Status TensorValue(tensorflow::TensorValue* t); + tensorflow::Device* device() const { return device_; } tensorflow::Device* op_device() const { return op_device_; }