Implemented buffer interface for EagerTensor
While this change makes EagerTensor's __array__ redundant, __array__ is not removed due to numpy/numpy#13507.

Note also that unlike __array__, the buffer interface does not lead to a performance regression when np.array infers the dimensionality of [tensor]. See #27692 and numpy/numpy#8562 for details.

PiperOrigin-RevId: 249888195
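As an informal illustration (not part of the commit), a sketch of the Python-level behavior this enables; the import path follows the test file below, and the exact numbers are assumptions:

    import numpy as np
    from tensorflow.python.framework import constant_op

    t = constant_op.constant([1.0, 2.0])

    # The buffer interface exposes the tensor's host memory as read-only.
    view = memoryview(t)
    assert view.readonly

    # np.array can infer the dimensionality of [tensor] through the buffer
    # protocol, avoiding the per-element __array__ probing that caused the
    # regression tracked in #27692.
    batched = np.array([t])
    assert batched.shape == (1, 2)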
parent ae96bb6c65
commit e71b36a97a
@@ -993,6 +993,23 @@ const tensorflow::Tensor* TFE_TensorHandleUnderlyingTensorInHostMemory(
   return t;
 }
 
+TFE_TensorHandle* TFE_TensorHandleMaybeCopyToHostCPU(TFE_TensorHandle* h,
+                                                     TF_Status* status) {
+  // TensorHandles created by PyFuncOp lack context and therefore could
+  // not be copied.
+  if (!h->handle->OnHostCPU() && h->handle->Context() != nullptr) {
+    tensorflow::TensorHandle* handle;
+    status->status = tensorflow::EagerCopyToDevice(
+        h->handle, h->handle->Context(), "CPU:0", &handle);
+    if (status->status.ok()) {
+      return new TFE_TensorHandle(handle);
+    } else {
+      return nullptr;
+    }
+  }
+  return h;
+}
+
 void TFE_ContextExportRunMetadata(TFE_Context* ctx, TF_Buffer* buf,
                                   TF_Status* status) {
   TFE_ContextAsyncWait(ctx, status);
@@ -462,6 +462,9 @@ class Tensor;
 const tensorflow::Tensor* TFE_TensorHandleUnderlyingTensorInHostMemory(
     TFE_TensorHandle* h, TF_Status* status);
 
+TFE_TensorHandle* TFE_TensorHandleMaybeCopyToHostCPU(TFE_TensorHandle* h,
+                                                     TF_Status* status);
+
 TFE_TensorHandle* TFE_NewTensorHandle(const tensorflow::Tensor& t);
 #endif
 
@@ -19,6 +19,7 @@ limitations under the License.
 
 #include "structmember.h"  // NOLINT // For PyMemberDef
 #include "tensorflow/c/c_api.h"
+#include "tensorflow/c/eager/c_api_internal.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -69,6 +70,56 @@ TFE_TensorHandle* NumpyToTensorHandle(PyObject* obj) {
   }
 }
 
+// Convert a TFE_TensorHandle to a Python numpy.ndarray object.
+// The two may share underlying storage so changes to one may reflect in the
+// other.
+PyObject* TensorHandleToNumpy(TFE_TensorHandle* handle) {
+  auto status = tensorflow::make_safe(TF_NewStatus());
+  const tensorflow::Tensor* t =
+      TFE_TensorHandleUnderlyingTensorInHostMemory(handle, status.get());
+  if (MaybeRaiseExceptionFromTFStatus(status.get(), PyExc_RuntimeError)) {
+    // TODO(slebedev): emit a better error message if a Tensor is on GPU?
+    return nullptr;
+  }
+
+  // HACK(slebedev): The following explains why TensorToNdarray never
+  // reuses the storage.
+  //
+  // TF_TensorToPyArray copies the storage unless its
+  // refcount is 1. For DT_STRING and DT_RESOURCE TF_TensorFromTensor
+  // has to copy so the refcount of the original storage is unchanged.
+  // However, if the storage can be reused by TF_TensorFromTensor its
+  // refcount is +1'd and hence TF_TensorToPyArray no longer can reuse it.
+  //
+  // Here we attempt a direct conversion without an intermediate TF_Tensor
+  // and fall-back to the slow path on failure.
+  PyObject* ret = nullptr;
+  if (t->dtype() != tensorflow::DT_STRING &&
+      t->dtype() != tensorflow::DT_RESOURCE) {
+    tensorflow::gtl::InlinedVector<npy_intp, 4> dims(t->dims());
+    for (int d = 0; d < t->dims(); ++d) {
+      dims[d] = t->dim_size(d);
+    }
+
+    auto* copy = new tensorflow::Tensor(*t);
+    char* data = const_cast<char*>(copy->tensor_data().data());
+    if (tensorflow::ArrayFromMemory(
+            dims.size(), dims.data(), data, t->dtype(), [copy] { delete copy; },
+            &ret)
+            .ok()) {
+      return ret;
+    }
+  }
+
+  auto cppstatus = tensorflow::TensorToNdarray(*t, &ret);
+  if (MaybeRaiseExceptionFromStatus(cppstatus, PyExc_RuntimeError)) {
+    Py_XDECREF(ret);
+    return nullptr;
+  } else {
+    return ret;
+  }
+}
+
 TFE_TensorHandle* CopyToDevice(TFE_TensorHandle* handle, PyObject* ctx,
                                PyObject* dev) {
   const char* device = "";
@@ -599,6 +650,7 @@ static int EagerTensor_settensor_shape(EagerTensor* self, PyObject* value,
   self->tensor_shape = value;
   return 0;
 }
+
 // Function `_copy_to_device`.
 static PyObject* EagerTensor_copy_to_device(EagerTensor* self, PyObject* args,
                                             PyObject* kwds) {
@@ -620,50 +672,10 @@ static PyObject* EagerTensor_copy_to_device(EagerTensor* self, PyObject* args,
 // other.
 // Note that if `self` is not on CPU, we raise an Exception.
 static PyObject* EagerTensor_numpy(EagerTensor* self) {
-  auto status = tensorflow::make_safe(TF_NewStatus());
-  const tensorflow::Tensor* t =
-      TFE_TensorHandleUnderlyingTensorInHostMemory(self->handle, status.get());
-  if (TF_GetCode(status.get()) != TF_OK) {
-    PyErr_SetString(PyExc_RuntimeError, TF_Message(status.get()));
-    return nullptr;
-  }
-
-  // HACK(slebedev): The following explains why TensorToNdarray never
-  // reuses the storage.
-  //
-  // TF_TensorToPyArray copies the storage unless its
-  // refcount is 1. For DT_STRING and DT_RESOURCE TF_TensorFromTensor
-  // has to copy so the refcount of the original storage is unchanged.
-  // However, if the storage can be reused by TF_TensorFromTensor its
-  // refcount is +1'd and hence TF_TensorToPyArray no longer can reuse it.
-  //
-  // Here we attempt a direct conversion without an intermediate TF_Tensor
-  // and fall-back to the slow path on failure.
-  PyObject* ret = nullptr;
-  if (t->dtype() != tensorflow::DT_STRING &&
-      t->dtype() != tensorflow::DT_RESOURCE) {
-    tensorflow::gtl::InlinedVector<npy_intp, 4> dims(t->dims());
-    for (int d = 0; d < t->dims(); ++d) {
-      dims[d] = t->dim_size(d);
-    }
-
-    auto* copy = new tensorflow::Tensor(*t);
-    char* data = const_cast<char*>(copy->tensor_data().data());
-    if (tensorflow::ArrayFromMemory(
-            dims.size(), dims.data(), data, t->dtype(), [copy] { delete copy; },
-            &ret)
-            .ok()) {
-      return PyArray_Return(reinterpret_cast<PyArrayObject*>(ret));
-    }
-  }
-
-  auto cppstatus = tensorflow::TensorToNdarray(*t, &ret);
-  if (MaybeRaiseExceptionFromStatus(cppstatus, PyExc_RuntimeError)) {
-    Py_XDECREF(ret);
-    return nullptr;
-  } else {
-    return PyArray_Return(reinterpret_cast<PyArrayObject*>(ret));
-  }
+  PyObject* ret = TensorHandleToNumpy(self->handle);
+  return (ret == nullptr)
+             ? nullptr
+             : PyArray_Return(reinterpret_cast<PyArrayObject*>(ret));
 }
 
 // Getter `device`.
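A minimal Python sketch of the view semantics the rewritten _numpy path preserves, mirroring the test_numpyIsView test further down; whether storage is actually shared depends on hitting the non-string, non-resource fast path above:

    t = constant_op.constant([0.0])
    arr = t._numpy()   # may alias the tensor's host buffer (zero-copy path)
    arr[0] = 42.0      # the mutation is then visible through t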
@@ -737,6 +749,51 @@ static PyMethodDef EagerTensor_methods[] = {
     {nullptr, nullptr},
 };
 
+static int EagerTensor_getbuffer(EagerTensor* self, Py_buffer* view,
+                                 int flags) {
+  if ((flags & PyBUF_WRITABLE) == PyBUF_WRITABLE) {
+    PyErr_SetString(PyExc_BufferError, "EagerTensor is not writable.");
+    return -1;
+  }
+
+  auto status = tensorflow::make_safe(TF_NewStatus());
+  TFE_TensorHandle* handle =
+      TFE_TensorHandleMaybeCopyToHostCPU(self->handle, status.get());
+  if (TF_GetCode(status.get()) != TF_OK) {
+    PyErr_SetString(PyExc_BufferError,
+                    tensorflow::strings::StrCat("Error copying tensor to CPU:",
+                                                TF_Message(status.get()))
+                        .c_str());
+    return -1;
+  }
+
+  // TensorHandleToNumpy is zero-copy for everything but DT_RESOURCE and
+  // DT_STRING so the following is only slightly slower than a NumPy-free
+  // implementation.
+  auto py_array = tensorflow::make_safe(TensorHandleToNumpy(handle));
+  if (py_array == nullptr ||
+      PyObject_GetBuffer(py_array.get(), view, flags) < 0) {
+    return -1;
+  }
+
+  view->readonly = 1;
+
+  int num_dims = TFE_TensorHandleNumDims(handle, status.get());
+  if (MaybeRaiseExceptionFromTFStatus(status.get(), PyExc_BufferError)) {
+    return -1;
+  }
+  DCHECK(view->ndim == num_dims);
+  return 0;
+}
+
+static PyBufferProcs EagerTensor_as_buffer = {
+#if PY_MAJOR_VERSION < 3
+    nullptr, nullptr, nullptr, nullptr,
+#endif
+    (getbufferproc)EagerTensor_getbuffer,
+    // Never called because getbufferproc delegates to NumPy.
+    (releasebufferproc) nullptr};
+
 // Note that here we are trying to dynamically create a new class as a subclass
 // of a "HEAPTYPE" class that is itself created in python code and passed in at
 // runtime. This is fairly atypical and undocumented.
@@ -764,6 +821,9 @@ static PyType_Slot EagerTensor_Type_slots[] = {
     {0, nullptr},
 };
 #else
+
+#define EAGER_TENSOR_TPFLAGS (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_NEWBUFFER)
+
 // TODO(agarwal): support active_trace.
 static PyTypeObject _EagerTensorType = {
     // clang-format off
@@ -786,8 +846,8 @@ static PyTypeObject _EagerTensorType = {
     nullptr,                /* tp_str */
     nullptr,                /* tp_getattro */
     nullptr,                /* tp_setattro */
-    nullptr,                /* tp_as_buffer */
-    Py_TPFLAGS_DEFAULT,     /* tp_flags */
+    &EagerTensor_as_buffer, /* tp_as_buffer */
+    EAGER_TENSOR_TPFLAGS,   /* tp_flags */
     nullptr,                /* tp_doc */
     nullptr,                /* tp_traverse */
     nullptr,                /* tp_clear */
@@ -946,6 +1006,7 @@ PyObject* TFE_Py_InitEagerTensor(PyObject* base_class) {
     return nullptr;
   }
   EagerTensorType->tp_dictoffset = offsetof(EagerTensor, dict);
+  EagerTensorType->tp_as_buffer = &EagerTensor_as_buffer;
 #else
   _EagerTensorType.tp_base = base_class_type;
 
@@ -387,6 +387,34 @@ class TFETensorTest(test_util.TensorFlowTestCase):
     self.assertEqual(
         constant_op.constant(t.min, dtype=t).numpy(), t.min)
 
+  def test_numpyIsView(self):
+    t = constant_op.constant([0.0])
+    t._numpy()[0] = 42.0
+    self.assertAllClose(t, constant_op.constant([42.0]))
+
+  def testMemoryviewIsReadonly(self):
+    t = constant_op.constant([0.0])
+    self.assertTrue(memoryview(t).readonly)
+
+  @test_util.assert_no_new_pyobjects_executing_eagerly
+  def testMemoryviewScalar(self):
+    t = constant_op.constant(42.0)
+    self.assertAllEqual(
+        np.array(memoryview(t)), np.array(42.0, dtype=np.float32))
+
+  @test_util.assert_no_new_pyobjects_executing_eagerly
+  def testMemoryviewEmpty(self):
+    t = constant_op.constant([], dtype=np.float32)
+    self.assertAllEqual(np.array(memoryview(t)), np.array([]))
+
+  @test_util.run_gpu_only
+  @test_util.assert_no_new_pyobjects_executing_eagerly
+  def testMemoryviewCopyToCPU(self):
+    with ops.device("/device:GPU:0"):
+      t = constant_op.constant([0.0])
+    self.assertAllEqual(
+        np.array(memoryview(t)), np.array([0.0], dtype=np.float32))
+
 
 class TFETensorUtilTest(test_util.TensorFlowTestCase):
 
@@ -499,11 +527,6 @@ class TFETensorUtilTest(test_util.TensorFlowTestCase):
         ValueError, "non-rectangular Python sequence"):
       constant_op.constant(l)
 
-  def test_numpyIsView(self):
-    t = constant_op.constant([0.0])
-    t._numpy()[0] = 42.0
-    self.assertAllClose(t, constant_op.constant([42.0]))
-
 
 if __name__ == "__main__":
   test.main()
@@ -780,6 +780,9 @@ class _EagerTensorBase(Tensor):
     return int(maybe_arr)  # Must be a NumPy scalar.
 
   def __array__(self, dtype=None):
+    # This is only called if the buffer interface conversion failed.
+    # Remove once numpy/numpy#13507 is merged and released or py_function
+    # creates EagerTensors with a non-nullptr context.
     return np.asarray(self.numpy(), dtype=dtype)
 
   def __format__(self, format_spec):
@@ -188,13 +188,9 @@ Status TF_DataType_to_PyArray_TYPE(TF_DataType tf_datatype,
 
 Status ArrayFromMemory(int dim_size, npy_intp* dims, void* data, DataType dtype,
                        std::function<void()> destructor, PyObject** result) {
-  int size = 1;
-  for (int i = 0; i < dim_size; ++i) {
-    size *= dims[i];
-  }
-  if (dtype == DT_STRING || dtype == DT_RESOURCE || size == 0) {
+  if (dtype == DT_STRING || dtype == DT_RESOURCE) {
     return errors::FailedPrecondition(
-        "Cannot convert strings, resources, or empty Tensors.");
+        "Cannot convert string or resource Tensors.");
   }
 
   int type_num = -1;
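The relaxed precondition in the last hunk lets ArrayFromMemory wrap empty tensors zero-copy as well; a small sketch of the case it unblocks, mirroring testMemoryviewEmpty above (the shape assertion is an assumption):

    import numpy as np
    from tensorflow.python.framework import constant_op

    t = constant_op.constant([], dtype=np.float32)
    # Previously rejected with "Cannot convert ... empty Tensors."
    assert np.array(memoryview(t)).shape == (0,)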