Implemented buffer interface for EagerTensor

While this change makes EagerTensor.__array__ redundant, it does not
remove __array__ due to numpy/numpy#13507.

Note also that, unlike __array__, the buffer interface does not lead
to a performance regression when np.array infers the dimensionality
of [tensor] (a tensor wrapped in a Python list). See #27692 and
numpy/numpy#8562 for details.
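
As a quick illustration, here is a minimal sketch of both paths (not part
of this change; assumes eager execution is enabled, i.e. TF 2.x or
tf.enable_eager_execution() on 1.x):

import numpy as np
import tensorflow as tf

t = tf.constant([1.0, 2.0, 3.0])

# NumPy can now read the tensor's memory directly through the buffer
# protocol, so conversions no longer round-trip through __array__.
view = memoryview(t)  # read-only, zero-copy view of the tensor's storage
arr = np.array(t)     # converted via the buffer interface

# Wrapping the tensor in a list no longer hits the slow path where NumPy
# probes each element to infer the result's dimensionality (see #27692).
stacked = np.array([t])  # shape (1, 3)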

PiperOrigin-RevId: 249888195
Author:     Sergei Lebedev
Date:       2019-05-24 13:22:45 -07:00
Committer:  TensorFlower Gardener
Parent:     ae96bb6c65
Commit:     e71b36a97a

6 changed files with 160 additions and 57 deletions

---- File 1 of 6 ----

@@ -993,6 +993,23 @@ const tensorflow::Tensor* TFE_TensorHandleUnderlyingTensorInHostMemory(
   return t;
 }
 
+TFE_TensorHandle* TFE_TensorHandleMaybeCopyToHostCPU(TFE_TensorHandle* h,
+                                                     TF_Status* status) {
+  // TensorHandles created by PyFuncOp lack a context and therefore cannot
+  // be copied.
+  if (!h->handle->OnHostCPU() && h->handle->Context() != nullptr) {
+    tensorflow::TensorHandle* handle;
+    status->status = tensorflow::EagerCopyToDevice(
+        h->handle, h->handle->Context(), "CPU:0", &handle);
+    if (status->status.ok()) {
+      return new TFE_TensorHandle(handle);
+    } else {
+      return nullptr;
+    }
+  }
+  return h;
+}
+
 void TFE_ContextExportRunMetadata(TFE_Context* ctx, TF_Buffer* buf,
                                   TF_Status* status) {
   TFE_ContextAsyncWait(ctx, status);

---- File 2 of 6 ----

@@ -462,6 +462,9 @@ class Tensor;
 const tensorflow::Tensor* TFE_TensorHandleUnderlyingTensorInHostMemory(
     TFE_TensorHandle* h, TF_Status* status);
 
+TFE_TensorHandle* TFE_TensorHandleMaybeCopyToHostCPU(TFE_TensorHandle* h,
+                                                     TF_Status* status);
+
 TFE_TensorHandle* TFE_NewTensorHandle(const tensorflow::Tensor& t);
 #endif

---- File 3 of 6 ----

@@ -19,6 +19,7 @@ limitations under the License.
 #include "structmember.h"  // NOLINT // For PyMemberDef
 #include "tensorflow/c/c_api.h"
 #include "tensorflow/c/eager/c_api_internal.h"
+#include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -69,6 +70,56 @@ TFE_TensorHandle* NumpyToTensorHandle(PyObject* obj) {
   }
 }
 
+// Convert a TFE_TensorHandle to a Python numpy.ndarray object.
+// The two may share underlying storage, so changes to one may be
+// reflected in the other.
+PyObject* TensorHandleToNumpy(TFE_TensorHandle* handle) {
+  auto status = tensorflow::make_safe(TF_NewStatus());
+  const tensorflow::Tensor* t =
+      TFE_TensorHandleUnderlyingTensorInHostMemory(handle, status.get());
+  if (MaybeRaiseExceptionFromTFStatus(status.get(), PyExc_RuntimeError)) {
+    // TODO(slebedev): emit a better error message if a Tensor is on GPU?
+    return nullptr;
+  }
+
+  // HACK(slebedev): The following explains why TensorToNdarray never
+  // reuses the storage.
+  //
+  // TF_TensorToPyArray copies the storage unless its refcount is 1.
+  // For DT_STRING and DT_RESOURCE, TF_TensorFromTensor has to copy, so
+  // the refcount of the original storage is unchanged. However, if the
+  // storage can be reused by TF_TensorFromTensor, its refcount is +1'd,
+  // and hence TF_TensorToPyArray can no longer reuse it.
+  //
+  // Here we attempt a direct conversion without an intermediate TF_Tensor
+  // and fall back to the slow path on failure.
+  PyObject* ret = nullptr;
+  if (t->dtype() != tensorflow::DT_STRING &&
+      t->dtype() != tensorflow::DT_RESOURCE) {
+    tensorflow::gtl::InlinedVector<npy_intp, 4> dims(t->dims());
+    for (int d = 0; d < t->dims(); ++d) {
+      dims[d] = t->dim_size(d);
+    }
+    auto* copy = new tensorflow::Tensor(*t);
+    char* data = const_cast<char*>(copy->tensor_data().data());
+    if (tensorflow::ArrayFromMemory(
+            dims.size(), dims.data(), data, t->dtype(),
+            [copy] { delete copy; }, &ret)
+            .ok()) {
+      return ret;
+    }
+  }
+
+  auto cppstatus = tensorflow::TensorToNdarray(*t, &ret);
+  if (MaybeRaiseExceptionFromStatus(cppstatus, PyExc_RuntimeError)) {
+    Py_XDECREF(ret);
+    return nullptr;
+  } else {
+    return ret;
+  }
+}
+
 TFE_TensorHandle* CopyToDevice(TFE_TensorHandle* handle, PyObject* ctx,
                                PyObject* dev) {
   const char* device = "";
@@ -599,6 +650,7 @@ static int EagerTensor_settensor_shape(EagerTensor* self, PyObject* value,
   self->tensor_shape = value;
   return 0;
 }
+
 // Function `_copy_to_device`.
 static PyObject* EagerTensor_copy_to_device(EagerTensor* self, PyObject* args,
                                             PyObject* kwds) {
@@ -620,50 +672,10 @@ static PyObject* EagerTensor_copy_to_device(EagerTensor* self, PyObject* args,
 // other.
 // Note that if `self` is not on CPU, we raise an Exception.
 static PyObject* EagerTensor_numpy(EagerTensor* self) {
-  auto status = tensorflow::make_safe(TF_NewStatus());
-  const tensorflow::Tensor* t =
-      TFE_TensorHandleUnderlyingTensorInHostMemory(self->handle, status.get());
-  if (TF_GetCode(status.get()) != TF_OK) {
-    PyErr_SetString(PyExc_RuntimeError, TF_Message(status.get()));
-    return nullptr;
-  }
-
-  // HACK(slebedev): The following explains why TensorToNdarray never
-  // reuses the storage.
-  //
-  // TF_TensorToPyArray copies the storage unless its
-  // refcount is 1. For DT_STRING and DT_RESOURCE TF_TensorFromTensor
-  // has to copy so the refcount of the original storage is unchanged.
-  // However, if the storage can be reused by TF_TensorFromTensor its
-  // refcount is +1'd and hence TF_TensorToPyArray no longer can reuse it.
-  //
-  // Here we attempt a direct conversion without an intermediate TF_Tensor
-  // and fall-back to the slow path on failure.
-  PyObject* ret = nullptr;
-  if (t->dtype() != tensorflow::DT_STRING &&
-      t->dtype() != tensorflow::DT_RESOURCE) {
-    tensorflow::gtl::InlinedVector<npy_intp, 4> dims(t->dims());
-    for (int d = 0; d < t->dims(); ++d) {
-      dims[d] = t->dim_size(d);
-    }
-    auto* copy = new tensorflow::Tensor(*t);
-    char* data = const_cast<char*>(copy->tensor_data().data());
-    if (tensorflow::ArrayFromMemory(
-            dims.size(), dims.data(), data, t->dtype(),
-            [copy] { delete copy; }, &ret)
-            .ok()) {
-      return PyArray_Return(reinterpret_cast<PyArrayObject*>(ret));
-    }
-  }
-
-  auto cppstatus = tensorflow::TensorToNdarray(*t, &ret);
-  if (MaybeRaiseExceptionFromStatus(cppstatus, PyExc_RuntimeError)) {
-    Py_XDECREF(ret);
-    return nullptr;
-  } else {
-    return PyArray_Return(reinterpret_cast<PyArrayObject*>(ret));
-  }
+  PyObject* ret = TensorHandleToNumpy(self->handle);
+  return (ret == nullptr)
+             ? nullptr
+             : PyArray_Return(reinterpret_cast<PyArrayObject*>(ret));
 }
 
 // Getter `device`.
@@ -737,6 +749,51 @@ static PyMethodDef EagerTensor_methods[] = {
     {nullptr, nullptr},
 };
 
+static int EagerTensor_getbuffer(EagerTensor* self, Py_buffer* view,
+                                 int flags) {
+  if ((flags & PyBUF_WRITABLE) == PyBUF_WRITABLE) {
+    PyErr_SetString(PyExc_BufferError, "EagerTensor is not writable.");
+    return -1;
+  }
+
+  auto status = tensorflow::make_safe(TF_NewStatus());
+  TFE_TensorHandle* handle =
+      TFE_TensorHandleMaybeCopyToHostCPU(self->handle, status.get());
+  if (TF_GetCode(status.get()) != TF_OK) {
+    PyErr_SetString(
+        PyExc_BufferError,
+        tensorflow::strings::StrCat("Error copying tensor to CPU: ",
+                                    TF_Message(status.get()))
+            .c_str());
+    return -1;
+  }
+
+  // TensorHandleToNumpy is zero-copy for everything but DT_RESOURCE and
+  // DT_STRING, so the following is only slightly slower than a NumPy-free
+  // implementation.
+  auto py_array = tensorflow::make_safe(TensorHandleToNumpy(handle));
+  if (py_array == nullptr ||
+      PyObject_GetBuffer(py_array.get(), view, flags) < 0) {
+    return -1;
+  }
+  view->readonly = 1;
+
+  int num_dims = TFE_TensorHandleNumDims(handle, status.get());
+  if (MaybeRaiseExceptionFromTFStatus(status.get(), PyExc_BufferError)) {
+    return -1;
+  }
+  DCHECK(view->ndim == num_dims);
+  return 0;
+}
+
+static PyBufferProcs EagerTensor_as_buffer = {
+#if PY_MAJOR_VERSION < 3
+    nullptr, nullptr, nullptr, nullptr,
+#endif
+    (getbufferproc)EagerTensor_getbuffer,
+    // Never called because getbufferproc delegates to NumPy.
+    (releasebufferproc) nullptr};
+
 // Note that here we are trying to dynamically create a new class as a subclass
 // of a "HEAPTYPE" class that is itself created in python code and passed in at
 // runtime. This is fairly atypical and undocumented.
@@ -764,6 +821,9 @@ static PyType_Slot EagerTensor_Type_slots[] = {
     {0, nullptr},
 };
 #else
+
+#define EAGER_TENSOR_TPFLAGS (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_NEWBUFFER)
+
 // TODO(agarwal): support active_trace.
 static PyTypeObject _EagerTensorType = {
     // clang-format off
@@ -786,8 +846,8 @@ static PyTypeObject _EagerTensorType = {
     nullptr,                /* tp_str */
     nullptr,                /* tp_getattro */
     nullptr,                /* tp_setattro */
-    nullptr,                /* tp_as_buffer */
-    Py_TPFLAGS_DEFAULT,     /* tp_flags */
+    &EagerTensor_as_buffer, /* tp_as_buffer */
+    EAGER_TENSOR_TPFLAGS,   /* tp_flags */
     nullptr,                /* tp_doc */
     nullptr,                /* tp_traverse */
     nullptr,                /* tp_clear */
@@ -946,6 +1006,7 @@ PyObject* TFE_Py_InitEagerTensor(PyObject* base_class) {
     return nullptr;
   }
   EagerTensorType->tp_dictoffset = offsetof(EagerTensor, dict);
+  EagerTensorType->tp_as_buffer = &EagerTensor_as_buffer;
 #else
   _EagerTensorType.tp_base = base_class_type;
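
For reference, a small usage sketch of the behavior wired up above
(illustrative only; it mirrors the tests in the next file):

import numpy as np
import tensorflow as tf

t = tf.constant([[1.0, 2.0], [3.0, 4.0]])

m = memoryview(t)   # enters EagerTensor_getbuffer above
assert m.readonly   # the getbuffer hook forces view->readonly = 1
assert m.ndim == 2  # dimensionality matches TFE_TensorHandleNumDims

# Writes through the view are rejected, matching the PyBUF_WRITABLE check:
try:
    m.cast("B")[0] = 0
except TypeError:
    pass  # "cannot modify read-only memory"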

---- File 4 of 6 ----

@@ -387,6 +387,34 @@ class TFETensorTest(test_util.TensorFlowTestCase):
     self.assertEqual(
         constant_op.constant(t.min, dtype=t).numpy(), t.min)
 
+  def test_numpyIsView(self):
+    t = constant_op.constant([0.0])
+    t._numpy()[0] = 42.0
+    self.assertAllClose(t, constant_op.constant([42.0]))
+
+  def testMemoryviewIsReadonly(self):
+    t = constant_op.constant([0.0])
+    self.assertTrue(memoryview(t).readonly)
+
+  @test_util.assert_no_new_pyobjects_executing_eagerly
+  def testMemoryviewScalar(self):
+    t = constant_op.constant(42.0)
+    self.assertAllEqual(
+        np.array(memoryview(t)), np.array(42.0, dtype=np.float32))
+
+  @test_util.assert_no_new_pyobjects_executing_eagerly
+  def testMemoryviewEmpty(self):
+    t = constant_op.constant([], dtype=np.float32)
+    self.assertAllEqual(np.array(memoryview(t)), np.array([]))
+
+  @test_util.run_gpu_only
+  @test_util.assert_no_new_pyobjects_executing_eagerly
+  def testMemoryviewCopyToCPU(self):
+    with ops.device("/device:GPU:0"):
+      t = constant_op.constant([0.0])
+    self.assertAllEqual(
+        np.array(memoryview(t)), np.array([0.0], dtype=np.float32))
+
 
 class TFETensorUtilTest(test_util.TensorFlowTestCase):
@@ -499,11 +527,6 @@ class TFETensorUtilTest(test_util.TensorFlowTestCase):
         ValueError, "non-rectangular Python sequence"):
       constant_op.constant(l)
 
-  def test_numpyIsView(self):
-    t = constant_op.constant([0.0])
-    t._numpy()[0] = 42.0
-    self.assertAllClose(t, constant_op.constant([42.0]))
-
 
 if __name__ == "__main__":
   test.main()

---- File 5 of 6 ----

@@ -780,6 +780,9 @@ class _EagerTensorBase(Tensor):
     return int(maybe_arr)  # Must be a NumPy scalar.
 
   def __array__(self, dtype=None):
+    # This is only called if the buffer interface conversion failed.
+    # Remove once numpy/numpy#13507 is merged and released, or once
+    # py_function creates EagerTensors with a non-nullptr context.
     return np.asarray(self.numpy(), dtype=dtype)
 
   def __format__(self, format_spec):

---- File 6 of 6 ----

@@ -188,13 +188,9 @@ Status TF_DataType_to_PyArray_TYPE(TF_DataType tf_datatype,
 
 Status ArrayFromMemory(int dim_size, npy_intp* dims, void* data, DataType dtype,
                        std::function<void()> destructor, PyObject** result) {
-  int size = 1;
-  for (int i = 0; i < dim_size; ++i) {
-    size *= dims[i];
-  }
-  if (dtype == DT_STRING || dtype == DT_RESOURCE || size == 0) {
+  if (dtype == DT_STRING || dtype == DT_RESOURCE) {
     return errors::FailedPrecondition(
-        "Cannot convert strings, resources, or empty Tensors.");
+        "Cannot convert string or resource Tensors.");
   }
   int type_num = -1;