Implemented buffer interface for EagerTensor
While this change makes EagerTensor's __array__ redundant, __array__ is not removed due to numpy/numpy#13507.

Note also that unlike __array__, the buffer interface does not lead to a performance regression when np.array infers the dimensionality of [tensor]. See #27692 and numpy/numpy#8562 for details.

PiperOrigin-RevId: 249888195
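As an informal illustration (not part of the commit), a sketch of the Python-level behavior this enables; the import path follows the test file below, and the exact numbers are assumptions:

    import numpy as np
    from tensorflow.python.framework import constant_op

    t = constant_op.constant([1.0, 2.0])

    # The buffer interface exposes the tensor's host memory as read-only.
    view = memoryview(t)
    assert view.readonly

    # np.array can infer the dimensionality of [tensor] through the buffer
    # protocol, avoiding the per-element __array__ probing that caused the
    # regression tracked in #27692.
    batched = np.array([t])
    assert batched.shape == (1, 2)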
parent ae96bb6c65
commit e71b36a97a
@@ -993,6 +993,23 @@ const tensorflow::Tensor* TFE_TensorHandleUnderlyingTensorInHostMemory(
   return t;
 }
 
+TFE_TensorHandle* TFE_TensorHandleMaybeCopyToHostCPU(TFE_TensorHandle* h,
+                                                     TF_Status* status) {
+  // TensorHandles created by PyFuncOp lack context and therefore could
+  // not be copied.
+  if (!h->handle->OnHostCPU() && h->handle->Context() != nullptr) {
+    tensorflow::TensorHandle* handle;
+    status->status = tensorflow::EagerCopyToDevice(
+        h->handle, h->handle->Context(), "CPU:0", &handle);
+    if (status->status.ok()) {
+      return new TFE_TensorHandle(handle);
+    } else {
+      return nullptr;
+    }
+  }
+  return h;
+}
+
 void TFE_ContextExportRunMetadata(TFE_Context* ctx, TF_Buffer* buf,
                                   TF_Status* status) {
   TFE_ContextAsyncWait(ctx, status);
@@ -462,6 +462,9 @@ class Tensor;
 const tensorflow::Tensor* TFE_TensorHandleUnderlyingTensorInHostMemory(
     TFE_TensorHandle* h, TF_Status* status);
 
+TFE_TensorHandle* TFE_TensorHandleMaybeCopyToHostCPU(TFE_TensorHandle* h,
+                                                     TF_Status* status);
+
 TFE_TensorHandle* TFE_NewTensorHandle(const tensorflow::Tensor& t);
 #endif
 
@@ -19,6 +19,7 @@ limitations under the License.
 
 #include "structmember.h"  // NOLINT // For PyMemberDef
 #include "tensorflow/c/c_api.h"
+#include "tensorflow/c/eager/c_api_internal.h"
 #include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -69,6 +70,56 @@ TFE_TensorHandle* NumpyToTensorHandle(PyObject* obj) {
   }
 }
 
+// Convert a TFE_TensorHandle to a Python numpy.ndarray object.
+// The two may share underlying storage so changes to one may reflect in the
+// other.
+PyObject* TensorHandleToNumpy(TFE_TensorHandle* handle) {
+  auto status = tensorflow::make_safe(TF_NewStatus());
+  const tensorflow::Tensor* t =
+      TFE_TensorHandleUnderlyingTensorInHostMemory(handle, status.get());
+  if (MaybeRaiseExceptionFromTFStatus(status.get(), PyExc_RuntimeError)) {
+    // TODO(slebedev): emit a better error message if a Tensor is on GPU?
+    return nullptr;
+  }
+
+  // HACK(slebedev): The following explains why TensorToNdarray never
+  // reuses the storage.
+  //
+  // TF_TensorToPyArray copies the storage unless its
+  // refcount is 1. For DT_STRING and DT_RESOURCE TF_TensorFromTensor
+  // has to copy so the refcount of the original storage is unchanged.
+  // However, if the storage can be reused by TF_TensorFromTensor its
+  // refcount is +1'd and hence TF_TensorToPyArray no longer can reuse it.
+  //
+  // Here we attempt a direct conversion without an intermediate TF_Tensor
+  // and fall-back to the slow path on failure.
+  PyObject* ret = nullptr;
+  if (t->dtype() != tensorflow::DT_STRING &&
+      t->dtype() != tensorflow::DT_RESOURCE) {
+    tensorflow::gtl::InlinedVector<npy_intp, 4> dims(t->dims());
+    for (int d = 0; d < t->dims(); ++d) {
+      dims[d] = t->dim_size(d);
+    }
+
+    auto* copy = new tensorflow::Tensor(*t);
+    char* data = const_cast<char*>(copy->tensor_data().data());
+    if (tensorflow::ArrayFromMemory(
+            dims.size(), dims.data(), data, t->dtype(), [copy] { delete copy; },
+            &ret)
+            .ok()) {
+      return ret;
+    }
+  }
+
+  auto cppstatus = tensorflow::TensorToNdarray(*t, &ret);
+  if (MaybeRaiseExceptionFromStatus(cppstatus, PyExc_RuntimeError)) {
+    Py_XDECREF(ret);
+    return nullptr;
+  } else {
+    return ret;
+  }
+}
+
 TFE_TensorHandle* CopyToDevice(TFE_TensorHandle* handle, PyObject* ctx,
                                PyObject* dev) {
   const char* device = "";
@@ -599,6 +650,7 @@ static int EagerTensor_settensor_shape(EagerTensor* self, PyObject* value,
   self->tensor_shape = value;
   return 0;
 }
+
 // Function `_copy_to_device`.
 static PyObject* EagerTensor_copy_to_device(EagerTensor* self, PyObject* args,
                                             PyObject* kwds) {
@@ -620,50 +672,10 @@ static PyObject* EagerTensor_copy_to_device(EagerTensor* self, PyObject* args,
 // other.
 // Note that if `self` is not on CPU, we raise an Exception.
 static PyObject* EagerTensor_numpy(EagerTensor* self) {
-  auto status = tensorflow::make_safe(TF_NewStatus());
-  const tensorflow::Tensor* t =
-      TFE_TensorHandleUnderlyingTensorInHostMemory(self->handle, status.get());
-  if (TF_GetCode(status.get()) != TF_OK) {
-    PyErr_SetString(PyExc_RuntimeError, TF_Message(status.get()));
-    return nullptr;
-  }
-
-  // HACK(slebedev): The following explains why TensorToNdarray never
-  // reuses the storage.
-  //
-  // TF_TensorToPyArray copies the storage unless its
-  // refcount is 1. For DT_STRING and DT_RESOURCE TF_TensorFromTensor
-  // has to copy so the refcount of the original storage is unchanged.
-  // However, if the storage can be reused by TF_TensorFromTensor its
-  // refcount is +1'd and hence TF_TensorToPyArray no longer can reuse it.
-  //
-  // Here we attempt a direct conversion without an intermediate TF_Tensor
-  // and fall-back to the slow path on failure.
-  PyObject* ret = nullptr;
-  if (t->dtype() != tensorflow::DT_STRING &&
-      t->dtype() != tensorflow::DT_RESOURCE) {
-    tensorflow::gtl::InlinedVector<npy_intp, 4> dims(t->dims());
-    for (int d = 0; d < t->dims(); ++d) {
-      dims[d] = t->dim_size(d);
-    }
-
-    auto* copy = new tensorflow::Tensor(*t);
-    char* data = const_cast<char*>(copy->tensor_data().data());
-    if (tensorflow::ArrayFromMemory(
-            dims.size(), dims.data(), data, t->dtype(), [copy] { delete copy; },
-            &ret)
-            .ok()) {
-      return PyArray_Return(reinterpret_cast<PyArrayObject*>(ret));
-    }
-  }
-
-  auto cppstatus = tensorflow::TensorToNdarray(*t, &ret);
-  if (MaybeRaiseExceptionFromStatus(cppstatus, PyExc_RuntimeError)) {
-    Py_XDECREF(ret);
-    return nullptr;
-  } else {
-    return PyArray_Return(reinterpret_cast<PyArrayObject*>(ret));
-  }
+  PyObject* ret = TensorHandleToNumpy(self->handle);
+  return (ret == nullptr)
+             ? nullptr
+             : PyArray_Return(reinterpret_cast<PyArrayObject*>(ret));
 }
 
 // Getter `device`.
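A minimal Python sketch of the view semantics the rewritten _numpy path preserves, mirroring the test_numpyIsView test further down; whether storage is actually shared depends on hitting the non-string, non-resource fast path above:

    t = constant_op.constant([0.0])
    arr = t._numpy()   # may alias the tensor's host buffer (zero-copy path)
    arr[0] = 42.0      # the mutation is then visible through t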
@@ -737,6 +749,51 @@ static PyMethodDef EagerTensor_methods[] = {
     {nullptr, nullptr},
 };
 
+static int EagerTensor_getbuffer(EagerTensor* self, Py_buffer* view,
+                                 int flags) {
+  if ((flags & PyBUF_WRITABLE) == PyBUF_WRITABLE) {
+    PyErr_SetString(PyExc_BufferError, "EagerTensor is not writable.");
+    return -1;
+  }
+
+  auto status = tensorflow::make_safe(TF_NewStatus());
+  TFE_TensorHandle* handle =
+      TFE_TensorHandleMaybeCopyToHostCPU(self->handle, status.get());
+  if (TF_GetCode(status.get()) != TF_OK) {
+    PyErr_SetString(PyExc_BufferError,
+                    tensorflow::strings::StrCat("Error copying tensor to CPU:",
+                                                TF_Message(status.get()))
+                        .c_str());
+    return -1;
+  }
+
+  // TensorHandleToNumpy is zero-copy for everything but DT_RESOURCE and
+  // DT_STRING so the following is only slightly slower than a NumPy-free
+  // implementation.
+  auto py_array = tensorflow::make_safe(TensorHandleToNumpy(handle));
+  if (py_array == nullptr ||
+      PyObject_GetBuffer(py_array.get(), view, flags) < 0) {
+    return -1;
+  }
+
+  view->readonly = 1;
+
+  int num_dims = TFE_TensorHandleNumDims(handle, status.get());
+  if (MaybeRaiseExceptionFromTFStatus(status.get(), PyExc_BufferError)) {
+    return -1;
+  }
+  DCHECK(view->ndim == num_dims);
+  return 0;
+}
+
+static PyBufferProcs EagerTensor_as_buffer = {
+#if PY_MAJOR_VERSION < 3
+    nullptr, nullptr, nullptr, nullptr,
+#endif
+    (getbufferproc)EagerTensor_getbuffer,
+    // Never called because getbufferproc delegates to NumPy.
+    (releasebufferproc) nullptr};
+
 // Note that here we are trying to dynamically create a new class as a subclass
 // of a "HEAPTYPE" class that is itself created in python code and passed in at
 // runtime. This is fairly atypical and undocumented.
@@ -764,6 +821,9 @@ static PyType_Slot EagerTensor_Type_slots[] = {
     {0, nullptr},
 };
 #else
+
+#define EAGER_TENSOR_TPFLAGS (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_NEWBUFFER)
+
 // TODO(agarwal): support active_trace.
 static PyTypeObject _EagerTensorType = {
     // clang-format off
@@ -786,8 +846,8 @@ static PyTypeObject _EagerTensorType = {
     nullptr,                /* tp_str */
     nullptr,                /* tp_getattro */
     nullptr,                /* tp_setattro */
-    nullptr,                /* tp_as_buffer */
-    Py_TPFLAGS_DEFAULT,     /* tp_flags */
+    &EagerTensor_as_buffer, /* tp_as_buffer */
+    EAGER_TENSOR_TPFLAGS,   /* tp_flags */
     nullptr,                /* tp_doc */
     nullptr,                /* tp_traverse */
     nullptr,                /* tp_clear */
@@ -946,6 +1006,7 @@ PyObject* TFE_Py_InitEagerTensor(PyObject* base_class) {
     return nullptr;
   }
   EagerTensorType->tp_dictoffset = offsetof(EagerTensor, dict);
+  EagerTensorType->tp_as_buffer = &EagerTensor_as_buffer;
 #else
   _EagerTensorType.tp_base = base_class_type;
 
@@ -387,6 +387,34 @@ class TFETensorTest(test_util.TensorFlowTestCase):
     self.assertEqual(
         constant_op.constant(t.min, dtype=t).numpy(), t.min)
 
+  def test_numpyIsView(self):
+    t = constant_op.constant([0.0])
+    t._numpy()[0] = 42.0
+    self.assertAllClose(t, constant_op.constant([42.0]))
+
+  def testMemoryviewIsReadonly(self):
+    t = constant_op.constant([0.0])
+    self.assertTrue(memoryview(t).readonly)
+
+  @test_util.assert_no_new_pyobjects_executing_eagerly
+  def testMemoryviewScalar(self):
+    t = constant_op.constant(42.0)
+    self.assertAllEqual(
+        np.array(memoryview(t)), np.array(42.0, dtype=np.float32))
+
+  @test_util.assert_no_new_pyobjects_executing_eagerly
+  def testMemoryviewEmpty(self):
+    t = constant_op.constant([], dtype=np.float32)
+    self.assertAllEqual(np.array(memoryview(t)), np.array([]))
+
+  @test_util.run_gpu_only
+  @test_util.assert_no_new_pyobjects_executing_eagerly
+  def testMemoryviewCopyToCPU(self):
+    with ops.device("/device:GPU:0"):
+      t = constant_op.constant([0.0])
+    self.assertAllEqual(
+        np.array(memoryview(t)), np.array([0.0], dtype=np.float32))
+
 
 class TFETensorUtilTest(test_util.TensorFlowTestCase):
 
@@ -499,11 +527,6 @@ class TFETensorUtilTest(test_util.TensorFlowTestCase):
         ValueError, "non-rectangular Python sequence"):
       constant_op.constant(l)
 
-  def test_numpyIsView(self):
-    t = constant_op.constant([0.0])
-    t._numpy()[0] = 42.0
-    self.assertAllClose(t, constant_op.constant([42.0]))
-
 
 if __name__ == "__main__":
   test.main()
@@ -780,6 +780,9 @@ class _EagerTensorBase(Tensor):
     return int(maybe_arr)  # Must be a NumPy scalar.
 
   def __array__(self, dtype=None):
+    # This is only called if the buffer interface conversion failed.
+    # Remove once numpy/numpy#13507 is merged and released or py_function
+    # creates EagerTensors with a non-nullptr context.
     return np.asarray(self.numpy(), dtype=dtype)
 
   def __format__(self, format_spec):
@@ -188,13 +188,9 @@ Status TF_DataType_to_PyArray_TYPE(TF_DataType tf_datatype,
 
 Status ArrayFromMemory(int dim_size, npy_intp* dims, void* data, DataType dtype,
                        std::function<void()> destructor, PyObject** result) {
-  int size = 1;
-  for (int i = 0; i < dim_size; ++i) {
-    size *= dims[i];
-  }
-  if (dtype == DT_STRING || dtype == DT_RESOURCE || size == 0) {
+  if (dtype == DT_STRING || dtype == DT_RESOURCE) {
     return errors::FailedPrecondition(
-        "Cannot convert strings, resources, or empty Tensors.");
+        "Cannot convert string or resource Tensors.");
   }
 
   int type_num = -1;
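The relaxed precondition in the last hunk lets ArrayFromMemory wrap empty tensors zero-copy as well; a small sketch of the case it unblocks, mirroring testMemoryviewEmpty above (the shape assertion is an assumption):

    import numpy as np
    from tensorflow.python.framework import constant_op

    t = constant_op.constant([], dtype=np.float32)
    # Previously rejected with "Cannot convert ... empty Tensors."
    assert np.array(memoryview(t)).shape == (0,)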