Rollback due to internal failure.

PR #37190: Re-instate PR #36578: Go: NewTensor & Value performance improvement

Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/37190

See https://github.com/tensorflow/tensorflow/pull/36578.

Fixed the issues with the pointer-arithmetic checks introduced in Go 1.14 that caused the original PR to be reverted.

PiperOrigin-RevId: 300607819
Change-Id: I4fde4fa0449cd7bd56e03fd2e0f4ab739915a96b
This commit is contained in:
Jonathan Hseu 2020-03-12 13:02:12 -07:00 committed by TensorFlower Gardener
parent 3511dbe928
commit 00f041777f
2 changed files with 123 additions and 226 deletions

View File

@ -94,22 +94,9 @@ func NewTensor(value interface{}) (*Tensor, error) {
raw := tensorData(t.c)
buf := bytes.NewBuffer(raw[:0:len(raw)])
if dataType != String {
if isAllArray(val.Type()) {
// We have arrays all the way down, or just primitive types. We can
// just copy the memory in as it is all contiguous.
if err := copyPtr(buf, unpackEFace(value).data, int(val.Type().Size())); err != nil {
return nil, err
}
} else {
// When there are slices involved the memory for each leaf slice may
// not be contiguous with the others or in the order we might
// expect, so we need to work our way down to each slice of
// primitives and copy them individually
if err := encodeTensorWithSlices(buf, val, shape); err != nil {
return nil, err
}
if err := encodeTensor(buf, val, shape); err != nil {
return nil, err
}
if uintptr(buf.Len()) != nbytes {
return nil, bug("NewTensor incorrectly calculated the size of a tensor with type %v and shape %v as %v bytes instead of %v", dataType, shape, nbytes, buf.Len())
}
@ -125,43 +112,6 @@ func NewTensor(value interface{}) (*Tensor, error) {
return t, nil
}
// isAllArray reports whether typ is free of slices at every nesting level:
// either a non-array, non-slice type, or an array (of arrays, and so on) whose
// innermost element is such a type. When it returns true the data we want is
// contiguous in RAM.
func isAllArray(typ reflect.Type) bool {
	// Strip array dimensions one at a time until the innermost element type
	// is reached.
	for typ.Kind() == reflect.Array {
		typ = typ.Elem()
	}
	// A slice at any level means the data is not laid out in one piece;
	// every other kind is treated as a leaf.
	return typ.Kind() != reflect.Slice
}
// eface mirrors the in-memory representation of an empty interface value: one
// pointer to the type information of the wrapped value followed by one pointer
// to the wrapped value itself.
type eface struct {
	rtype unsafe.Pointer // type information about the encapsulated value
	data  unsafe.Pointer // pointer to the encapsulated value
}

// unpackEFace gives us an efficient way to obtain a pointer to the value
// carried in an interface.
//
// If a pointer type is wrapped in an interface, the pointer is stored directly
// in the interface struct. If a value type is wrapped, the compiler copies the
// value into newly allocated memory and stores a pointer to that memory in the
// interface. Either way a pointer is available. Go reflection does not expose
// the pointer to value types straightforwardly because it does not want the
// caller to believe it holds a reference to the original value. Here the
// pointer is only wanted for reading the value efficiently, so reinterpreting
// the interface like this should be safe and reasonable.
func unpackEFace(obj interface{}) *eface {
	// Reinterpret the interface variable's own storage as an eface.
	header := unsafe.Pointer(&obj)
	return (*eface)(header)
}
// ReadTensor constructs a Tensor with the provided type and shape from the
// serialized tensor contents in r.
//
@ -218,93 +168,23 @@ func (t *Tensor) Shape() []int64 { return t.shape }
// Tensor(int64, 0): int64
// Tensor(float64, 3): [][][]float64
func (t *Tensor) Value() interface{} {
raw := tensorData(t.c)
shape := t.Shape()
dt := t.DataType()
if dt != String {
return decodeTensor(raw, shape, dt).Interface()
}
typ := typeOf(dt, shape)
typ := typeOf(t.DataType(), t.Shape())
val := reflect.New(typ)
nflattened := numElements(shape)
d := stringDecoder{offsets: bytes.NewReader(raw[0 : 8*nflattened]), data: raw[8*nflattened:], status: newStatus()}
if err := d.decode(val, shape); err != nil {
panic(bug("unable to decode String tensor with shape %v - %v", shape, err))
raw := tensorData(t.c)
if t.DataType() != String {
if err := decodeTensor(bytes.NewReader(raw), t.Shape(), typ, val); err != nil {
panic(bug("unable to decode Tensor of type %v and shape %v - %v", t.DataType(), t.Shape(), err))
}
} else {
nflattened := numElements(t.Shape())
d := stringDecoder{offsets: bytes.NewReader(raw[0 : 8*nflattened]), data: raw[8*nflattened:], status: newStatus()}
if err := d.decode(val, t.Shape()); err != nil {
panic(bug("unable to decode String tensor with shape %v - %v", t.Shape(), err))
}
}
return reflect.Indirect(val).Interface()
}
// decodeTensor builds a Go value from raw, the bytes of a tensor whose element
// type is dt and whose dimensions are given by shape.
//
// The result depends on len(shape): for an empty shape it is the single
// element, for a one-dimensional shape it is a flat slice, and for more
// dimensions it is a slice of slices whose innermost slices all point into one
// shared backing slice holding the copied data.
func decodeTensor(raw []byte, shape []int64, dt DataType) reflect.Value {
typ := typeForDataType(dt)
// Create a 1-dimensional slice of the base type large enough for the data and
// copy the data in.
// NOTE(review): numElements is presumably the product of the dimensions in
// shape - confirm against its definition.
n := int(numElements(shape))
l := n * int(typ.Size())
typ = reflect.SliceOf(typ)
slice := reflect.MakeSlice(typ, n, n)
// View the backing memory of the new slice as l bytes so the tensor data can
// be copied in with a single copy call.
baseBytes := *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
Data: slice.Pointer(),
Len: l,
Cap: l,
}))
copy(baseBytes, raw)
// Now we have the data in place in the base slice we can add the
// dimensions. We want to walk backwards through the shape. If the shape is
// length 1 or 0 then we're already done.
if len(shape) == 0 {
return slice.Index(0)
}
if len(shape) == 1 {
return slice
}
// We have a special case if the tensor has no data. Our backing slice is
// empty, but we still want to create slices following the shape. In this
// case only the final part of the shape will be 0 and we want to recalculate
// n at this point ignoring that 0.
// For example if our shape is 3 * 2 * 0 then n will be zero, but we still
// want 6 zero length slices to group as follows.
// {{} {}} {{} {}} {{} {}}
if n == 0 {
n = int(numElements(shape[:len(shape)-1]))
}
// Each pass wraps the current level in one more slice dimension, working from
// the innermost dimension outwards.
for i := len(shape) - 2; i >= 0; i-- {
// Size in bytes of one element of the current level; used below to turn an
// element index into a byte offset.
underlyingSize := typ.Elem().Size()
typ = reflect.SliceOf(typ)
subsliceLen := int(shape[i+1])
// Guard against dividing by zero for a zero-length dimension; n then keeps
// the value it already has.
if subsliceLen != 0 {
n = n / subsliceLen
}
// Just using reflection it is difficult to avoid unnecessary
// allocations while setting up the sub-slices as the Slice function on
// a slice Value allocates. So we end up doing pointer arithmetic!
// Pointer() on a slice gives us access to the data backing the slice.
// We insert slice headers directly into this data.
data := slice.Pointer()
nextSlice := reflect.MakeSlice(typ, n, n)
for j := 0; j < n; j++ {
// This is equivalent to nextSlice[j] = slice[j*subsliceLen : (j+1)*subsliceLen]
setSliceInSlice(nextSlice, j, reflect.SliceHeader{
Data: data + (uintptr(j*subsliceLen) * underlyingSize),
Len: subsliceLen,
Cap: subsliceLen,
})
}
slice = nextSlice
}
return slice
}
//go:nocheckptr
// setSliceInSlice sets slice[index] = content. We set nocheckptr as the pointer
// checking stuff doesn't seem to work well with reflection.
func setSliceInSlice(slice reflect.Value, index int, content reflect.SliceHeader) {
const sliceSize = unsafe.Sizeof(reflect.SliceHeader{})
*(*reflect.SliceHeader)(unsafe.Pointer(slice.Pointer() + (uintptr(index) * sliceSize))) = content
}
// WriteContentsTo writes the serialized contents of t to w.
//
// Returns the number of bytes written. See ReadTensor for
@ -381,18 +261,18 @@ func shapeAndDataTypeOf(val reflect.Value) (shape []int64, dt DataType, err erro
return shape, dt, fmt.Errorf("unsupported type %v", typ)
}
// typeForDataType looks dt up in the types table and returns the Go type of
// the first entry whose dataType matches. It panics when no entry matches.
func typeForDataType(dt DataType) reflect.Type {
	for _, entry := range types {
		if DataType(entry.dataType) != dt {
			continue
		}
		return entry.typ
	}
	// No table entry matched dt.
	panic(bug("DataType %v is not supported (see https://www.tensorflow.org/code/tensorflow/core/framework/types.proto)", dt))
}
// typeOf converts from a DataType and Shape to the equivalent Go type.
func typeOf(dt DataType, shape []int64) reflect.Type {
ret := typeForDataType(dt)
var ret reflect.Type
for _, t := range types {
if dt == DataType(t.dataType) {
ret = t.typ
break
}
}
if ret == nil {
panic(bug("DataType %v is not supported (see https://www.tensorflow.org/code/tensorflow/core/framework/types.proto)", dt))
}
for range shape {
ret = reflect.SliceOf(ret)
}
@ -422,53 +302,92 @@ func byteSizeOfEncodedStrings(val interface{}) uintptr {
return size
}
// encodeTensorWithSlices writes v to the specified buffer using the format specified in
// encodeTensor writes v to the specified buffer using the format specified in
// c_api.h. Use stringEncoder for String tensors.
func encodeTensorWithSlices(w *bytes.Buffer, v reflect.Value, shape []int64) error {
// If current dimension is a slice, verify that it has the expected size
// Go's type system makes that guarantee for arrays.
if v.Kind() == reflect.Slice {
expected := int(shape[0])
if v.Len() != expected {
return fmt.Errorf("mismatched slice lengths: %d and %d", v.Len(), expected)
func encodeTensor(w *bytes.Buffer, v reflect.Value, shape []int64) error {
switch v.Kind() {
case reflect.Bool:
b := byte(0)
if v.Bool() {
b = 1
}
} else if v.Kind() != reflect.Array {
return fmt.Errorf("unsupported type %v", v.Type())
}
// Once we have just a single dimension we can just copy the data
if len(shape) == 1 && v.Len() > 0 {
elt := v.Index(0)
if !elt.CanAddr() {
panic("cannot take address")
}
ptr := unsafe.Pointer(elt.Addr().Pointer())
return copyPtr(w, ptr, v.Len()*int(elt.Type().Size()))
}
subShape := shape[1:]
for i := 0; i < v.Len(); i++ {
err := encodeTensorWithSlices(w, v.Index(i), subShape)
if err != nil {
if err := w.WriteByte(b); err != nil {
return err
}
case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
if err := binary.Write(w, nativeEndian, v.Interface()); err != nil {
return err
}
}
case reflect.Array, reflect.Slice:
// If current dimension is a slice, verify that it has the expected size
// Go's type system makes that guarantee for arrays.
if v.Kind() == reflect.Slice {
expected := int(shape[0])
if v.Len() != expected {
return fmt.Errorf("mismatched slice lengths: %d and %d", v.Len(), expected)
}
}
// Optimisation: if only one dimension is left we can use binary.Write() directly for this slice
if len(shape) == 1 && v.Len() > 0 {
switch v.Index(0).Kind() {
case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
return binary.Write(w, nativeEndian, v.Interface())
}
}
subShape := shape[1:]
for i := 0; i < v.Len(); i++ {
err := encodeTensor(w, v.Index(i), subShape)
if err != nil {
return err
}
}
default:
return fmt.Errorf("unsupported type %v", v.Type())
}
return nil
}
// copyPtr copies the backing data for a slice or array directly into w. Note
// we don't need to worry about byte ordering because we want the natural byte
// order for the machine we're running on.
func copyPtr(w *bytes.Buffer, ptr unsafe.Pointer, l int) error {
// Convert our slice header into a []byte so we can call w.Write
b := *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
Data: uintptr(ptr),
Len: l,
Cap: l,
}))
_, err := w.Write(b)
return err
// decodeTensor reads a tensor of Go type typ and dimensions shape from r and
// stores it in the value ptr points to, using the format specified in c_api.h.
// Use stringDecoder for String tensors.
//
// It returns any read error from r, or an "unsupported type" error when typ is
// not a bool, a fixed-size numeric type, or a slice.
func decodeTensor(r *bytes.Reader, shape []int64, typ reflect.Type, ptr reflect.Value) error {
	switch typ.Kind() {
	case reflect.Bool:
		// A bool occupies one byte; only the value 1 decodes to true.
		raw, err := r.ReadByte()
		if err != nil {
			return err
		}
		ptr.Elem().SetBool(raw == 1)
		return nil

	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
		return binary.Read(r, nativeEndian, ptr.Interface())

	case reflect.Slice:
		dst := reflect.Indirect(ptr)
		n := int(shape[0])
		dst.Set(reflect.MakeSlice(typ, n, n))
		rest := shape[1:]

		// Optimization: if only one dimension is left we can use
		// binary.Read() directly for this slice.
		if len(rest) == 0 && dst.Len() > 0 {
			switch dst.Index(0).Kind() {
			case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
				return binary.Read(r, nativeEndian, dst.Interface())
			}
		}

		// Otherwise decode element by element, one dimension down.
		elemType := typ.Elem()
		for idx := 0; idx < dst.Len(); idx++ {
			if err := decodeTensor(r, rest, elemType, dst.Index(idx).Addr()); err != nil {
				return err
			}
		}
		return nil
	}
	return fmt.Errorf("unsupported type %v", typ)
}
type stringEncoder struct {

View File

@ -18,7 +18,6 @@ package tensorflow
import (
"bytes"
"fmt"
"io"
"reflect"
"testing"
@ -277,7 +276,6 @@ func TestReadTensorReadAll(t *testing.T) {
}
func benchmarkNewTensor(b *testing.B, v interface{}) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
if t, err := NewTensor(v); err != nil || t == nil {
b.Fatalf("(%v, %v)", t, err)
@ -285,52 +283,32 @@ func benchmarkNewTensor(b *testing.B, v interface{}) {
}
}
func benchmarkValueTensor(b *testing.B, v interface{}) {
t, err := NewTensor(v)
if err != nil {
b.Fatalf("(%v, %v)", t, err)
}
b.ReportAllocs()
b.ResetTimer()
// BenchmarkNewTensor measures NewTensor, via benchmarkNewTensor, on a flat
// int32 array of 224*224*3 elements.
func BenchmarkNewTensor(b *testing.B) {
	// A sample size from the Inception image labeling model, where input
	// tensors correspond to a 224x224 RGB image flattened into a vector.
	var vector [224 * 224 * 3]int32

	b.Run("[150528]", func(sub *testing.B) {
		benchmarkNewTensor(sub, vector)
	})
}
// benchmarkDecodeTensor calls t.Value() b.N times and discards each result.
func benchmarkDecodeTensor(b *testing.B, t *Tensor) {
	for remaining := b.N; remaining > 0; remaining-- {
		_ = t.Value()
	}
}
func BenchmarkTensor(b *testing.B) {
// Some sample sizes from the Inception image labeling model.
// Where input tensors correspond to a 224x224 RGB image
// flattened into a vector.
var vector [224 * 224 * 3]int32
var arrays [100][100][100]int32
l3 := make([][][]float32, 100)
l2 := make([][]float32, 100*100)
l1 := make([]float32, 100*100*100)
for i := range l2 {
l2[i] = l1[i*100 : (i+1)*100]
func BenchmarkDecodeTensor(b *testing.B) {
var (
// Some sample sizes from the Inception image labeling model.
// Where input tensors correspond to a 224x224 RGB image
// flattened into a vector.
vector [224 * 224 * 3]int32
)
t, err := NewTensor(vector)
if err != nil {
b.Fatalf("(%v, %v)", t, err)
}
for i := range l3 {
l3[i] = l2[i*100 : (i+1)*100]
}
tests := []interface{}{
vector,
arrays,
l1,
l2,
l3,
}
b.Run("New", func(b *testing.B) {
for _, test := range tests {
b.Run(fmt.Sprintf("%T", test), func(b *testing.B) { benchmarkNewTensor(b, test) })
}
})
b.Run("Value", func(b *testing.B) {
for _, test := range tests {
b.Run(fmt.Sprintf("%T", test), func(b *testing.B) { benchmarkValueTensor(b, test) })
}
})
b.Run("[150528]", func(b *testing.B) { benchmarkDecodeTensor(b, t) })
}