From 00f041777ff498e1b4a6ee7825390deaaf04b5db Mon Sep 17 00:00:00 2001 From: Jonathan Hseu <jhseu@google.com> Date: Thu, 12 Mar 2020 13:02:12 -0700 Subject: [PATCH] Rollback due to internal failure. PR #37190: Re-instate PR #36578: Go: NewTensor & Value performance improvement Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/37190 See https://github.com/tensorflow/tensorflow/pull/36578. Have fixed issues with pointer arithmetic checks introduced with Go 1.14 that caused original PR to be reverted PiperOrigin-RevId: 300607819 Change-Id: I4fde4fa0449cd7bd56e03fd2e0f4ab739915a96b --- tensorflow/go/tensor.go | 285 +++++++++++++---------------------- tensorflow/go/tensor_test.go | 64 +++----- 2 files changed, 123 insertions(+), 226 deletions(-) diff --git a/tensorflow/go/tensor.go b/tensorflow/go/tensor.go index b5774d92b66..9bc643ae6d2 100644 --- a/tensorflow/go/tensor.go +++ b/tensorflow/go/tensor.go @@ -94,22 +94,9 @@ func NewTensor(value interface{}) (*Tensor, error) { raw := tensorData(t.c) buf := bytes.NewBuffer(raw[:0:len(raw)]) if dataType != String { - if isAllArray(val.Type()) { - // We have arrays all the way down, or just primitive types. We can - // just copy the memory in as it is all contiguous. - if err := copyPtr(buf, unpackEFace(value).data, int(val.Type().Size())); err != nil { - return nil, err - } - } else { - // When there are slices involved the memory for each leaf slice may - // not be contiguous with the others or in the order we might - // expect, so we need to work our way down to each slice of - // primitives and copy them individually - if err := encodeTensorWithSlices(buf, val, shape); err != nil { - return nil, err - } + if err := encodeTensor(buf, val, shape); err != nil { + return nil, err } - if uintptr(buf.Len()) != nbytes { return nil, bug("NewTensor incorrectly calculated the size of a tensor with type %v and shape %v as %v bytes instead of %v", dataType, shape, nbytes, buf.Len()) } @@ -125,43 +112,6 @@ func NewTensor(value interface{}) (*Tensor, error) { return t, nil } -// isAllArray returns true if type is a primitive type or an array of primitive -// types or an array of ... etc.. When this is true the data we want is -// contiguous in RAM. -func isAllArray(typ reflect.Type) bool { - switch typ.Kind() { - case reflect.Slice: - return false - case reflect.Array: - return isAllArray(typ.Elem()) - default: - // We know the type is slices/arrays of slices/arrays of primitive types. - return true - } -} - -// eface defines what an interface type actually is: a pointer to type -// information about the encapsulated type and a pointer to the encapsulated -// value. -type eface struct { - rtype unsafe.Pointer - data unsafe.Pointer -} - -// unpackEFace gives us an effient way to get us a pointer to the value carried -// in an interface. If you wrap a pointer type in an interface then the pointer -// is directly stored in the interface struct. If you wrap a value type in an -// interface then the compiler copies the value into a newly allocated piece of -// memory and stores a pointer to that memory in the interface. So we're -// guaranteed to get a pointer. Go reflection doesn't expose the pointer to -// value types straightforwardly as it doesn't want you to think you have a -// reference to the original value. But we just want a pointer to make it -// efficient to read the value, so cheating like this should be safe and -// reasonable. -func unpackEFace(obj interface{}) *eface { - return (*eface)(unsafe.Pointer(&obj)) -} - // ReadTensor constructs a Tensor with the provided type and shape from the // serialized tensor contents in r. // @@ -218,93 +168,23 @@ func (t *Tensor) Shape() []int64 { return t.shape } // Tensor(int64, 0): int64 // Tensor(float64, 3): [][][]float64 func (t *Tensor) Value() interface{} { - raw := tensorData(t.c) - shape := t.Shape() - dt := t.DataType() - if dt != String { - return decodeTensor(raw, shape, dt).Interface() - } - - typ := typeOf(dt, shape) + typ := typeOf(t.DataType(), t.Shape()) val := reflect.New(typ) - nflattened := numElements(shape) - d := stringDecoder{offsets: bytes.NewReader(raw[0 : 8*nflattened]), data: raw[8*nflattened:], status: newStatus()} - if err := d.decode(val, shape); err != nil { - panic(bug("unable to decode String tensor with shape %v - %v", shape, err)) + raw := tensorData(t.c) + if t.DataType() != String { + if err := decodeTensor(bytes.NewReader(raw), t.Shape(), typ, val); err != nil { + panic(bug("unable to decode Tensor of type %v and shape %v - %v", t.DataType(), t.Shape(), err)) + } + } else { + nflattened := numElements(t.Shape()) + d := stringDecoder{offsets: bytes.NewReader(raw[0 : 8*nflattened]), data: raw[8*nflattened:], status: newStatus()} + if err := d.decode(val, t.Shape()); err != nil { + panic(bug("unable to decode String tensor with shape %v - %v", t.Shape(), err)) + } } return reflect.Indirect(val).Interface() } -func decodeTensor(raw []byte, shape []int64, dt DataType) reflect.Value { - typ := typeForDataType(dt) - // Create a 1-dimensional slice of the base large enough for the data and - // copy the data in. - n := int(numElements(shape)) - l := n * int(typ.Size()) - typ = reflect.SliceOf(typ) - slice := reflect.MakeSlice(typ, n, n) - baseBytes := *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{ - Data: slice.Pointer(), - Len: l, - Cap: l, - })) - copy(baseBytes, raw) - // Now we have the data in place in the base slice we can add the - // dimensions. We want to walk backwards through the shape. If the shape is - // length 1 or 0 then we're already done. - if len(shape) == 0 { - return slice.Index(0) - } - if len(shape) == 1 { - return slice - } - // We have a special case if the tensor has no data. Our backing slice is - // empty, but we still want to create slices following the shape. In this - // case only the final part of the shape will be 0 and we want to recalculate - // n at this point ignoring that 0. - // For example if our shape is 3 * 2 * 0 then n will be zero, but we still - // want 6 zero length slices to group as follows. - // {{} {}} {{} {}} {{} {}} - if n == 0 { - n = int(numElements(shape[:len(shape)-1])) - } - for i := len(shape) - 2; i >= 0; i-- { - underlyingSize := typ.Elem().Size() - typ = reflect.SliceOf(typ) - subsliceLen := int(shape[i+1]) - if subsliceLen != 0 { - n = n / subsliceLen - } - // Just using reflection it is difficult to avoid unnecessary - // allocations while setting up the sub-slices as the Slice function on - // a slice Value allocates. So we end up doing pointer arithmetic! - // Pointer() on a slice gives us access to the data backing the slice. - // We insert slice headers directly into this data. - data := slice.Pointer() - nextSlice := reflect.MakeSlice(typ, n, n) - - for j := 0; j < n; j++ { - // This is equivalent to nSlice[j] = slice[j*subsliceLen: (j+1)*subsliceLen] - setSliceInSlice(nextSlice, j, reflect.SliceHeader{ - Data: data + (uintptr(j*subsliceLen) * underlyingSize), - Len: subsliceLen, - Cap: subsliceLen, - }) - } - - slice = nextSlice - } - return slice -} - -//go:nocheckptr -// setSliceInSlice sets slice[index] = content. We set nocheckptr as the pointer -// checking stuff doesn't seem to work well with reflection. -func setSliceInSlice(slice reflect.Value, index int, content reflect.SliceHeader) { - const sliceSize = unsafe.Sizeof(reflect.SliceHeader{}) - *(*reflect.SliceHeader)(unsafe.Pointer(slice.Pointer() + (uintptr(index) * sliceSize))) = content -} - // WriteContentsTo writes the serialized contents of t to w. // // Returns the number of bytes written. See ReadTensor for @@ -381,18 +261,18 @@ func shapeAndDataTypeOf(val reflect.Value) (shape []int64, dt DataType, err erro return shape, dt, fmt.Errorf("unsupported type %v", typ) } -func typeForDataType(dt DataType) reflect.Type { - for _, t := range types { - if dt == DataType(t.dataType) { - return t.typ - } - } - panic(bug("DataType %v is not supported (see https://www.tensorflow.org/code/tensorflow/core/framework/types.proto)", dt)) -} - // typeOf converts from a DataType and Shape to the equivalent Go type. func typeOf(dt DataType, shape []int64) reflect.Type { - ret := typeForDataType(dt) + var ret reflect.Type + for _, t := range types { + if dt == DataType(t.dataType) { + ret = t.typ + break + } + } + if ret == nil { + panic(bug("DataType %v is not supported (see https://www.tensorflow.org/code/tensorflow/core/framework/types.proto)", dt)) + } for range shape { ret = reflect.SliceOf(ret) } @@ -422,53 +302,92 @@ func byteSizeOfEncodedStrings(val interface{}) uintptr { return size } -// encodeTensorWithSlices writes v to the specified buffer using the format specified in +// encodeTensor writes v to the specified buffer using the format specified in // c_api.h. Use stringEncoder for String tensors. -func encodeTensorWithSlices(w *bytes.Buffer, v reflect.Value, shape []int64) error { - // If current dimension is a slice, verify that it has the expected size - // Go's type system makes that guarantee for arrays. - if v.Kind() == reflect.Slice { - expected := int(shape[0]) - if v.Len() != expected { - return fmt.Errorf("mismatched slice lengths: %d and %d", v.Len(), expected) +func encodeTensor(w *bytes.Buffer, v reflect.Value, shape []int64) error { + switch v.Kind() { + case reflect.Bool: + b := byte(0) + if v.Bool() { + b = 1 } - } else if v.Kind() != reflect.Array { - return fmt.Errorf("unsupported type %v", v.Type()) - } - - // Once we have just a single dimension we can just copy the data - if len(shape) == 1 && v.Len() > 0 { - elt := v.Index(0) - if !elt.CanAddr() { - panic("cannot take address") - } - ptr := unsafe.Pointer(elt.Addr().Pointer()) - return copyPtr(w, ptr, v.Len()*int(elt.Type().Size())) - } - - subShape := shape[1:] - for i := 0; i < v.Len(); i++ { - err := encodeTensorWithSlices(w, v.Index(i), subShape) - if err != nil { + if err := w.WriteByte(b); err != nil { + return err + } + case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: + if err := binary.Write(w, nativeEndian, v.Interface()); err != nil { return err } - } + case reflect.Array, reflect.Slice: + // If current dimension is a slice, verify that it has the expected size + // Go's type system makes that guarantee for arrays. + if v.Kind() == reflect.Slice { + expected := int(shape[0]) + if v.Len() != expected { + return fmt.Errorf("mismatched slice lengths: %d and %d", v.Len(), expected) + } + } + + // Optimisation: if only one dimension is left we can use binary.Write() directly for this slice + if len(shape) == 1 && v.Len() > 0 { + switch v.Index(0).Kind() { + case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: + return binary.Write(w, nativeEndian, v.Interface()) + } + } + + subShape := shape[1:] + for i := 0; i < v.Len(); i++ { + err := encodeTensor(w, v.Index(i), subShape) + if err != nil { + return err + } + } + + default: + return fmt.Errorf("unsupported type %v", v.Type()) + } return nil } -// copyPtr copies the backing data for a slice or array directly into w. Note -// we don't need to worry about byte ordering because we want the natural byte -// order for the machine we're running on. -func copyPtr(w *bytes.Buffer, ptr unsafe.Pointer, l int) error { - // Convert our slice header into a []byte so we can call w.Write - b := *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{ - Data: uintptr(ptr), - Len: l, - Cap: l, - })) - _, err := w.Write(b) - return err +// decodeTensor decodes the Tensor from the buffer to ptr using the format +// specified in c_api.h. Use stringDecoder for String tensors. +func decodeTensor(r *bytes.Reader, shape []int64, typ reflect.Type, ptr reflect.Value) error { + switch typ.Kind() { + case reflect.Bool: + b, err := r.ReadByte() + if err != nil { + return err + } + ptr.Elem().SetBool(b == 1) + case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: + if err := binary.Read(r, nativeEndian, ptr.Interface()); err != nil { + return err + } + + case reflect.Slice: + val := reflect.Indirect(ptr) + val.Set(reflect.MakeSlice(typ, int(shape[0]), int(shape[0]))) + + // Optimization: if only one dimension is left we can use binary.Read() directly for this slice + if len(shape) == 1 && val.Len() > 0 { + switch val.Index(0).Kind() { + case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: + return binary.Read(r, nativeEndian, val.Interface()) + } + } + + for i := 0; i < val.Len(); i++ { + if err := decodeTensor(r, shape[1:], typ.Elem(), val.Index(i).Addr()); err != nil { + return err + } + } + + default: + return fmt.Errorf("unsupported type %v", typ) + } + return nil } type stringEncoder struct { diff --git a/tensorflow/go/tensor_test.go b/tensorflow/go/tensor_test.go index 4d2df3a97dd..dc533cd3e1c 100644 --- a/tensorflow/go/tensor_test.go +++ b/tensorflow/go/tensor_test.go @@ -18,7 +18,6 @@ package tensorflow import ( "bytes" - "fmt" "io" "reflect" "testing" @@ -277,7 +276,6 @@ func TestReadTensorReadAll(t *testing.T) { } func benchmarkNewTensor(b *testing.B, v interface{}) { - b.ReportAllocs() for i := 0; i < b.N; i++ { if t, err := NewTensor(v); err != nil || t == nil { b.Fatalf("(%v, %v)", t, err) @@ -285,52 +283,32 @@ func benchmarkNewTensor(b *testing.B, v interface{}) { } } -func benchmarkValueTensor(b *testing.B, v interface{}) { - t, err := NewTensor(v) - if err != nil { - b.Fatalf("(%v, %v)", t, err) - } - b.ReportAllocs() - b.ResetTimer() +func BenchmarkNewTensor(b *testing.B) { + var ( + // Some sample sizes from the Inception image labeling model. + // Where input tensors correspond to a 224x224 RGB image + // flattened into a vector. + vector [224 * 224 * 3]int32 + ) + b.Run("[150528]", func(b *testing.B) { benchmarkNewTensor(b, vector) }) +} +func benchmarkDecodeTensor(b *testing.B, t *Tensor) { for i := 0; i < b.N; i++ { _ = t.Value() } } -func BenchmarkTensor(b *testing.B) { - // Some sample sizes from the Inception image labeling model. - // Where input tensors correspond to a 224x224 RGB image - // flattened into a vector. - var vector [224 * 224 * 3]int32 - var arrays [100][100][100]int32 - - l3 := make([][][]float32, 100) - l2 := make([][]float32, 100*100) - l1 := make([]float32, 100*100*100) - for i := range l2 { - l2[i] = l1[i*100 : (i+1)*100] +func BenchmarkDecodeTensor(b *testing.B) { + var ( + // Some sample sizes from the Inception image labeling model. + // Where input tensors correspond to a 224x224 RGB image + // flattened into a vector. + vector [224 * 224 * 3]int32 + ) + t, err := NewTensor(vector) + if err != nil { + b.Fatalf("(%v, %v)", t, err) } - for i := range l3 { - l3[i] = l2[i*100 : (i+1)*100] - } - - tests := []interface{}{ - vector, - arrays, - l1, - l2, - l3, - } - b.Run("New", func(b *testing.B) { - for _, test := range tests { - b.Run(fmt.Sprintf("%T", test), func(b *testing.B) { benchmarkNewTensor(b, test) }) - } - }) - b.Run("Value", func(b *testing.B) { - for _, test := range tests { - b.Run(fmt.Sprintf("%T", test), func(b *testing.B) { benchmarkValueTensor(b, test) }) - } - }) - + b.Run("[150528]", func(b *testing.B) { benchmarkDecodeTensor(b, t) }) }