diff --git a/tensorflow/go/tensor.go b/tensorflow/go/tensor.go index 9bc643ae6d2..bd84254f698 100644 --- a/tensorflow/go/tensor.go +++ b/tensorflow/go/tensor.go @@ -26,6 +26,7 @@ import ( "encoding/binary" "fmt" "io" + "math/bits" "reflect" "runtime" "unsafe" @@ -80,7 +81,7 @@ func NewTensor(value interface{}) (*Tensor, error) { if dataType == String { // TF_STRING tensors are encoded as an array of 8-byte offsets // followed by string data. See c_api.h. - nbytes = uintptr(nflattened*8) + byteSizeOfEncodedStrings(value) + nbytes = uintptr(nflattened*8 + int64(byteSizeOfEncodedStrings(val))) } var shapePtr *C.int64_t if len(shape) > 0 { @@ -94,9 +95,22 @@ func NewTensor(value interface{}) (*Tensor, error) { raw := tensorData(t.c) buf := bytes.NewBuffer(raw[:0:len(raw)]) if dataType != String { - if err := encodeTensor(buf, val, shape); err != nil { - return nil, err + if isAllArray(val.Type()) { + // We have arrays all the way down, or just primitive types. We can + // just copy the memory in as it is all contiguous. + if err := copyPtr(buf, unpackEFace(value).data, int(val.Type().Size())); err != nil { + return nil, err + } + } else { + // When there are slices involved the memory for each leaf slice may + // not be contiguous with the others or in the order we might + // expect, so we need to work our way down to each slice of + // primitives and copy them individually + if err := encodeTensorWithSlices(buf, val, shape); err != nil { + return nil, err + } } + if uintptr(buf.Len()) != nbytes { return nil, bug("NewTensor incorrectly calculated the size of a tensor with type %v and shape %v as %v bytes instead of %v", dataType, shape, nbytes, buf.Len()) } @@ -112,6 +126,43 @@ func NewTensor(value interface{}) (*Tensor, error) { return t, nil } +// isAllArray returns true if type is a primitive type or an array of primitive +// types or an array of ... etc.. When this is true the data we want is +// contiguous in RAM. 
+func isAllArray(typ reflect.Type) bool { + switch typ.Kind() { + case reflect.Slice: + return false + case reflect.Array: + return isAllArray(typ.Elem()) + default: + // We know the type is slices/arrays of slices/arrays of primitive types. + return true + } +} + +// eface defines what an interface type actually is: a pointer to type +// information about the encapsulated type and a pointer to the encapsulated +// value. +type eface struct { + rtype unsafe.Pointer + data unsafe.Pointer +} + +// unpackEFace gives us an effient way to get us a pointer to the value carried +// in an interface. If you wrap a pointer type in an interface then the pointer +// is directly stored in the interface struct. If you wrap a value type in an +// interface then the compiler copies the value into a newly allocated piece of +// memory and stores a pointer to that memory in the interface. So we're +// guaranteed to get a pointer. Go reflection doesn't expose the pointer to +// value types straightforwardly as it doesn't want you to think you have a +// reference to the original value. But we just want a pointer to make it +// efficient to read the value, so cheating like this should be safe and +// reasonable. +func unpackEFace(obj interface{}) *eface { + return (*eface)(unsafe.Pointer(&obj)) +} + // ReadTensor constructs a Tensor with the provided type and shape from the // serialized tensor contents in r. 
// @@ -168,21 +219,152 @@ func (t *Tensor) Shape() []int64 { return t.shape } // Tensor(int64, 0): int64 // Tensor(float64, 3): [][][]float64 func (t *Tensor) Value() interface{} { - typ := typeOf(t.DataType(), t.Shape()) - val := reflect.New(typ) raw := tensorData(t.c) - if t.DataType() != String { - if err := decodeTensor(bytes.NewReader(raw), t.Shape(), typ, val); err != nil { - panic(bug("unable to decode Tensor of type %v and shape %v - %v", t.DataType(), t.Shape(), err)) + shape := t.Shape() + dt := t.DataType() + return decodeTensor(raw, shape, dt).Interface() +} + +func decodeTensor(raw []byte, shape []int64, dt DataType) reflect.Value { + // Create a 1-dimensional slice of the base large enough for the data and + // copy the data in. + n := int(numElements(shape)) + + var ( + slice reflect.Value + typ reflect.Type + ) + if dt == String { + strs, err := decodeOneDimString(raw, n) + if err != nil { + panic(bug("unable to decode string with shape %v: %v", shape, err)) } + slice = reflect.ValueOf(strs) + typ = slice.Type() } else { - nflattened := numElements(t.Shape()) - d := stringDecoder{offsets: bytes.NewReader(raw[0 : 8*nflattened]), data: raw[8*nflattened:], status: newStatus()} - if err := d.decode(val, t.Shape()); err != nil { - panic(bug("unable to decode String tensor with shape %v - %v", t.Shape(), err)) - } + typ = typeForDataType(dt) + l := n * int(typ.Size()) + typ = reflect.SliceOf(typ) + slice = reflect.MakeSlice(typ, n, n) + baseBytes := *(*[]byte)(unsafe.Pointer(&sliceHeader{ + Data: unsafe.Pointer(slice.Pointer()), + Len: l, + Cap: l, + })) + copy(baseBytes, raw) } - return reflect.Indirect(val).Interface() + + // Now we have the data in place in the base slice we can add the + // dimensions. We want to walk backwards through the shape. If the shape is + // length 1 or 0 then we're already done. + if len(shape) == 0 { + return slice.Index(0) + } + if len(shape) == 1 { + return slice + } + // We have a special case if the tensor has no data. 
Our backing slice is + // empty, but we still want to create slices following the shape. In this + // case only the final part of the shape will be 0 and we want to recalculate + // n at this point ignoring that 0. + // For example if our shape is 3 * 2 * 0 then n will be zero, but we still + // want 6 zero length slices to group as follows. + // {{} {}} {{} {}} {{} {}} + if n == 0 { + n = int(numElements(shape[:len(shape)-1])) + } + for i := len(shape) - 2; i >= 0; i-- { + underlyingSize := typ.Elem().Size() + typ = reflect.SliceOf(typ) + subsliceLen := int(shape[i+1]) + if subsliceLen != 0 { + n = n / subsliceLen + } + // Just using reflection it is difficult to avoid unnecessary + // allocations while setting up the sub-slices as the Slice function on + // a slice Value allocates. So we end up doing pointer arithmetic! + // Pointer() on a slice gives us access to the data backing the slice. + // We insert slice headers directly into this data. + data := unsafe.Pointer(slice.Pointer()) + nextSlice := reflect.MakeSlice(typ, n, n) + + for j := 0; j < n; j++ { + // This is equivalent to nSlice[j] = slice[j*subsliceLen: (j+1)*subsliceLen] + setSliceInSlice(nextSlice, j, sliceHeader{ + Data: unsafe.Pointer(uintptr(data) + (uintptr(j*subsliceLen) * underlyingSize)), + Len: subsliceLen, + Cap: subsliceLen, + }) + } + + slice = nextSlice + } + return slice +} + +// setSliceInSlice sets slice[index] = content. +func setSliceInSlice(slice reflect.Value, index int, content sliceHeader) { + const sliceSize = unsafe.Sizeof(sliceHeader{}) + // We must cast slice.Pointer to uninptr & back again to avoid GC issues. + // See https://github.com/google/go-cmp/issues/167#issuecomment-546093202 + *(*sliceHeader)(unsafe.Pointer(uintptr(unsafe.Pointer(slice.Pointer())) + (uintptr(index) * sliceSize))) = content +} + +// decodeOneDimString decodes a string tensor into a one-dimensional []string. 
// decodeOneDimString decodes a string tensor into a one-dimensional []string.
//
// The raw data begins with nStrings 8-byte native-endian offsets into the
// remainder of the buffer; at each offset sits a varint-encoded length
// followed by that many bytes of string data. The bytes are copied out of raw
// because raw aliases memory owned by the C tensor, which is not safe to
// access once the tensor is freed. Returns an error if the encoded offsets,
// lengths, or data are inconsistent (i.e. the tensor is corrupt).
func decodeOneDimString(raw []byte, nStrings int) ([]string, error) {
	strs := make([]string, nStrings)
	// Reinterpret the first nStrings*8 bytes as the offset table.
	offsets := (*(*[]int64)(unsafe.Pointer(&raw)))[:nStrings]
	// Drop the table so the offsets work relative to raw.
	raw = raw[nStrings*8:]

	// First pass: sum the decoded lengths so a single allocation can hold
	// all of the string data.
	r := bytes.NewReader(raw)
	var totalLength int
	for _, offset := range offsets {
		// Each offset must point at a varint length; failures here mean the
		// tensor is corrupt.
		if _, err := r.Seek(offset, io.SeekStart); err != nil {
			return nil, err
		}
		l, err := binary.ReadUvarint(r)
		if err != nil {
			return nil, err
		}
		totalLength += int(l)
	}

	// Second pass: copy each string into the shared buffer, recording where
	// each one lives in strs.
	stringData := make([]byte, 0, totalLength)
	var cursor int
	for i, offset := range offsets {
		if _, err := r.Seek(offset, io.SeekStart); err != nil {
			return nil, err
		}
		l, err := binary.ReadUvarint(r)
		if err != nil {
			return nil, err
		}

		target := stringData[cursor : cursor+int(l)]
		// io.ReadFull rather than a bare Read: Read may legally return fewer
		// bytes than requested with a nil error, which would silently leave
		// zero-padded garbage in the decoded string. ReadFull guarantees the
		// string is copied completely and reports truncated data as
		// io.ErrUnexpectedEOF.
		if _, err := io.ReadFull(r, target); err != nil {
			return nil, err
		}
		// Alias the copied bytes as a string without another copy.
		strs[i] = *(*string)(unsafe.Pointer(&target))
		cursor += int(l)
	}

	return strs, nil
}
@@ -261,18 +443,18 @@ func shapeAndDataTypeOf(val reflect.Value) (shape []int64, dt DataType, err erro return shape, dt, fmt.Errorf("unsupported type %v", typ) } -// typeOf converts from a DataType and Shape to the equivalent Go type. -func typeOf(dt DataType, shape []int64) reflect.Type { - var ret reflect.Type +func typeForDataType(dt DataType) reflect.Type { for _, t := range types { if dt == DataType(t.dataType) { - ret = t.typ - break + return t.typ } } - if ret == nil { - panic(bug("DataType %v is not supported (see https://www.tensorflow.org/code/tensorflow/core/framework/types.proto)", dt)) - } + panic(bug("DataType %v is not supported (see https://www.tensorflow.org/code/tensorflow/core/framework/types.proto)", dt)) +} + +// typeOf converts from a DataType and Shape to the equivalent Go type. +func typeOf(dt DataType, shape []int64) reflect.Type { + ret := typeForDataType(dt) for range shape { ret = reflect.SliceOf(ret) } @@ -289,109 +471,93 @@ func numElements(shape []int64) int64 { // byteSizeOfEncodedStrings returns the size of the encoded strings in val. // val MUST be a string, or a container (array/slice etc.) of strings. -func byteSizeOfEncodedStrings(val interface{}) uintptr { - if s, ok := val.(string); ok { - return uintptr(C.TF_StringEncodedSize(C.size_t(len(s)))) +// Tensorflow encodes strings as the varint encoded length followed by the +// string bytes. We could call into the C library to do this but cgo has a heavy +// overhead. So we just do that calculation in Go +func byteSizeOfEncodedStrings(val reflect.Value) int { + if val.Kind() == reflect.String { + return sizeVarUint(uint64(val.Len())) + val.Len() + } + if val.Kind() != reflect.Slice && val.Kind() != reflect.Array { + panic(fmt.Sprintf("unexpected type %s", val.Type())) } // Otherwise must be an array or slice. 
- var size uintptr - v := reflect.ValueOf(val) - for i := 0; i < v.Len(); i++ { - size += byteSizeOfEncodedStrings(v.Index(i).Interface()) + var size int + for i := 0; i < val.Len(); i++ { + size += byteSizeOfEncodedStrings(val.Index(i)) } return size } -// encodeTensor writes v to the specified buffer using the format specified in -// c_api.h. Use stringEncoder for String tensors. -func encodeTensor(w *bytes.Buffer, v reflect.Value, shape []int64) error { - switch v.Kind() { - case reflect.Bool: - b := byte(0) - if v.Bool() { - b = 1 - } - if err := w.WriteByte(b); err != nil { - return err - } - case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: - if err := binary.Write(w, nativeEndian, v.Interface()); err != nil { - return err - } - - case reflect.Array, reflect.Slice: - // If current dimension is a slice, verify that it has the expected size - // Go's type system makes that guarantee for arrays. 
- if v.Kind() == reflect.Slice { - expected := int(shape[0]) - if v.Len() != expected { - return fmt.Errorf("mismatched slice lengths: %d and %d", v.Len(), expected) - } - } - - // Optimisation: if only one dimension is left we can use binary.Write() directly for this slice - if len(shape) == 1 && v.Len() > 0 { - switch v.Index(0).Kind() { - case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: - return binary.Write(w, nativeEndian, v.Interface()) - } - } - - subShape := shape[1:] - for i := 0; i < v.Len(); i++ { - err := encodeTensor(w, v.Index(i), subShape) - if err != nil { - return err - } - } - - default: - return fmt.Errorf("unsupported type %v", v.Type()) +// sizeVarUint determines how many bytes it would take to encode the int v as +// an unsigned varint +func sizeVarUint(v uint64) int { + if v < 0x80 { + return 1 } - return nil + bits := bits.Len64(v) + return (bits + 6) / 7 } -// decodeTensor decodes the Tensor from the buffer to ptr using the format -// specified in c_api.h. Use stringDecoder for String tensors. -func decodeTensor(r *bytes.Reader, shape []int64, typ reflect.Type, ptr reflect.Value) error { - switch typ.Kind() { - case reflect.Bool: - b, err := r.ReadByte() +// encodeTensorWithSlices writes v to the specified buffer using the format specified in +// c_api.h. Use stringEncoder for String tensors. +func encodeTensorWithSlices(w *bytes.Buffer, v reflect.Value, shape []int64) error { + // If current dimension is a slice, verify that it has the expected size + // Go's type system makes that guarantee for arrays. 
+ if v.Kind() == reflect.Slice { + expected := int(shape[0]) + if v.Len() != expected { + return fmt.Errorf("mismatched slice lengths: %d and %d", v.Len(), expected) + } + } else if v.Kind() != reflect.Array { + return fmt.Errorf("unsupported type %v", v.Type()) + } + + // Once we have just a single dimension we can just copy the data + if len(shape) == 1 && v.Len() > 0 { + elt := v.Index(0) + if !elt.CanAddr() { + panic("cannot take address") + } + ptr := unsafe.Pointer(elt.Addr().Pointer()) + return copyPtr(w, ptr, v.Len()*int(elt.Type().Size())) + } + + subShape := shape[1:] + for i := 0; i < v.Len(); i++ { + err := encodeTensorWithSlices(w, v.Index(i), subShape) if err != nil { return err } - ptr.Elem().SetBool(b == 1) - case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: - if err := binary.Read(r, nativeEndian, ptr.Interface()); err != nil { - return err - } - - case reflect.Slice: - val := reflect.Indirect(ptr) - val.Set(reflect.MakeSlice(typ, int(shape[0]), int(shape[0]))) - - // Optimization: if only one dimension is left we can use binary.Read() directly for this slice - if len(shape) == 1 && val.Len() > 0 { - switch val.Index(0).Kind() { - case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128: - return binary.Read(r, nativeEndian, val.Interface()) - } - } - - for i := 0; i < val.Len(); i++ { - if err := decodeTensor(r, shape[1:], typ.Elem(), val.Index(i).Addr()); err != nil { - return err - } - } - - default: - return fmt.Errorf("unsupported type %v", typ) } + return nil } +// It isn't safe to use reflect.SliceHeader as it uses a uintptr for Data and +// this is not inspected by the garbage collector +type sliceHeader struct { + Data unsafe.Pointer + Len int + Cap int 
+} + +// copyPtr copies the backing data for a slice or array directly into w. Note +// we don't need to worry about byte ordering because we want the natural byte +// order for the machine we're running on. +func copyPtr(w *bytes.Buffer, ptr unsafe.Pointer, l int) error { + // Convert our slice header into a []byte so we can call w.Write + b := *(*[]byte)(unsafe.Pointer(&sliceHeader{ + Data: ptr, + Len: l, + Cap: l, + })) + _, err := w.Write(b) + return err +} + type stringEncoder struct { - offsets io.Writer + offsets *bytes.Buffer data []byte offset uint64 status *status @@ -399,19 +565,18 @@ type stringEncoder struct { func (e *stringEncoder) encode(v reflect.Value, shape []int64) error { if v.Kind() == reflect.String { - if err := binary.Write(e.offsets, nativeEndian, e.offset); err != nil { + if err := copyPtr(e.offsets, unsafe.Pointer(&e.offset), int(unsafe.Sizeof(e.offset))); err != nil { return err } - var ( - s = v.Interface().(string) - src = C.CString(s) - srcLen = C.size_t(len(s)) - dst = (*C.char)(unsafe.Pointer(&e.data[e.offset])) - dstLen = C.size_t(uint64(len(e.data)) - e.offset) - ) - e.offset += uint64(C.TF_StringEncode(src, srcLen, dst, dstLen, e.status.c)) - C.free(unsafe.Pointer(src)) - return e.status.Err() + // A string is encoded as the varint length followed by the string bytes. 
+ // We do this in Go to avoid the considerable overhead of a cgo call into + // the tensorflow library + s := v.String() + n := binary.PutUvarint(e.data[e.offset:], uint64(len(s))) + e.offset += uint64(n) + n = copy(e.data[e.offset:], s) + e.offset += uint64(n) + return nil } if v.Kind() == reflect.Slice { @@ -430,45 +595,6 @@ func (e *stringEncoder) encode(v reflect.Value, shape []int64) error { return nil } -type stringDecoder struct { - offsets io.Reader - data []byte - status *status -} - -func (d *stringDecoder) decode(ptr reflect.Value, shape []int64) error { - if len(shape) == 0 { - var offset uint64 - if err := binary.Read(d.offsets, nativeEndian, &offset); err != nil { - return err - } - var ( - src = (*C.char)(unsafe.Pointer(&d.data[offset])) - srcLen = C.size_t(len(d.data)) - C.size_t(offset) - dst *C.char - dstLen C.size_t - ) - if offset > uint64(len(d.data)) { - return fmt.Errorf("invalid offsets in String Tensor") - } - C.TF_StringDecode(src, srcLen, &dst, &dstLen, d.status.c) - if err := d.status.Err(); err != nil { - return err - } - s := ptr.Interface().(*string) - *s = C.GoStringN(dst, C.int(dstLen)) - return nil - } - val := reflect.Indirect(ptr) - val.Set(reflect.MakeSlice(typeOf(String, shape), int(shape[0]), int(shape[0]))) - for i := 0; i < val.Len(); i++ { - if err := d.decode(val.Index(i).Addr(), shape[1:]); err != nil { - return err - } - } - return nil -} - func bug(format string, args ...interface{}) error { return fmt.Errorf("BUG: Please report at https://github.com/tensorflow/tensorflow/issues with the note: Go TensorFlow %v: %v", Version(), fmt.Sprintf(format, args...)) } @@ -489,22 +615,3 @@ func isTensorSerializable(dataType DataType) error { return fmt.Errorf("serialization of tensors with the DataType %d is not yet supported, see https://github.com/tensorflow/tensorflow/issues/6003", dataType) } } - -// nativeEndian is the byte order for the local platform. Used to send back and -// forth Tensors with the C API. 
We test for endianness at runtime because -// some architectures can be booted into different endian modes. -var nativeEndian binary.ByteOrder - -func init() { - buf := [2]byte{} - *(*uint16)(unsafe.Pointer(&buf[0])) = uint16(0xABCD) - - switch buf { - case [2]byte{0xCD, 0xAB}: - nativeEndian = binary.LittleEndian - case [2]byte{0xAB, 0xCD}: - nativeEndian = binary.BigEndian - default: - panic("Could not determine native endianness.") - } -} diff --git a/tensorflow/go/tensor_test.go b/tensorflow/go/tensor_test.go index dc533cd3e1c..ebfbdecf6c8 100644 --- a/tensorflow/go/tensor_test.go +++ b/tensorflow/go/tensor_test.go @@ -18,6 +18,7 @@ package tensorflow import ( "bytes" + "fmt" "io" "reflect" "testing" @@ -276,6 +277,7 @@ func TestReadTensorReadAll(t *testing.T) { } func benchmarkNewTensor(b *testing.B, v interface{}) { + b.ReportAllocs() for i := 0; i < b.N; i++ { if t, err := NewTensor(v); err != nil || t == nil { b.Fatalf("(%v, %v)", t, err) @@ -283,32 +285,68 @@ func benchmarkNewTensor(b *testing.B, v interface{}) { } } -func BenchmarkNewTensor(b *testing.B) { - var ( - // Some sample sizes from the Inception image labeling model. - // Where input tensors correspond to a 224x224 RGB image - // flattened into a vector. - vector [224 * 224 * 3]int32 - ) - b.Run("[150528]", func(b *testing.B) { benchmarkNewTensor(b, vector) }) -} +func benchmarkValueTensor(b *testing.B, v interface{}) { + t, err := NewTensor(v) + if err != nil { + b.Fatalf("(%v, %v)", t, err) + } + b.ReportAllocs() + b.ResetTimer() -func benchmarkDecodeTensor(b *testing.B, t *Tensor) { for i := 0; i < b.N; i++ { _ = t.Value() } } -func BenchmarkDecodeTensor(b *testing.B) { - var ( - // Some sample sizes from the Inception image labeling model. - // Where input tensors correspond to a 224x224 RGB image - // flattened into a vector. 
- vector [224 * 224 * 3]int32 - ) - t, err := NewTensor(vector) - if err != nil { - b.Fatalf("(%v, %v)", t, err) +func BenchmarkTensor(b *testing.B) { + // Some sample sizes from the Inception image labeling model. + // Where input tensors correspond to a 224x224 RGB image + // flattened into a vector. + var vector [224 * 224 * 3]int32 + var arrays [100][100][100]int32 + + l3 := make([][][]float32, 100) + l2 := make([][]float32, 100*100) + l1 := make([]float32, 100*100*100) + for i := range l2 { + l2[i] = l1[i*100 : (i+1)*100] } - b.Run("[150528]", func(b *testing.B) { benchmarkDecodeTensor(b, t) }) + for i := range l3 { + l3[i] = l2[i*100 : (i+1)*100] + } + + s1 := make([]string, 100*100*100) + s2 := make([][]string, 100*100) + s3 := make([][][]string, 100) + for i := range s1 { + s1[i] = "cheesit" + } + for i := range s2 { + s2[i] = s1[i*100 : (i+1)*100] + } + for i := range s3 { + s3[i] = s2[i*100 : (i+1)*100] + } + + tests := []interface{}{ + vector, + arrays, + l1, + l2, + l3, + s1, + s2, + s3, + } + b.Run("New", func(b *testing.B) { + for _, test := range tests { + b.Run(fmt.Sprintf("%T", test), func(b *testing.B) { benchmarkNewTensor(b, test) }) + } + }) + b.Run("Value", func(b *testing.B) { + for _, test := range tests { + b.Run(fmt.Sprintf("%T", test), func(b *testing.B) { benchmarkValueTensor(b, test) }) + } + }) + }