PR : trying again: Big performance gains for Go NewTensor and Value

Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/39117

After some helpful comments in https://github.com/tensorflow/tensorflow/pull/36578#issuecomment-622308355 I'm trying again with this performance improvement.

Please please please if it gets rolled back again because of "internal test" failures, please get some kind of debug information. Stack traces or something. Any kind of clue.

```
name                                  old time/op    new time/op    delta
Tensor/New/[150528]int32-16             1.78ms ± 4%    0.13ms ±10%   -92.63%  (p=0.000 n=8+7)
Tensor/New/[100][100][100]int32-16      13.1ms ± 1%     0.9ms ±53%   -92.81%  (p=0.000 n=8+8)
Tensor/New/[]float32-16                 3.72ms ± 1%    0.97ms ±30%   -74.04%  (p=0.000 n=8+8)
Tensor/New/[][]float32-16               4.83ms ± 2%    1.32ms ± 8%   -72.69%  (p=0.000 n=8+8)
Tensor/New/[][][]float32-16             4.81ms ± 1%    1.32ms ± 4%   -72.51%  (p=0.001 n=8+6)
Tensor/New/[]string-16                   466ms ± 1%      34ms ± 4%   -92.60%  (p=0.001 n=7+7)
Tensor/New/[][]string-16                 460ms ± 1%      35ms ± 1%   -92.45%  (p=0.000 n=8+8)
Tensor/New/[][][]string-16               462ms ± 2%      36ms ± 5%   -92.14%  (p=0.000 n=8+8)
Tensor/Value/[150528]int32-16            647µs ± 3%      82µs ± 1%   -87.28%  (p=0.000 n=8+8)
Tensor/Value/[100][100][100]int32-16    6.43ms ± 1%    0.99ms ± 3%   -84.63%  (p=0.000 n=8+8)
Tensor/Value/[]float32-16               5.57ms ± 3%    1.04ms ± 7%   -81.26%  (p=0.000 n=8+8)
Tensor/Value/[][]float32-16             6.84ms ± 1%    1.51ms ± 1%   -77.96%  (p=0.000 n=8+8)
Tensor/Value/[][][]float32-16           6.87ms ± 1%    1.52ms ± 3%   -77.80%  (p=0.001 n=7+7)
Tensor/Value/[]string-16                 268ms ± 3%      20ms ± 2%   -92.45%  (p=0.000 n=8+8)
Tensor/Value/[][]string-16               269ms ± 2%      20ms ± 1%   -92.46%  (p=0.000 n=8+7)
Tensor/Value/[][][]string-16             271ms ± 2%      20ms ± 1%   -92.55%  (p=0.000 n=8+8)

name                                  old alloc/op   new alloc/op   delta
Tensor/New/[150528]int32-16              606kB ± 0%       0kB ± 0%   -99.99%  (p=0.000 n=8+8)
Tensor/New/[100][100][100]int32-16      4.16MB ± 0%    0.00MB ± 0%  -100.00%  (p=0.000 n=7+8)
Tensor/New/[]float32-16                 4.01MB ± 0%    0.00MB ± 0%  -100.00%  (p=0.002 n=7+8)
Tensor/New/[][]float32-16               4.48MB ± 0%    0.00MB ± 0%  -100.00%  (p=0.000 n=8+8)
Tensor/New/[][][]float32-16             4.48MB ± 0%    0.00MB ± 0%  -100.00%  (p=0.002 n=7+8)
Tensor/New/[]string-16                  48.0MB ± 0%     0.0MB ± 0%  -100.00%  (p=0.000 n=7+8)
Tensor/New/[][]string-16                48.3MB ± 0%     0.0MB ± 0%  -100.00%  (p=0.000 n=7+8)
Tensor/New/[][][]string-16              48.3MB ± 0%     0.0MB ± 0%  -100.00%  (p=0.000 n=7+8)
Tensor/Value/[150528]int32-16           1.21MB ± 0%    0.61MB ± 0%   -50.00%  (p=0.000 n=8+8)
Tensor/Value/[100][100][100]int32-16    9.23MB ± 0%    4.25MB ± 0%   -53.93%  (p=0.000 n=8+8)
Tensor/Value/[]float32-16               8.01MB ± 0%    4.01MB ± 0%   -50.00%  (p=0.000 n=8+7)
Tensor/Value/[][]float32-16             9.21MB ± 0%    4.25MB ± 0%   -53.82%  (p=0.000 n=8+8)
Tensor/Value/[][][]float32-16           9.23MB ± 0%    4.25MB ± 0%   -53.93%  (p=0.000 n=8+8)
Tensor/Value/[]string-16                56.0MB ± 0%    23.0MB ± 0%   -58.91%  (p=0.000 n=8+7)
Tensor/Value/[][]string-16              58.5MB ± 0%    23.3MB ± 0%   -60.23%  (p=0.000 n=8+8)
Tensor/Value/[][][]string-16            58.5MB ± 0%    23.3MB ± 0%   -60.25%  (p=0.001 n=7+7)

name                                  old allocs/op  new allocs/op  delta
Tensor/New/[150528]int32-16               4.00 ± 0%      2.00 ± 0%   -50.00%  (p=0.000 n=8+8)
Tensor/New/[100][100][100]int32-16       10.0k ± 0%      0.0k ± 0%   -99.96%  (p=0.000 n=8+8)
Tensor/New/[]float32-16                   4.00 ± 0%      2.00 ± 0%   -50.00%  (p=0.000 n=8+8)
Tensor/New/[][]float32-16                20.0k ± 0%      0.0k ± 0%   -99.99%  (p=0.000 n=8+8)
Tensor/New/[][][]float32-16              20.0k ± 0%      0.0k ± 0%   -99.98%  (p=0.000 n=8+8)
Tensor/New/[]string-16                   4.00M ± 0%     0.00M ± 0%  -100.00%  (p=0.000 n=8+8)
Tensor/New/[][]string-16                 4.01M ± 0%     0.00M ± 0%  -100.00%  (p=0.000 n=8+8)
Tensor/New/[][][]string-16               4.01M ± 0%     0.00M ± 0%  -100.00%  (p=0.000 n=8+8)
Tensor/Value/[150528]int32-16             7.00 ± 0%      2.00 ± 0%   -71.43%  (p=0.000 n=8+8)
Tensor/Value/[100][100][100]int32-16     40.2k ± 0%      0.0k ± 0%   -99.99%  (p=0.000 n=8+8)
Tensor/Value/[]float32-16                 7.00 ± 0%      2.00 ± 0%   -71.43%  (p=0.000 n=8+8)
Tensor/Value/[][]float32-16              40.0k ± 0%      0.0k ± 0%   -99.99%  (p=0.000 n=8+8)
Tensor/Value/[][][]float32-16            40.2k ± 0%      0.0k ± 0%   -99.99%  (p=0.000 n=8+8)
Tensor/Value/[]string-16                 5.00M ± 0%     0.00M ± 0%  -100.00%  (p=0.000 n=8+8)
Tensor/Value/[][]string-16               5.02M ± 0%     0.00M ± 0%  -100.00%  (p=0.000 n=8+8)
Tensor/Value/[][][]string-16             5.02M ± 0%     0.00M ± 0%  -100.00%  (p=0.000 n=8+8)
```
Copybara import of the project:

--
fb7e6b1665204f78863b70e7f4998cabeef0898d by Phil Pearl <phil.pearl@ravelin.com>:

Add some more benchmarks

--
4ae853272b8792a599cdbd5c5af5422b23d90366 by Phil Pearl <phil.pearl@ravelin.com>:

Go: large performance gains for NewTensor and Value
COPYBARA_INTEGRATE_REVIEW=https://github.com/tensorflow/tensorflow/pull/39117 from philpearl:master 4ae853272b8792a599cdbd5c5af5422b23d90366
PiperOrigin-RevId: 318083887
Change-Id: Ie211646f11922c5b0971bbf64bdb4a0c6a844985
This commit is contained in:
Phil Pearl 2020-06-24 09:53:18 -07:00 committed by TensorFlower Gardener
parent 05c284bcc3
commit cc1273649c
2 changed files with 343 additions and 198 deletions

View File

@ -26,6 +26,7 @@ import (
"encoding/binary"
"fmt"
"io"
"math/bits"
"reflect"
"runtime"
"unsafe"
@ -80,7 +81,7 @@ func NewTensor(value interface{}) (*Tensor, error) {
if dataType == String {
// TF_STRING tensors are encoded as an array of 8-byte offsets
// followed by string data. See c_api.h.
nbytes = uintptr(nflattened*8) + byteSizeOfEncodedStrings(value)
nbytes = uintptr(nflattened*8 + int64(byteSizeOfEncodedStrings(val)))
}
var shapePtr *C.int64_t
if len(shape) > 0 {
@ -94,9 +95,22 @@ func NewTensor(value interface{}) (*Tensor, error) {
raw := tensorData(t.c)
buf := bytes.NewBuffer(raw[:0:len(raw)])
if dataType != String {
if err := encodeTensor(buf, val, shape); err != nil {
return nil, err
if isAllArray(val.Type()) {
// We have arrays all the way down, or just primitive types. We can
// just copy the memory in as it is all contiguous.
if err := copyPtr(buf, unpackEFace(value).data, int(val.Type().Size())); err != nil {
return nil, err
}
} else {
// When there are slices involved the memory for each leaf slice may
// not be contiguous with the others or in the order we might
// expect, so we need to work our way down to each slice of
// primitives and copy them individually
if err := encodeTensorWithSlices(buf, val, shape); err != nil {
return nil, err
}
}
if uintptr(buf.Len()) != nbytes {
return nil, bug("NewTensor incorrectly calculated the size of a tensor with type %v and shape %v as %v bytes instead of %v", dataType, shape, nbytes, buf.Len())
}
@ -112,6 +126,43 @@ func NewTensor(value interface{}) (*Tensor, error) {
return t, nil
}
// isAllArray returns true if type is a primitive type or an array of primitive
// types or an array of ... etc.. When this is true the data we want is
// contiguous in RAM.
func isAllArray(typ reflect.Type) bool {
switch typ.Kind() {
case reflect.Slice:
return false
case reflect.Array:
return isAllArray(typ.Elem())
default:
// We know the type is slices/arrays of slices/arrays of primitive types.
return true
}
}
// eface defines what an interface type actually is: a pointer to type
// information about the encapsulated type and a pointer to the encapsulated
// value.
type eface struct {
	// rtype is never read here; it exists to keep the struct layout aligned
	// with the runtime's interface representation.
	rtype unsafe.Pointer
	data  unsafe.Pointer
}

// unpackEFace gives us an efficient way to get us a pointer to the value carried
// in an interface. If you wrap a pointer type in an interface then the pointer
// is directly stored in the interface struct. If you wrap a value type in an
// interface then the compiler copies the value into a newly allocated piece of
// memory and stores a pointer to that memory in the interface. So we're
// guaranteed to get a pointer. Go reflection doesn't expose the pointer to
// value types straightforwardly as it doesn't want you to think you have a
// reference to the original value. But we just want a pointer to make it
// efficient to read the value, so cheating like this should be safe and
// reasonable.
func unpackEFace(obj interface{}) *eface {
	return (*eface)(unsafe.Pointer(&obj))
}
// ReadTensor constructs a Tensor with the provided type and shape from the
// serialized tensor contents in r.
//
@ -168,21 +219,152 @@ func (t *Tensor) Shape() []int64 { return t.shape }
// Tensor(int64, 0): int64
// Tensor(float64, 3): [][][]float64
func (t *Tensor) Value() interface{} {
typ := typeOf(t.DataType(), t.Shape())
val := reflect.New(typ)
raw := tensorData(t.c)
if t.DataType() != String {
if err := decodeTensor(bytes.NewReader(raw), t.Shape(), typ, val); err != nil {
panic(bug("unable to decode Tensor of type %v and shape %v - %v", t.DataType(), t.Shape(), err))
shape := t.Shape()
dt := t.DataType()
return decodeTensor(raw, shape, dt).Interface()
}
// decodeTensor decodes the raw tensor bytes into a reflect.Value of the Go
// type implied by shape and dt. The stale removed lines from the old
// stringDecoder path that were interleaved here have been dropped.
func decodeTensor(raw []byte, shape []int64, dt DataType) reflect.Value {
	// Create a 1-dimensional slice of the base type large enough for the data
	// and copy the data in.
	n := int(numElements(shape))

	var (
		slice reflect.Value
		typ   reflect.Type
	)
	if dt == String {
		strs, err := decodeOneDimString(raw, n)
		if err != nil {
			panic(bug("unable to decode string with shape %v: %v", shape, err))
		}
		slice = reflect.ValueOf(strs)
		typ = slice.Type()
	} else {
		typ = typeForDataType(dt)
		l := n * int(typ.Size())
		typ = reflect.SliceOf(typ)
		slice = reflect.MakeSlice(typ, n, n)
		// View the freshly allocated slice's backing store as bytes so the C
		// tensor data can be copied in with a single memmove.
		baseBytes := *(*[]byte)(unsafe.Pointer(&sliceHeader{
			Data: unsafe.Pointer(slice.Pointer()),
			Len:  l,
			Cap:  l,
		}))
		copy(baseBytes, raw)
	}
	// Now we have the data in place in the base slice we can add the
	// dimensions. We want to walk backwards through the shape. If the shape is
	// length 1 or 0 then we're already done.
	if len(shape) == 0 {
		return slice.Index(0)
	}
	if len(shape) == 1 {
		return slice
	}
	// We have a special case if the tensor has no data. Our backing slice is
	// empty, but we still want to create slices following the shape. In this
	// case only the final part of the shape will be 0 and we want to
	// recalculate n at this point ignoring that 0.
	// For example if our shape is 3 * 2 * 0 then n will be zero, but we still
	// want 6 zero length slices to group as follows.
	// {{} {}} {{} {}} {{} {}}
	if n == 0 {
		n = int(numElements(shape[:len(shape)-1]))
	}
	for i := len(shape) - 2; i >= 0; i-- {
		underlyingSize := typ.Elem().Size()
		typ = reflect.SliceOf(typ)
		subsliceLen := int(shape[i+1])
		if subsliceLen != 0 {
			n = n / subsliceLen
		}
		// Just using reflection it is difficult to avoid unnecessary
		// allocations while setting up the sub-slices as the Slice function on
		// a slice Value allocates. So we end up doing pointer arithmetic!
		// Pointer() on a slice gives us access to the data backing the slice.
		// We insert slice headers directly into this data.
		data := unsafe.Pointer(slice.Pointer())
		nextSlice := reflect.MakeSlice(typ, n, n)
		for j := 0; j < n; j++ {
			// This is equivalent to
			// nextSlice[j] = slice[j*subsliceLen : (j+1)*subsliceLen]
			setSliceInSlice(nextSlice, j, sliceHeader{
				Data: unsafe.Pointer(uintptr(data) + (uintptr(j*subsliceLen) * underlyingSize)),
				Len:  subsliceLen,
				Cap:  subsliceLen,
			})
		}
		slice = nextSlice
	}
	return slice
}
// setSliceInSlice sets slice[index] = content by writing the raw slice header
// in place, avoiding the allocation that reflect's Slice method would make.
// The caller guarantees index is within slice's bounds.
func setSliceInSlice(slice reflect.Value, index int, content sliceHeader) {
	const sliceSize = unsafe.Sizeof(sliceHeader{})
	// We must cast slice.Pointer to uintptr & back again to avoid GC issues.
	// See https://github.com/google/go-cmp/issues/167#issuecomment-546093202
	*(*sliceHeader)(unsafe.Pointer(uintptr(unsafe.Pointer(slice.Pointer())) + (uintptr(index) * sliceSize))) = content
}
// decodeOneDimString decodes a string tensor into a one-dimensional []string.
//
// The layout (see c_api.h) is nStrings 8-byte native-endian offsets followed
// by the string data, where each string is a varint length followed by its
// bytes. The strings are copied out of raw because raw is owned by the C
// tensor and is not safe to reference after the tensor is freed.
func decodeOneDimString(raw []byte, nStrings int) ([]string, error) {
	// Start by making an array of all the strings
	strs := make([]string, nStrings)
	// The first nStrings * 8 bytes of raw are offsets into the second half of
	// the raw data. This second half is where the strings are encoded. The
	// unsafe cast keeps the []byte length (in bytes), which is >= nStrings,
	// so the re-slice below is in bounds.
	offsets := (*(*[]int64)(unsafe.Pointer(&raw)))[:nStrings]
	// Reset raw after the offsets. Now the offsets will work relative to raw
	raw = raw[nStrings*8:]
	// Next we work out the final length of the string data so we can copy the
	// good data out of raw (which is owned by the C tensor and won't be safe
	// to access if the tensor is freed)
	r := bytes.NewReader(raw)
	var totalLength int
	for _, offset := range offsets {
		// At each offset we should find a varint length of a string.
		// Errors here should mean the tensor is corrupt.
		if _, err := r.Seek(offset, io.SeekStart); err != nil {
			return nil, err
		}
		l, err := binary.ReadUvarint(r)
		if err != nil {
			return nil, err
		}
		totalLength += int(l)
	}

	// Allocate a single buffer to carry all of the string data.
	stringData := make([]byte, 0, totalLength)
	// Now copy the string data across into our new buffer, keeping track of
	// the location of each string in the strs slice.
	var cursor int
	for i, offset := range offsets {
		// At each offset we should find a varint length. Read it
		if _, err := r.Seek(offset, io.SeekStart); err != nil {
			return nil, err
		}
		l, err := binary.ReadUvarint(r)
		if err != nil {
			return nil, err
		}
		// Then copy the actual string into our large buffer. io.ReadFull is
		// used rather than a bare Read: Read is permitted to return fewer
		// than len(target) bytes, which would silently leave garbage in the
		// decoded string; ReadFull turns a short read into ErrUnexpectedEOF.
		target := stringData[cursor : cursor+int(l)]
		if _, err := io.ReadFull(r, target); err != nil {
			return nil, err
		}
		// Track where this string data is. The unsafe cast aliases target's
		// bytes as a string without copying them again.
		strs[i] = *(*string)(unsafe.Pointer(&target))
		cursor += int(l)
	}
	// So now we have a big slice of strings
	return strs, nil
}
// WriteContentsTo writes the serialized contents of t to w.
@ -261,18 +443,18 @@ func shapeAndDataTypeOf(val reflect.Value) (shape []int64, dt DataType, err erro
return shape, dt, fmt.Errorf("unsupported type %v", typ)
}
// typeOf converts from a DataType and Shape to the equivalent Go type.
func typeOf(dt DataType, shape []int64) reflect.Type {
var ret reflect.Type
func typeForDataType(dt DataType) reflect.Type {
for _, t := range types {
if dt == DataType(t.dataType) {
ret = t.typ
break
return t.typ
}
}
if ret == nil {
panic(bug("DataType %v is not supported (see https://www.tensorflow.org/code/tensorflow/core/framework/types.proto)", dt))
}
panic(bug("DataType %v is not supported (see https://www.tensorflow.org/code/tensorflow/core/framework/types.proto)", dt))
}
// typeOf converts from a DataType and Shape to the equivalent Go type.
func typeOf(dt DataType, shape []int64) reflect.Type {
ret := typeForDataType(dt)
for range shape {
ret = reflect.SliceOf(ret)
}
@ -289,109 +471,93 @@ func numElements(shape []int64) int64 {
// byteSizeOfEncodedStrings returns the size of the encoded strings in val.
// val MUST be a string, or a container (array/slice etc.) of strings.
func byteSizeOfEncodedStrings(val interface{}) uintptr {
if s, ok := val.(string); ok {
return uintptr(C.TF_StringEncodedSize(C.size_t(len(s))))
// Tensorflow encodes strings as the varint encoded length followed by the
// string bytes. We could call into the C library to do this but cgo has a heavy
// overhead. So we just do that calculation in Go
func byteSizeOfEncodedStrings(val reflect.Value) int {
if val.Kind() == reflect.String {
return sizeVarUint(uint64(val.Len())) + val.Len()
}
if val.Kind() != reflect.Slice && val.Kind() != reflect.Array {
panic(fmt.Sprintf("unexpected type %s", val.Type()))
}
// Otherwise must be an array or slice.
var size uintptr
v := reflect.ValueOf(val)
for i := 0; i < v.Len(); i++ {
size += byteSizeOfEncodedStrings(v.Index(i).Interface())
var size int
for i := 0; i < val.Len(); i++ {
size += byteSizeOfEncodedStrings(val.Index(i))
}
return size
}
// encodeTensor writes v to the specified buffer using the format specified in
// c_api.h. Use stringEncoder for String tensors.
func encodeTensor(w *bytes.Buffer, v reflect.Value, shape []int64) error {
switch v.Kind() {
case reflect.Bool:
b := byte(0)
if v.Bool() {
b = 1
}
if err := w.WriteByte(b); err != nil {
return err
}
case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
if err := binary.Write(w, nativeEndian, v.Interface()); err != nil {
return err
}
case reflect.Array, reflect.Slice:
// If current dimension is a slice, verify that it has the expected size
// Go's type system makes that guarantee for arrays.
if v.Kind() == reflect.Slice {
expected := int(shape[0])
if v.Len() != expected {
return fmt.Errorf("mismatched slice lengths: %d and %d", v.Len(), expected)
}
}
// Optimisation: if only one dimension is left we can use binary.Write() directly for this slice
if len(shape) == 1 && v.Len() > 0 {
switch v.Index(0).Kind() {
case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
return binary.Write(w, nativeEndian, v.Interface())
}
}
subShape := shape[1:]
for i := 0; i < v.Len(); i++ {
err := encodeTensor(w, v.Index(i), subShape)
if err != nil {
return err
}
}
default:
return fmt.Errorf("unsupported type %v", v.Type())
// sizeVarUint determines how many bytes it would take to encode the value v
// as an unsigned varint: one byte per 7 bits of payload. (A stray `return nil`
// line of removed diff code sat mid-function here and would not compile; it
// has been dropped. The local is also renamed so it no longer shadows the
// math/bits package.)
func sizeVarUint(v uint64) int {
	if v < 0x80 {
		// Values below 0x80 fit in a single varint byte.
		return 1
	}
	n := bits.Len64(v)
	return (n + 6) / 7
}
// decodeTensor decodes the Tensor from the buffer to ptr using the format
// specified in c_api.h. Use stringDecoder for String tensors.
func decodeTensor(r *bytes.Reader, shape []int64, typ reflect.Type, ptr reflect.Value) error {
switch typ.Kind() {
case reflect.Bool:
b, err := r.ReadByte()
// encodeTensorWithSlices writes v to the specified buffer using the format specified in
// c_api.h. Use stringEncoder for String tensors.
func encodeTensorWithSlices(w *bytes.Buffer, v reflect.Value, shape []int64) error {
// If current dimension is a slice, verify that it has the expected size
// Go's type system makes that guarantee for arrays.
if v.Kind() == reflect.Slice {
expected := int(shape[0])
if v.Len() != expected {
return fmt.Errorf("mismatched slice lengths: %d and %d", v.Len(), expected)
}
} else if v.Kind() != reflect.Array {
return fmt.Errorf("unsupported type %v", v.Type())
}
// Once we have just a single dimension we can just copy the data
if len(shape) == 1 && v.Len() > 0 {
elt := v.Index(0)
if !elt.CanAddr() {
panic("cannot take address")
}
ptr := unsafe.Pointer(elt.Addr().Pointer())
return copyPtr(w, ptr, v.Len()*int(elt.Type().Size()))
}
subShape := shape[1:]
for i := 0; i < v.Len(); i++ {
err := encodeTensorWithSlices(w, v.Index(i), subShape)
if err != nil {
return err
}
ptr.Elem().SetBool(b == 1)
case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
if err := binary.Read(r, nativeEndian, ptr.Interface()); err != nil {
return err
}
case reflect.Slice:
val := reflect.Indirect(ptr)
val.Set(reflect.MakeSlice(typ, int(shape[0]), int(shape[0])))
// Optimization: if only one dimension is left we can use binary.Read() directly for this slice
if len(shape) == 1 && val.Len() > 0 {
switch val.Index(0).Kind() {
case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
return binary.Read(r, nativeEndian, val.Interface())
}
}
for i := 0; i < val.Len(); i++ {
if err := decodeTensor(r, shape[1:], typ.Elem(), val.Index(i).Addr()); err != nil {
return err
}
}
default:
return fmt.Errorf("unsupported type %v", typ)
}
return nil
}
// It isn't safe to use reflect.SliceHeader as it uses a uintptr for Data and
// this is not inspected by the garbage collector
type sliceHeader struct {
Data unsafe.Pointer
Len int
Cap int
}
// copyPtr copies the backing data for a slice or array directly into w. Note
// we don't need to worry about byte ordering because we want the natural byte
// order for the machine we're running on.
func copyPtr(w *bytes.Buffer, ptr unsafe.Pointer, l int) error {
// Convert our slice header into a []byte so we can call w.Write
b := *(*[]byte)(unsafe.Pointer(&sliceHeader{
Data: ptr,
Len: l,
Cap: l,
}))
_, err := w.Write(b)
return err
}
type stringEncoder struct {
offsets io.Writer
offsets *bytes.Buffer
data []byte
offset uint64
status *status
@ -399,19 +565,18 @@ type stringEncoder struct {
func (e *stringEncoder) encode(v reflect.Value, shape []int64) error {
if v.Kind() == reflect.String {
if err := binary.Write(e.offsets, nativeEndian, e.offset); err != nil {
if err := copyPtr(e.offsets, unsafe.Pointer(&e.offset), int(unsafe.Sizeof(e.offset))); err != nil {
return err
}
var (
s = v.Interface().(string)
src = C.CString(s)
srcLen = C.size_t(len(s))
dst = (*C.char)(unsafe.Pointer(&e.data[e.offset]))
dstLen = C.size_t(uint64(len(e.data)) - e.offset)
)
e.offset += uint64(C.TF_StringEncode(src, srcLen, dst, dstLen, e.status.c))
C.free(unsafe.Pointer(src))
return e.status.Err()
// A string is encoded as the varint length followed by the string bytes.
// We do this in Go to avoid the considerable overhead of a cgo call into
// the tensorflow library
s := v.String()
n := binary.PutUvarint(e.data[e.offset:], uint64(len(s)))
e.offset += uint64(n)
n = copy(e.data[e.offset:], s)
e.offset += uint64(n)
return nil
}
if v.Kind() == reflect.Slice {
@ -430,45 +595,6 @@ func (e *stringEncoder) encode(v reflect.Value, shape []int64) error {
return nil
}
// stringDecoder decodes TF_STRING tensor data via the TensorFlow C library:
// offsets supplies the 8-byte offset table, and data holds the encoded
// string bytes. (This is the pre-optimization implementation that the commit
// removes in favour of decodeOneDimString.)
type stringDecoder struct {
	offsets io.Reader
	data    []byte
	status  *status
}

// decode reads string tensor data into the value pointed to by ptr, recursing
// through shape to build nested slices; with an empty shape it decodes a
// single string.
func (d *stringDecoder) decode(ptr reflect.Value, shape []int64) error {
	if len(shape) == 0 {
		// Scalar case: read this string's offset from the offset table.
		var offset uint64
		if err := binary.Read(d.offsets, nativeEndian, &offset); err != nil {
			return err
		}
		var (
			src    = (*C.char)(unsafe.Pointer(&d.data[offset]))
			srcLen = C.size_t(len(d.data)) - C.size_t(offset)
			dst    *C.char
			dstLen C.size_t
		)
		// NOTE(review): src indexes d.data[offset] BEFORE this bounds check
		// runs, so an out-of-range offset panics rather than returning the
		// error below — confirm whether offsets are trusted here.
		if offset > uint64(len(d.data)) {
			return fmt.Errorf("invalid offsets in String Tensor")
		}
		// Let the TensorFlow C library decode the length-prefixed string.
		C.TF_StringDecode(src, srcLen, &dst, &dstLen, d.status.c)
		if err := d.status.Err(); err != nil {
			return err
		}
		s := ptr.Interface().(*string)
		*s = C.GoStringN(dst, C.int(dstLen))
		return nil
	}
	// Non-scalar: allocate a slice for this dimension and decode each element.
	val := reflect.Indirect(ptr)
	val.Set(reflect.MakeSlice(typeOf(String, shape), int(shape[0]), int(shape[0])))
	for i := 0; i < val.Len(); i++ {
		if err := d.decode(val.Index(i).Addr(), shape[1:]); err != nil {
			return err
		}
	}
	return nil
}
// bug builds an error that asks the user to file a GitHub issue, embedding
// the library version and the formatted details of the suspected internal
// bug.
func bug(format string, args ...interface{}) error {
	details := fmt.Sprintf(format, args...)
	return fmt.Errorf("BUG: Please report at https://github.com/tensorflow/tensorflow/issues with the note: Go TensorFlow %v: %v", Version(), details)
}
@ -489,22 +615,3 @@ func isTensorSerializable(dataType DataType) error {
return fmt.Errorf("serialization of tensors with the DataType %d is not yet supported, see https://github.com/tensorflow/tensorflow/issues/6003", dataType)
}
}
// nativeEndian is the byte order for the local platform. Used to send back and
// forth Tensors with the C API. We test for endianness at runtime because
// some architectures can be booted into different endian modes.
var nativeEndian binary.ByteOrder
func init() {
buf := [2]byte{}
*(*uint16)(unsafe.Pointer(&buf[0])) = uint16(0xABCD)
switch buf {
case [2]byte{0xCD, 0xAB}:
nativeEndian = binary.LittleEndian
case [2]byte{0xAB, 0xCD}:
nativeEndian = binary.BigEndian
default:
panic("Could not determine native endianness.")
}
}

View File

@ -18,6 +18,7 @@ package tensorflow
import (
"bytes"
"fmt"
"io"
"reflect"
"testing"
@ -276,6 +277,7 @@ func TestReadTensorReadAll(t *testing.T) {
}
func benchmarkNewTensor(b *testing.B, v interface{}) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
if t, err := NewTensor(v); err != nil || t == nil {
b.Fatalf("(%v, %v)", t, err)
@ -283,32 +285,68 @@ func benchmarkNewTensor(b *testing.B, v interface{}) {
}
}
// BenchmarkNewTensor measures NewTensor over an Inception-sized input vector.
// (This is the pre-commit benchmark that the change folds into BenchmarkTensor.)
func BenchmarkNewTensor(b *testing.B) {
	var (
		// Some sample sizes from the Inception image labeling model.
		// Where input tensors correspond to a 224x224 RGB image
		// flattened into a vector.
		vector [224 * 224 * 3]int32
	)
	b.Run("[150528]", func(b *testing.B) { benchmarkNewTensor(b, vector) })
}
func benchmarkValueTensor(b *testing.B, v interface{}) {
t, err := NewTensor(v)
if err != nil {
b.Fatalf("(%v, %v)", t, err)
}
b.ReportAllocs()
b.ResetTimer()
func benchmarkDecodeTensor(b *testing.B, t *Tensor) {
for i := 0; i < b.N; i++ {
_ = t.Value()
}
}
func BenchmarkDecodeTensor(b *testing.B) {
var (
// Some sample sizes from the Inception image labeling model.
// Where input tensors correspond to a 224x224 RGB image
// flattened into a vector.
vector [224 * 224 * 3]int32
)
t, err := NewTensor(vector)
if err != nil {
b.Fatalf("(%v, %v)", t, err)
func BenchmarkTensor(b *testing.B) {
// Some sample sizes from the Inception image labeling model.
// Where input tensors correspond to a 224x224 RGB image
// flattened into a vector.
var vector [224 * 224 * 3]int32
var arrays [100][100][100]int32
l3 := make([][][]float32, 100)
l2 := make([][]float32, 100*100)
l1 := make([]float32, 100*100*100)
for i := range l2 {
l2[i] = l1[i*100 : (i+1)*100]
}
b.Run("[150528]", func(b *testing.B) { benchmarkDecodeTensor(b, t) })
for i := range l3 {
l3[i] = l2[i*100 : (i+1)*100]
}
s1 := make([]string, 100*100*100)
s2 := make([][]string, 100*100)
s3 := make([][][]string, 100)
for i := range s1 {
s1[i] = "cheesit"
}
for i := range s2 {
s2[i] = s1[i*100 : (i+1)*100]
}
for i := range s3 {
s3[i] = s2[i*100 : (i+1)*100]
}
tests := []interface{}{
vector,
arrays,
l1,
l2,
l3,
s1,
s2,
s3,
}
b.Run("New", func(b *testing.B) {
for _, test := range tests {
b.Run(fmt.Sprintf("%T", test), func(b *testing.B) { benchmarkNewTensor(b, test) })
}
})
b.Run("Value", func(b *testing.B) {
for _, test := range tests {
b.Run(fmt.Sprintf("%T", test), func(b *testing.B) { benchmarkValueTensor(b, test) })
}
})
}