Rollback due to internal failure.

PR #37190: Re-instate PR #36578: Go: NewTensor & Value performance improvement

Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/37190
See https://github.com/tensorflow/tensorflow/pull/36578. This re-instatement fixes the issues with the pointer-arithmetic checks introduced in Go 1.14 that caused the original PR to be reverted.

PiperOrigin-RevId: 300607819
Change-Id: I4fde4fa0449cd7bd56e03fd2e0f4ab739915a96b

parent 3511dbe928
commit 00f041777f
@@ -94,22 +94,9 @@ func NewTensor(value interface{}) (*Tensor, error) {
	raw := tensorData(t.c)
	buf := bytes.NewBuffer(raw[:0:len(raw)])
	if dataType != String {
		if isAllArray(val.Type()) {
			// We have arrays all the way down, or just primitive types. We can
			// just copy the memory in as it is all contiguous.
			if err := copyPtr(buf, unpackEFace(value).data, int(val.Type().Size())); err != nil {
				return nil, err
			}
		} else {
			// When there are slices involved the memory for each leaf slice may
			// not be contiguous with the others or in the order we might
			// expect, so we need to work our way down to each slice of
			// primitives and copy them individually
			if err := encodeTensorWithSlices(buf, val, shape); err != nil {
				return nil, err
			}
		if err := encodeTensor(buf, val, shape); err != nil {
			return nil, err
		}

		if uintptr(buf.Len()) != nbytes {
			return nil, bug("NewTensor incorrectly calculated the size of a tensor with type %v and shape %v as %v bytes instead of %v", dataType, shape, nbytes, buf.Len())
		}
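For context on the fast path this hunk introduces: when the incoming Go value is built from arrays (or is a single primitive), its bytes are contiguous, so NewTensor can copy them into the destination buffer in one write instead of encoding element by element. The standalone sketch below is illustrative only, not code from the commit; copyContiguous is a hypothetical helper, and it uses unsafe.Slice (Go 1.17+) where the patch itself builds a reflect.SliceHeader.

package main

import (
	"bytes"
	"fmt"
	"unsafe"
)

// copyContiguous writes the raw bytes backing a contiguous value into buf in a
// single Write call, mirroring the single-copy fast path described above.
func copyContiguous(buf *bytes.Buffer, p unsafe.Pointer, n int) {
	buf.Write(unsafe.Slice((*byte)(p), n)) // view the memory as []byte; no per-element work
}

func main() {
	v := [4]int32{1, 2, 3, 4} // arrays of primitives are laid out contiguously
	var buf bytes.Buffer
	copyContiguous(&buf, unsafe.Pointer(&v), int(unsafe.Sizeof(v)))
	fmt.Println(buf.Len()) // 16 (4 values * 4 bytes each)
}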
@@ -125,43 +112,6 @@ func NewTensor(value interface{}) (*Tensor, error) {
	return t, nil
}

// isAllArray returns true if type is a primitive type or an array of primitive
// types or an array of ... etc. When this is true the data we want is
// contiguous in RAM.
func isAllArray(typ reflect.Type) bool {
	switch typ.Kind() {
	case reflect.Slice:
		return false
	case reflect.Array:
		return isAllArray(typ.Elem())
	default:
		// We know the type is slices/arrays of slices/arrays of primitive types.
		return true
	}
}

// eface defines what an interface type actually is: a pointer to type
// information about the encapsulated type and a pointer to the encapsulated
// value.
type eface struct {
	rtype unsafe.Pointer
	data  unsafe.Pointer
}

// unpackEFace gives us an efficient way to get a pointer to the value carried
// in an interface. If you wrap a pointer type in an interface then the pointer
// is directly stored in the interface struct. If you wrap a value type in an
// interface then the compiler copies the value into a newly allocated piece of
// memory and stores a pointer to that memory in the interface. So we're
// guaranteed to get a pointer. Go reflection doesn't expose the pointer to
// value types straightforwardly as it doesn't want you to think you have a
// reference to the original value. But we just want a pointer to make it
// efficient to read the value, so cheating like this should be safe and
// reasonable.
func unpackEFace(obj interface{}) *eface {
	return (*eface)(unsafe.Pointer(&obj))
}

// ReadTensor constructs a Tensor with the provided type and shape from the
// serialized tensor contents in r.
//
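As a rough illustration of the eface/unpackEFace trick above (a sketch, not code from the repository): boxing a value in an interface{} stores a pointer to a copy of that value, and reinterpreting the interface as the two-word eface struct exposes that pointer directly, with no reflection allocations.

package main

import (
	"fmt"
	"unsafe"
)

// eface mirrors the runtime layout of an empty interface: a pointer to type
// information and a pointer to the boxed value.
type eface struct {
	rtype unsafe.Pointer
	data  unsafe.Pointer
}

func unpackEFace(obj interface{}) *eface {
	return (*eface)(unsafe.Pointer(&obj))
}

func main() {
	v := [3]float32{1, 2, 3}
	var boxed interface{} = v      // the array is copied into memory owned by the interface
	p := unpackEFace(boxed).data   // pointer to that copy
	fmt.Println(*(*[3]float32)(p)) // [1 2 3]
}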
@@ -218,93 +168,23 @@ func (t *Tensor) Shape() []int64 { return t.shape }
// Tensor(int64, 0): int64
// Tensor(float64, 3): [][][]float64
func (t *Tensor) Value() interface{} {
	raw := tensorData(t.c)
	shape := t.Shape()
	dt := t.DataType()
	if dt != String {
		return decodeTensor(raw, shape, dt).Interface()
	}

	typ := typeOf(dt, shape)
	typ := typeOf(t.DataType(), t.Shape())
	val := reflect.New(typ)
	nflattened := numElements(shape)
	d := stringDecoder{offsets: bytes.NewReader(raw[0 : 8*nflattened]), data: raw[8*nflattened:], status: newStatus()}
	if err := d.decode(val, shape); err != nil {
		panic(bug("unable to decode String tensor with shape %v - %v", shape, err))
	raw := tensorData(t.c)
	if t.DataType() != String {
		if err := decodeTensor(bytes.NewReader(raw), t.Shape(), typ, val); err != nil {
			panic(bug("unable to decode Tensor of type %v and shape %v - %v", t.DataType(), t.Shape(), err))
		}
	} else {
		nflattened := numElements(t.Shape())
		d := stringDecoder{offsets: bytes.NewReader(raw[0 : 8*nflattened]), data: raw[8*nflattened:], status: newStatus()}
		if err := d.decode(val, t.Shape()); err != nil {
			panic(bug("unable to decode String tensor with shape %v - %v", t.Shape(), err))
		}
	}
	return reflect.Indirect(val).Interface()
}

func decodeTensor(raw []byte, shape []int64, dt DataType) reflect.Value {
	typ := typeForDataType(dt)
	// Create a 1-dimensional slice of the base large enough for the data and
	// copy the data in.
	n := int(numElements(shape))
	l := n * int(typ.Size())
	typ = reflect.SliceOf(typ)
	slice := reflect.MakeSlice(typ, n, n)
	baseBytes := *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
		Data: slice.Pointer(),
		Len:  l,
		Cap:  l,
	}))
	copy(baseBytes, raw)
	// Now we have the data in place in the base slice we can add the
	// dimensions. We want to walk backwards through the shape. If the shape is
	// length 1 or 0 then we're already done.
	if len(shape) == 0 {
		return slice.Index(0)
	}
	if len(shape) == 1 {
		return slice
	}
	// We have a special case if the tensor has no data. Our backing slice is
	// empty, but we still want to create slices following the shape. In this
	// case only the final part of the shape will be 0 and we want to recalculate
	// n at this point ignoring that 0.
	// For example if our shape is 3 * 2 * 0 then n will be zero, but we still
	// want 6 zero length slices to group as follows.
	// {{} {}} {{} {}} {{} {}}
	if n == 0 {
		n = int(numElements(shape[:len(shape)-1]))
	}
	for i := len(shape) - 2; i >= 0; i-- {
		underlyingSize := typ.Elem().Size()
		typ = reflect.SliceOf(typ)
		subsliceLen := int(shape[i+1])
		if subsliceLen != 0 {
			n = n / subsliceLen
		}
		// Just using reflection it is difficult to avoid unnecessary
		// allocations while setting up the sub-slices as the Slice function on
		// a slice Value allocates. So we end up doing pointer arithmetic!
		// Pointer() on a slice gives us access to the data backing the slice.
		// We insert slice headers directly into this data.
		data := slice.Pointer()
		nextSlice := reflect.MakeSlice(typ, n, n)

		for j := 0; j < n; j++ {
			// This is equivalent to nSlice[j] = slice[j*subsliceLen: (j+1)*subsliceLen]
			setSliceInSlice(nextSlice, j, reflect.SliceHeader{
				Data: data + (uintptr(j*subsliceLen) * underlyingSize),
				Len:  subsliceLen,
				Cap:  subsliceLen,
			})
		}

		slice = nextSlice
	}
	return slice
}

//go:nocheckptr
// setSliceInSlice sets slice[index] = content. We set nocheckptr as the pointer
// checking stuff doesn't seem to work well with reflection.
func setSliceInSlice(slice reflect.Value, index int, content reflect.SliceHeader) {
	const sliceSize = unsafe.Sizeof(reflect.SliceHeader{})
	*(*reflect.SliceHeader)(unsafe.Pointer(slice.Pointer() + (uintptr(index) * sliceSize))) = content
}

// WriteContentsTo writes the serialized contents of t to w.
//
// Returns the number of bytes written. See ReadTensor for
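The pointer arithmetic in decodeTensor exists only to avoid the allocations that reflect's Slice method would force; the structure it builds is the ordinary "flat backing slice re-sliced into rows that alias it". A safe-Go sketch of that end state (illustrative only, assuming an int32 tensor of shape [2 3]):

package main

import "fmt"

func main() {
	// One flat backing slice holds all the data; the rows just point into it.
	flat := []int32{1, 2, 3, 4, 5, 6}
	rows := make([][]int32, 2)
	for j := range rows {
		// Equivalent to the slice headers the patch writes directly:
		// rows[j] = flat[j*subsliceLen : (j+1)*subsliceLen]
		rows[j] = flat[j*3 : (j+1)*3] // no data is copied, only slice headers are set
	}
	fmt.Println(rows) // [[1 2 3] [4 5 6]]
}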
@@ -381,18 +261,18 @@ func shapeAndDataTypeOf(val reflect.Value) (shape []int64, dt DataType, err error) {
	return shape, dt, fmt.Errorf("unsupported type %v", typ)
}

func typeForDataType(dt DataType) reflect.Type {
	for _, t := range types {
		if dt == DataType(t.dataType) {
			return t.typ
		}
	}
	panic(bug("DataType %v is not supported (see https://www.tensorflow.org/code/tensorflow/core/framework/types.proto)", dt))
}

// typeOf converts from a DataType and Shape to the equivalent Go type.
func typeOf(dt DataType, shape []int64) reflect.Type {
	ret := typeForDataType(dt)
	var ret reflect.Type
	for _, t := range types {
		if dt == DataType(t.dataType) {
			ret = t.typ
			break
		}
	}
	if ret == nil {
		panic(bug("DataType %v is not supported (see https://www.tensorflow.org/code/tensorflow/core/framework/types.proto)", dt))
	}
	for range shape {
		ret = reflect.SliceOf(ret)
	}
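typeOf simply wraps the element type in one slice level per dimension of the shape. A minimal sketch of that behaviour (goTypeFor is a hypothetical stand-in for typeOf, which also maps the DataType to its Go element type):

package main

import (
	"fmt"
	"reflect"
)

// goTypeFor starts from the element type and applies reflect.SliceOf once per
// dimension of the shape.
func goTypeFor(elem reflect.Type, shape []int64) reflect.Type {
	t := elem
	for range shape {
		t = reflect.SliceOf(t)
	}
	return t
}

func main() {
	fmt.Println(goTypeFor(reflect.TypeOf(float32(0)), []int64{3, 4, 5})) // [][][]float32
}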
@@ -422,53 +302,92 @@ func byteSizeOfEncodedStrings(val interface{}) uintptr {
	return size
}

// encodeTensorWithSlices writes v to the specified buffer using the format specified in
// encodeTensor writes v to the specified buffer using the format specified in
// c_api.h. Use stringEncoder for String tensors.
func encodeTensorWithSlices(w *bytes.Buffer, v reflect.Value, shape []int64) error {
	// If current dimension is a slice, verify that it has the expected size
	// Go's type system makes that guarantee for arrays.
	if v.Kind() == reflect.Slice {
		expected := int(shape[0])
		if v.Len() != expected {
			return fmt.Errorf("mismatched slice lengths: %d and %d", v.Len(), expected)
func encodeTensor(w *bytes.Buffer, v reflect.Value, shape []int64) error {
	switch v.Kind() {
	case reflect.Bool:
		b := byte(0)
		if v.Bool() {
			b = 1
		}
	} else if v.Kind() != reflect.Array {
		return fmt.Errorf("unsupported type %v", v.Type())
	}

	// Once we have just a single dimension we can just copy the data
	if len(shape) == 1 && v.Len() > 0 {
		elt := v.Index(0)
		if !elt.CanAddr() {
			panic("cannot take address")
		}
		ptr := unsafe.Pointer(elt.Addr().Pointer())
		return copyPtr(w, ptr, v.Len()*int(elt.Type().Size()))
	}

	subShape := shape[1:]
	for i := 0; i < v.Len(); i++ {
		err := encodeTensorWithSlices(w, v.Index(i), subShape)
		if err != nil {
		if err := w.WriteByte(b); err != nil {
			return err
		}
	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
		if err := binary.Write(w, nativeEndian, v.Interface()); err != nil {
			return err
		}
	}

	case reflect.Array, reflect.Slice:
		// If current dimension is a slice, verify that it has the expected size
		// Go's type system makes that guarantee for arrays.
		if v.Kind() == reflect.Slice {
			expected := int(shape[0])
			if v.Len() != expected {
				return fmt.Errorf("mismatched slice lengths: %d and %d", v.Len(), expected)
			}
		}

		// Optimisation: if only one dimension is left we can use binary.Write() directly for this slice
		if len(shape) == 1 && v.Len() > 0 {
			switch v.Index(0).Kind() {
			case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
				return binary.Write(w, nativeEndian, v.Interface())
			}
		}

		subShape := shape[1:]
		for i := 0; i < v.Len(); i++ {
			err := encodeTensor(w, v.Index(i), subShape)
			if err != nil {
				return err
			}
		}

	default:
		return fmt.Errorf("unsupported type %v", v.Type())
	}
	return nil
}

// copyPtr copies the backing data for a slice or array directly into w. Note
// we don't need to worry about byte ordering because we want the natural byte
// order for the machine we're running on.
func copyPtr(w *bytes.Buffer, ptr unsafe.Pointer, l int) error {
	// Convert our slice header into a []byte so we can call w.Write
	b := *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
		Data: uintptr(ptr),
		Len:  l,
		Cap:  l,
	}))
	_, err := w.Write(b)
	return err
// decodeTensor decodes the Tensor from the buffer to ptr using the format
// specified in c_api.h. Use stringDecoder for String tensors.
func decodeTensor(r *bytes.Reader, shape []int64, typ reflect.Type, ptr reflect.Value) error {
	switch typ.Kind() {
	case reflect.Bool:
		b, err := r.ReadByte()
		if err != nil {
			return err
		}
		ptr.Elem().SetBool(b == 1)
	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
		if err := binary.Read(r, nativeEndian, ptr.Interface()); err != nil {
			return err
		}

	case reflect.Slice:
		val := reflect.Indirect(ptr)
		val.Set(reflect.MakeSlice(typ, int(shape[0]), int(shape[0])))

		// Optimization: if only one dimension is left we can use binary.Read() directly for this slice
		if len(shape) == 1 && val.Len() > 0 {
			switch val.Index(0).Kind() {
			case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
				return binary.Read(r, nativeEndian, val.Interface())
			}
		}

		for i := 0; i < val.Len(); i++ {
			if err := decodeTensor(r, shape[1:], typ.Elem(), val.Index(i).Addr()); err != nil {
				return err
			}
		}

	default:
		return fmt.Errorf("unsupported type %v", typ)
	}
	return nil
}

type stringEncoder struct {
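encodeTensorWithSlices recurses through the nested slices until only one dimension remains and then copies that leaf run of primitives in a single operation. The sketch below (illustrative only; encodeLeaves is a hypothetical helper, and binary.Write with a fixed byte order stands in for the raw native-order memory copy the patch performs) shows the same recursion shape for a [][]float32:

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// encodeLeaves walks nested slices and writes each leaf slice of primitives in
// one call, mirroring the recursion structure described above.
func encodeLeaves(w *bytes.Buffer, v interface{}) error {
	switch x := v.(type) {
	case []float32:
		return binary.Write(w, binary.LittleEndian, x) // leaf: one contiguous run of primitives
	case [][]float32:
		for _, sub := range x {
			if err := encodeLeaves(w, sub); err != nil {
				return err
			}
		}
		return nil
	default:
		return fmt.Errorf("unsupported type %T", v)
	}
}

func main() {
	var buf bytes.Buffer
	_ = encodeLeaves(&buf, [][]float32{{1, 2}, {3, 4}})
	fmt.Println(buf.Len()) // 16 bytes: four float32 values
}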
@@ -18,7 +18,6 @@ package tensorflow

import (
	"bytes"
	"fmt"
	"io"
	"reflect"
	"testing"
@@ -277,7 +276,6 @@ func TestReadTensorReadAll(t *testing.T) {
}

func benchmarkNewTensor(b *testing.B, v interface{}) {
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		if t, err := NewTensor(v); err != nil || t == nil {
			b.Fatalf("(%v, %v)", t, err)
@@ -285,52 +283,32 @@ func benchmarkNewTensor(b *testing.B, v interface{}) {
	}
}

func benchmarkValueTensor(b *testing.B, v interface{}) {
	t, err := NewTensor(v)
	if err != nil {
		b.Fatalf("(%v, %v)", t, err)
	}
	b.ReportAllocs()
	b.ResetTimer()
func BenchmarkNewTensor(b *testing.B) {
	var (
		// Some sample sizes from the Inception image labeling model.
		// Where input tensors correspond to a 224x224 RGB image
		// flattened into a vector.
		vector [224 * 224 * 3]int32
	)
	b.Run("[150528]", func(b *testing.B) { benchmarkNewTensor(b, vector) })
}

func benchmarkDecodeTensor(b *testing.B, t *Tensor) {
	for i := 0; i < b.N; i++ {
		_ = t.Value()
	}
}

func BenchmarkTensor(b *testing.B) {
	// Some sample sizes from the Inception image labeling model.
	// Where input tensors correspond to a 224x224 RGB image
	// flattened into a vector.
	var vector [224 * 224 * 3]int32
	var arrays [100][100][100]int32

	l3 := make([][][]float32, 100)
	l2 := make([][]float32, 100*100)
	l1 := make([]float32, 100*100*100)
	for i := range l2 {
		l2[i] = l1[i*100 : (i+1)*100]
func BenchmarkDecodeTensor(b *testing.B) {
	var (
		// Some sample sizes from the Inception image labeling model.
		// Where input tensors correspond to a 224x224 RGB image
		// flattened into a vector.
		vector [224 * 224 * 3]int32
	)
	t, err := NewTensor(vector)
	if err != nil {
		b.Fatalf("(%v, %v)", t, err)
	}
	for i := range l3 {
		l3[i] = l2[i*100 : (i+1)*100]
	}

	tests := []interface{}{
		vector,
		arrays,
		l1,
		l2,
		l3,
	}
	b.Run("New", func(b *testing.B) {
		for _, test := range tests {
			b.Run(fmt.Sprintf("%T", test), func(b *testing.B) { benchmarkNewTensor(b, test) })
		}
	})
	b.Run("Value", func(b *testing.B) {
		for _, test := range tests {
			b.Run(fmt.Sprintf("%T", test), func(b *testing.B) { benchmarkValueTensor(b, test) })
		}
	})

	b.Run("[150528]", func(b *testing.B) { benchmarkDecodeTensor(b, t) })
}
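For reference, a short usage sketch of the two code paths these benchmarks exercise, assuming the binding is importable as github.com/tensorflow/tensorflow/tensorflow/go and the TensorFlow C library is installed:

package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
)

func main() {
	// NewTensor encodes the Go value into the tensor's buffer;
	// Value decodes it back into a freshly built Go value.
	t, err := tf.NewTensor([][]float32{{1, 2, 3}, {4, 5, 6}})
	if err != nil {
		panic(err)
	}
	fmt.Println(t.Shape())               // [2 3]
	fmt.Println(t.Value().([][]float32)) // [[1 2 3] [4 5 6]]
}

The benchmarks themselves can be run from the tensorflow/go package with go test -run=^$ -bench=BenchmarkTensor.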