Allow BCast to disable the optimization of grouping consecutive dimensions.

Also move a few helper functions into the BCast class. Before: Benchmark Time(ns) CPU(ns) Iterations ------------------------------------------------ BM_BCastSetup/0 114 114 6062466 different_shapes BM_BCastSetup/1 20 20 34863786 same_shapes After: Benchmark Time(ns) CPU(ns) Iterations ------------------------------------------------ BM_BCastSetup/0 126 126 5562646 different_shapes BM_BCastSetup/1 32 32 21627499 same_shapes The performance hit likely stems from 1-2 branch mispredictions (~7ns per misprediction). Change: 121437477
2016-05-03 17:00:04 -08:00 · 2016-05-03 17:00:04 -08:00 · 90caf0e179
commit 90caf0e179
parent 0393436023
4 changed files with 211 additions and 24 deletions
--- a/tensorflow/core/kernels/cwise_ops_common.cc
+++ b/tensorflow/core/kernels/cwise_ops_common.cc
@ -62,15 +62,15 @@ static TensorShape ToShape(const BCast::Vec& vec) {
 BinaryOpShared::BinaryOpState::BinaryOpState(OpKernelContext* ctx)
    : in0(ctx->input(0)),
      in1(ctx->input(1)),
-      bcast(FromShape(in0.shape()), FromShape(in1.shape())) {
+      bcast(BCast::FromShape(in0.shape()), BCast::FromShape(in1.shape())) {
  if (!bcast.IsValid()) {
    ctx->SetStatus(errors::InvalidArgument("Incompatible shapes: ",
                                           in0.shape().DebugString(), " vs. ",
                                           in1.shape().DebugString()));
    return;
  }
-  OP_REQUIRES_OK(ctx,
-                 ctx->allocate_output(0, ToShape(bcast.output_shape()), &out));
+  OP_REQUIRES_OK(
+      ctx, ctx->allocate_output(0, BCast::ToShape(bcast.output_shape()), &out));
  out_num_elements = out->NumElements();
  in0_num_elements = in0.NumElements();
  in1_num_elements = in1.NumElements();
--- a/tensorflow/core/util/bcast.cc
+++ b/tensorflow/core/util/bcast.cc
@ -21,29 +21,29 @@ namespace tensorflow {
 /* static */
 void BCast::Reverse(Vec* shape) { std::reverse(shape->begin(), shape->end()); }

-BCast::BCast(const Vec& sx, const Vec& sy) {
-  if (sx == sy) {
+BCast::BCast(const Vec& sx, const Vec& sy, const bool fewer_dims_optimization) {
+  if (sx == sy && TF_PREDICT_TRUE(fewer_dims_optimization)) {
    // Fast path for common case of identical shapes for sx and sy
    int64 elements = 1;
    const int n = sx.size();
    output_.resize(n);
    for (int i = 0; i < n; i++) {
-      int64 dim = sx[i];
+      const int64 dim = sx[i];
      elements *= dim;
      output_[i] = dim;
    }
+    result_.push_back(elements);
    x_reshape_.push_back(elements);
    y_reshape_.push_back(elements);
    x_bcast_.push_back(1);
    y_bcast_.push_back(1);
-    result_.push_back(elements);
    // grad_x_reduce_ and grad_y_reduce_ are left as empty
  } else {
    // Reverse the shape of x and y for convenience.
    // After the reverse, 0-th is the inner-most dimension.
    Vec x = sx;
-    Reverse(&x);
    Vec y = sy;
+    Reverse(&x);
    Reverse(&y);

    // 1-extend and align x and y so that they are the same size.
@ -108,11 +108,18 @@ BCast::BCast(const Vec& sx, const Vec& sy) {
        // Both side are 1s.
        grad_x_reduce_idx_.push_back(n - 1 - i);
        grad_y_reduce_idx_.push_back(n - 1 - i);
+        if (!TF_PREDICT_TRUE(fewer_dims_optimization)) {
+          result_.push_back(o_i);
+          x_reshape_.push_back(x_i);
+          x_bcast_.push_back(bx_i);
+          y_reshape_.push_back(y_i);
+          y_bcast_.push_back(by_i);
+        }
        continue;
-      } else if (prev == curr) {
-        // It is a run of the same cases (no broadcast, x broadcast to
-        // y, y broadcast to x). We can reshape the input so that fewer
-        // dimensions are involved in the intermediate computation.
+      } else if (TF_PREDICT_TRUE(fewer_dims_optimization) && prev == curr) {
+        // It is a run of the same cases(no broadcast, x broadcast to y, y
+        // broadcast to x). We can reshape the input so that fewer dimensions
+        // are involved in the intermediate computation.
        result_.back() *= o_i;
        x_reshape_.back() *= x_i;
        x_bcast_.back() *= bx_i;
@ -150,4 +157,18 @@ BCast::BCast(const Vec& sx, const Vec& sy) {
  }
 }

+BCast::Vec BCast::FromShape(const TensorShape& shape) {
+  const int N = shape.dims();
+  BCast::Vec ret(N);
+  for (int i = 0; i < N; ++i) {
+    ret[i] = shape.dim_size(i);
+  }
+  return ret;
+}
+
+TensorShape BCast::ToShape(const BCast::Vec& vec) {
+  TensorShape shape(vec);
+  return shape;
+}
+
 }  // end namespace tensorflow
--- a/tensorflow/core/util/bcast.h
+++ b/tensorflow/core/util/bcast.h
@ -18,6 +18,7 @@ limitations under the License.

 #include <algorithm>

+#include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/types.h"
@ -73,7 +74,15 @@ class BCast {
  // it's more convenient to manipulate Vec directly for this module.
  typedef gtl::InlinedVector<int64, 4> Vec;

-  BCast(const Vec& x, const Vec& y);
+  // Constructs all helper shapes, following the aforementioned rules.
+  //
+  // If "fewer_dims_optimization" is set to true (the default), the
+  // implementation tries to reduce intermediate dimensions needed to be more
+  // efficient.  This is transparent to the caller.
+  //
+  // If false, all intermediate shapes (except for grad_{x,y}_reduce_idx()) have
+  // the same number of dimensions as the larger of the two inputs.
+  BCast(const Vec& x, const Vec& y, const bool fewer_dims_optimization = true);
  ~BCast() {}

  // Returns true iff two operands are compatible according to the
@ -92,6 +101,10 @@ class BCast {
  const Vec& grad_x_reduce_idx() const { return grad_x_reduce_idx_; }
  const Vec& grad_y_reduce_idx() const { return grad_y_reduce_idx_; }

+  // Static helpers.
+  static Vec FromShape(const TensorShape& shape);
+  static TensorShape ToShape(const BCast::Vec& vec);
+
 private:
  bool valid_ = true;
  Vec x_reshape_;
--- a/tensorflow/core/util/bcast_test.cc
+++ b/tensorflow/core/util/bcast_test.cc
@ -23,8 +23,9 @@ limitations under the License.
 namespace tensorflow {
 namespace {

-string BCast(const tensorflow::BCast::Vec& x, const tensorflow::BCast::Vec& y) {
-  tensorflow::BCast b(x, y);
+string BCast(const tensorflow::BCast::Vec& x, const tensorflow::BCast::Vec& y,
+             const bool fewer_dims_optimization = true) {
+  tensorflow::BCast b(x, y, fewer_dims_optimization);
  if (!b.IsValid()) {
    return "invalid";
  }
@ -43,10 +44,13 @@ string BCast(const tensorflow::BCast::Vec& x, const tensorflow::BCast::Vec& y) {
 }

 TEST(BCastTest, Invalid) {
-  EXPECT_EQ("invalid", BCast({5, 3, 2}, {3}));
-  EXPECT_EQ("invalid", BCast({5, 3, 2}, {2, 2}));
-  EXPECT_EQ("invalid", BCast({5, 3, 2}, {10, 1, 1}));
-  EXPECT_EQ("invalid", BCast({1, 2, 1, 2, 1, 2}, {2, 4, 2, 1, 2, 1}));
+  for (const bool use_optimization : {true, false}) {
+    EXPECT_EQ("invalid", BCast({5, 3, 2}, {3}, use_optimization));
+    EXPECT_EQ("invalid", BCast({5, 3, 2}, {2, 2}, use_optimization));
+    EXPECT_EQ("invalid", BCast({5, 3, 2}, {10, 1, 1}, use_optimization));
+    EXPECT_EQ("invalid",
+              BCast({1, 2, 1, 2, 1, 2}, {2, 4, 2, 1, 2, 1}, use_optimization));
+  }
 }

 TEST(BCastTest, Basic_SameShape) {
@ -56,6 +60,12 @@ TEST(BCastTest, Basic_SameShape) {
            "[2310]"
            "[11,7,5,3,2]"
            "[][]");
+
+  EXPECT_EQ(BCast({11, 7, 5, 3, 2}, {11, 7, 5, 3, 2}, false),
+            "[11,7,5,3,2][1,1,1,1,1][11,7,5,3,2][1,1,1,1,1]"
+            "[11,7,5,3,2]"
+            "[11,7,5,3,2]"
+            "[][]");
 }

 TEST(BCastTest, Basic_SameShapeWithZeroDim) {
@ -65,6 +75,12 @@ TEST(BCastTest, Basic_SameShapeWithZeroDim) {
            "[0]"
            "[11,7,0,3,2]"
            "[][]");
+
+  EXPECT_EQ(BCast({11, 7, 0, 3, 2}, {11, 7, 0, 3, 2}, false),
+            "[11,7,0,3,2][1,1,1,1,1][11,7,0,3,2][1,1,1,1,1]"
+            "[11,7,0,3,2]"
+            "[11,7,0,3,2]"
+            "[][]");
 }

 TEST(BCastTest, Basic_Scalar_Scalar) {
@ -76,12 +92,24 @@ TEST(BCastTest, Basic_Scalar_Scalar) {
            "[1,1]"
            "[0,1][0,1]");

+  EXPECT_EQ(BCast({1, 1}, {1}, false),
+            "[1,1][1,1][1,1][1,1]"
+            "[1,1]"
+            "[1,1]"
+            "[0,1][0,1]");
+
  // [1] [1, 1]
  EXPECT_EQ(BCast({1}, {1, 1}),
            "[1][1][1][1]"
            "[1]"
            "[1,1]"
            "[0,1][0,1]");
+
+  EXPECT_EQ(BCast({1}, {1, 1}, false),
+            "[1,1][1,1][1,1][1,1]"
+            "[1,1]"
+            "[1,1]"
+            "[0,1][0,1]");
 }

 TEST(BCastTest, Basic_Tensor_Scalar) {
@ -93,12 +121,24 @@ TEST(BCastTest, Basic_Tensor_Scalar) {
            "[11,7,5,3,2]"
            "[][0,1,2,3,4]");

+  EXPECT_EQ(BCast({11, 7, 5, 3, 2}, {1}, false),
+            "[11,7,5,3,2][1,1,1,1,1][1,1,1,1,1][11,7,5,3,2]"
+            "[11,7,5,3,2]"
+            "[11,7,5,3,2]"
+            "[][0,1,2,3,4]");
+
  // [1] [11, 7, 5, 3, 2]
  EXPECT_EQ(BCast({1}, {11, 7, 5, 3, 2}),
            "[1][2310][2310][1]"
            "[2310]"
            "[11,7,5,3,2]"
            "[0,1,2,3,4][]");
+
+  EXPECT_EQ(BCast({1}, {11, 7, 5, 3, 2}, false),
+            "[1,1,1,1,1][11,7,5,3,2][11,7,5,3,2][1,1,1,1,1]"
+            "[11,7,5,3,2]"
+            "[11,7,5,3,2]"
+            "[0,1,2,3,4][]");
 }

 TEST(BCastTest, Basic_Tensor_With_DimSize_1_Scalar) {
@ -110,6 +150,12 @@ TEST(BCastTest, Basic_Tensor_With_DimSize_1_Scalar) {
            "[11,7,5,3,2,1]"
            "[5][0,1,2,3,4,5]");

+  EXPECT_EQ(BCast({11, 7, 5, 3, 2, 1}, {1}, false),
+            "[11,7,5,3,2,1][1,1,1,1,1,1][1,1,1,1,1,1][11,7,5,3,2,1]"
+            "[11,7,5,3,2,1]"
+            "[11,7,5,3,2,1]"
+            "[5][0,1,2,3,4,5]");
+
  // [1] [11, 7, 5, 3, 2, 1]
  EXPECT_EQ(BCast({1}, {11, 7, 5, 3, 2, 1}),
            "[1][2310][2310][1]"
@ -117,6 +163,12 @@ TEST(BCastTest, Basic_Tensor_With_DimSize_1_Scalar) {
            "[11,7,5,3,2,1]"
            "[0,1,2,3,4,5][5]");

+  EXPECT_EQ(BCast({1}, {11, 7, 5, 3, 2, 1}, false),
+            "[1,1,1,1,1,1][11,7,5,3,2,1][11,7,5,3,2,1][1,1,1,1,1,1]"
+            "[11,7,5,3,2,1]"
+            "[11,7,5,3,2,1]"
+            "[0,1,2,3,4,5][5]");
+
  // Effectively it's a tensor and a scalar.
  // [11, 7, 5, 1, 1, 3, 2, 1] [1]
  EXPECT_EQ(BCast({11, 7, 5, 1, 1, 3, 2, 1, 1}, {1}),
@ -125,12 +177,26 @@ TEST(BCastTest, Basic_Tensor_With_DimSize_1_Scalar) {
            "[11,7,5,1,1,3,2,1,1]"
            "[3,4,7,8][0,1,2,3,4,5,6,7,8]");

+  EXPECT_EQ(BCast({11, 7, 5, 1, 1, 3, 2, 1, 1}, {1}, false),
+            "[11,7,5,1,1,3,2,1,1][1,1,1,1,1,1,1,1,1]"  // x_reshape(), x_bcast()
+            "[1,1,1,1,1,1,1,1,1][11,7,5,1,1,3,2,1,1]"  // y_reshape(), y_bcast()
+            "[11,7,5,1,1,3,2,1,1]"
+            "[11,7,5,1,1,3,2,1,1]"
+            "[3,4,7,8][0,1,2,3,4,5,6,7,8]");
+
  // [1] [11, 7, 5, 1, 1, 3, 2, 1]
  EXPECT_EQ(BCast({1}, {11, 7, 5, 1, 1, 3, 2, 1, 1}),
            "[1][2310][2310][1]"
            "[2310]"
            "[11,7,5,1,1,3,2,1,1]"
            "[0,1,2,3,4,5,6,7,8][3,4,7,8]");
+
+  EXPECT_EQ(BCast({1}, {11, 7, 5, 1, 1, 3, 2, 1, 1}, false),
+            "[1,1,1,1,1,1,1,1,1][11,7,5,1,1,3,2,1,1]"  // x_reshape(), x_bcast()
+            "[11,7,5,1,1,3,2,1,1][1,1,1,1,1,1,1,1,1]"  // y_reshape(), y_bcast()
+            "[11,7,5,1,1,3,2,1,1]"
+            "[11,7,5,1,1,3,2,1,1]"
+            "[0,1,2,3,4,5,6,7,8][3,4,7,8]");
 }

 TEST(BCastTest, Basic_Tensor_Vector) {
@ -141,12 +207,24 @@ TEST(BCastTest, Basic_Tensor_Vector) {
            "[11,7,5,3,2]"
            "[][0,1,2,3]");

+  EXPECT_EQ(BCast({11, 7, 5, 3, 2}, {2}, false),
+            "[11,7,5,3,2][1,1,1,1,1][1,1,1,1,2][11,7,5,3,1]"
+            "[11,7,5,3,2]"
+            "[11,7,5,3,2]"
+            "[][0,1,2,3]");
+
  // [2] [11, 7, 5, 3, 2]
  EXPECT_EQ(BCast({2}, {11, 7, 5, 3, 2}),
            "[1,2][1155,1][1155,2][1,1]"
            "[1155,2]"
            "[11,7,5,3,2]"
            "[0,1,2,3][]");
+
+  EXPECT_EQ(BCast({2}, {11, 7, 5, 3, 2}, false),
+            "[1,1,1,1,2][11,7,5,3,1][11,7,5,3,2][1,1,1,1,1]"
+            "[11,7,5,3,2]"
+            "[11,7,5,3,2]"
+            "[0,1,2,3][]");
 }

 TEST(BCastTest, Basic_Tensor_Matrix) {
@ -156,12 +234,25 @@ TEST(BCastTest, Basic_Tensor_Matrix) {
            "[385,6]"
            "[11,7,5,3,2]"
            "[][0,1,2]");
+
+  EXPECT_EQ(BCast({11, 7, 5, 3, 2}, {3, 2}, false),
+            "[11,7,5,3,2][1,1,1,1,1][1,1,1,3,2][11,7,5,1,1]"
+            "[11,7,5,3,2]"
+            "[11,7,5,3,2]"
+            "[][0,1,2]");
+
  // [3, 2] [11, 7, 5, 3, 2]
  EXPECT_EQ(BCast({3, 2}, {11, 7, 5, 3, 2}),
            "[1,6][385,1][385,6][1,1]"
            "[385,6]"
            "[11,7,5,3,2]"
            "[0,1,2][]");
+
+  EXPECT_EQ(BCast({3, 2}, {11, 7, 5, 3, 2}, false),
+            "[1,1,1,3,2][11,7,5,1,1][11,7,5,3,2][1,1,1,1,1]"
+            "[11,7,5,3,2]"
+            "[11,7,5,3,2]"
+            "[0,1,2][]");
 }

 TEST(BCastTest, Basic_Tensor_Matrix_Column) {
@ -172,12 +263,24 @@ TEST(BCastTest, Basic_Tensor_Matrix_Column) {
            "[11,7,5,3,2]"
            "[][0,1,2,4]");

+  EXPECT_EQ(BCast({11, 7, 5, 3, 2}, {3, 1}, false),
+            "[11,7,5,3,2][1,1,1,1,1][1,1,1,3,1][11,7,5,1,2]"
+            "[11,7,5,3,2]"
+            "[11,7,5,3,2]"
+            "[][0,1,2,4]");
+
  // [3, 1] [11, 7, 5, 3, 2]
  EXPECT_EQ(BCast({3, 1}, {11, 7, 5, 3, 2}),
            "[1,3,1][385,1,2][385,3,2][1,1,1]"
            "[385,3,2]"
            "[11,7,5,3,2]"
            "[0,1,2,4][]");
+
+  EXPECT_EQ(BCast({3, 1}, {11, 7, 5, 3, 2}, false),
+            "[1,1,1,3,1][11,7,5,1,2][11,7,5,3,2][1,1,1,1,1]"
+            "[11,7,5,3,2]"
+            "[11,7,5,3,2]"
+            "[0,1,2,4][]");
 }

 TEST(BCastTest, Basic_Tensor_Matrix_As_Tensor) {
@ -188,12 +291,23 @@ TEST(BCastTest, Basic_Tensor_Matrix_As_Tensor) {
            "[11,7,5,3,2]"
            "[][0,3,4]");

+  EXPECT_EQ(BCast({11, 7, 5, 3, 2}, {7, 5, 1, 1}, false),
+            "[11,7,5,3,2][1,1,1,1,1][1,7,5,1,1][11,1,1,3,2]"
+            "[11,7,5,3,2]"
+            "[11,7,5,3,2]"
+            "[][0,3,4]");
+
  // [7, 5, 1, 1] [11, 7, 5, 3, 2]
  EXPECT_EQ(BCast({7, 5, 1, 1}, {11, 7, 5, 3, 2}),
            "[1,35,1][11,1,6][11,35,6][1,1,1]"
            "[11,35,6]"
            "[11,7,5,3,2]"
            "[0,3,4][]");
+
+  EXPECT_EQ(BCast({7, 5, 1, 1}, {11, 7, 5, 3, 2}, false),
+            "[1,7,5,1,1][11,1,1,3,2][11,7,5,3,2][1,1,1,1,1]"
+            "[11,7,5,3,2][11,7,5,3,2]"
+            "[0,3,4][]");
 }

 TEST(BCastTest, Complex_BCast_To_Each_Other) {
@ -205,14 +319,18 @@ TEST(BCastTest, Complex_BCast_To_Each_Other) {
  //   y = np.arange(0,21).reshape([7,1,3,1])
  //   np.shape(x + y)
  //   Out[.]: (11, 7, 5, 3, 2)
-  EXPECT_EQ(BCast({11, 1, 5, 1, 2}, {7, 1, 3, 1}),
-            "[11,1,5,1,2][1,7,1,3,1][1,7,1,3,1][11,1,5,1,2]"
-            "[11,7,5,3,2]"
-            "[11,7,5,3,2]"
-            "[1,3][0,2,4]");
+  string truth =
+      "[11,1,5,1,2][1,7,1,3,1][1,7,1,3,1][11,1,5,1,2]"
+      "[11,7,5,3,2]"
+      "[11,7,5,3,2]"
+      "[1,3][0,2,4]";
+
+  EXPECT_EQ(BCast({11, 1, 5, 1, 2}, {7, 1, 3, 1}), truth);
+  EXPECT_EQ(BCast({11, 1, 5, 1, 2}, {7, 1, 3, 1}, false), truth);
 }

 TEST(BCastTest, TestZeroDimensionShape) {
+  // (2,0,5) and (5) in both orders
  EXPECT_EQ(BCast({2, 0, 5}, {5}),
            "[0,5][1,1][1,5][0,1]"
            "[0,5]"
@ -224,6 +342,18 @@ TEST(BCastTest, TestZeroDimensionShape) {
            "[2,0,5]"
            "[0,1][]");

+  EXPECT_EQ(BCast({2, 0, 5}, {5}, false),
+            "[2,0,5][1,1,1][1,1,5][2,0,1]"
+            "[2,0,5]"
+            "[2,0,5]"
+            "[][0,1]");
+  EXPECT_EQ(BCast({5}, {2, 0, 5}, false),
+            "[1,1,5][2,0,1][2,0,5][1,1,1]"
+            "[2,0,5]"
+            "[2,0,5]"
+            "[0,1][]");
+
+  // (2,0,3,0,5) and (5) in both orders
  EXPECT_EQ(BCast({2, 0, 3, 0, 5}, {5}),
            "[0,5][1,1][1,5][0,1]"
            "[0,5]"
@ -235,6 +365,18 @@ TEST(BCastTest, TestZeroDimensionShape) {
            "[2,0,3,0,5]"
            "[0,1,2,3][]");

+  EXPECT_EQ(BCast({2, 0, 3, 0, 5}, {5}, false),
+            "[2,0,3,0,5][1,1,1,1,1][1,1,1,1,5][2,0,3,0,1]"
+            "[2,0,3,0,5]"
+            "[2,0,3,0,5]"
+            "[][0,1,2,3]");
+  EXPECT_EQ(BCast({5}, {2, 0, 3, 0, 5}, false),
+            "[1,1,1,1,5][2,0,3,0,1][2,0,3,0,5][1,1,1,1,1]"
+            "[2,0,3,0,5]"
+            "[2,0,3,0,5]"
+            "[0,1,2,3][]");
+
+  // (2,0,3,0,5) and (3,1,5) in both orders
  EXPECT_EQ(BCast({2, 0, 3, 0, 5}, {3, 1, 5}),
            "[0,3,0,5][1,1,1,1][1,3,1,5][0,1,0,1]"
            "[0,3,0,5]"
@ -245,6 +387,17 @@ TEST(BCastTest, TestZeroDimensionShape) {
            "[0,3,0,5]"
            "[2,0,3,0,5]"
            "[0,1,3][]");
+
+  EXPECT_EQ(BCast({2, 0, 3, 0, 5}, {3, 1, 5}, false),
+            "[2,0,3,0,5][1,1,1,1,1][1,1,3,1,5][2,0,1,0,1]"
+            "[2,0,3,0,5]"
+            "[2,0,3,0,5]"
+            "[][0,1,3]");
+  EXPECT_EQ(BCast({3, 1, 5}, {2, 0, 3, 0, 5}, false),
+            "[1,1,3,1,5][2,0,1,0,1][2,0,3,0,5][1,1,1,1,1]"
+            "[2,0,3,0,5]"
+            "[2,0,3,0,5]"
+            "[0,1,3][]");
 }

 static void BM_BCastSetup(int iters, int same_shape) {