Add DeviceIndex XLA op.

DeviceIndex op: given a list of device names, this operation returns the index of the device on which this op runs. In the case of XLA, we are not executing on any device, so we return the length of the list.

PiperOrigin-RevId: 317740778
Change-Id: I0679aa0adc5508b83502eee0d2044584577ed5b4
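
For illustration only (not part of this change): a minimal sketch of the behavior described above, assuming the hidden op is reachable through tf.raw_ops.DeviceIndex and that tf.function(experimental_compile=True) is available, as in the test added below.

import tensorflow as tf

device_names = ["CPU", "GPU"]

@tf.function(experimental_compile=True)
def compiled_device_index():
  # Under XLA compilation there is no concrete device to report, so the
  # kernel added in this change returns the sentinel len(device_names).
  return tf.raw_ops.DeviceIndex(device_names=device_names)

print(int(compiled_device_index()))  # 2 == len(device_names)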
Yanhua Sun 2020-06-22 14:53:59 -07:00 committed by TensorFlower Gardener
parent 7975b7a0c0
commit b2f0928940
5 changed files with 79 additions and 1 deletion

View File

@@ -1837,7 +1837,7 @@ absl::flat_hash_map<string, std::vector<string>>* GetWhitelistTable() {
"ConcatOffset", "Const", "MirrorPad", "Pack", "Pad", "PadV2", "Reverse",
"ReverseV2", "ReverseSequence", "Slice", "Split", "SplitV",
"StridedSlice", "StridedSliceGrad", "ResourceStridedSliceAssign",
"Tile", "Transpose", "InvertPermutation", "Unpack"}}};
"Tile", "Transpose", "InvertPermutation", "Unpack", "DeviceIndex"}}};
// clang-format on
return result;
}

View File

@@ -32,6 +32,7 @@ tf_kernel_library(
"data_format_ops.cc",
"depthtospace_op.cc",
"dequantize_op.cc",
"device_index_op.cc",
"diag_op.cc",
"dynamic_slice_ops.cc",
"dynamic_stitch_op.cc",

View File

@@ -0,0 +1,51 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "absl/container/flat_hash_map.h"
#include "absl/strings/string_view.h"
#include "tensorflow/compiler/tf2xla/xla_helpers.h"
#include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
#include "tensorflow/compiler/xla/client/client_library.h"
#include "tensorflow/compiler/xla/client/lib/arithmetic.h"
#include "tensorflow/compiler/xla/client/lib/constants.h"
#include "tensorflow/compiler/xla/client/lib/math.h"
#include "tensorflow/compiler/xla/client/xla_builder.h"
#include "tensorflow/core/framework/kernel_def_builder.h"
namespace tensorflow {
namespace {

class DeviceIndexOp : public XlaOpKernel {
 public:
  explicit DeviceIndexOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
    OP_REQUIRES_OK(ctx, ctx->GetAttr("device_names", &device_names_));
  }

  void Compile(XlaOpKernelContext* ctx) override {
    // When compiling we are not executing on any physical device, so we return
    // a sentinel value (size of the list of devices).
    ctx->SetOutput(
        0, xla::ConstantR0<int32>(ctx->builder(), device_names_.size()));
  }

 private:
  std::vector<string> device_names_;
};

REGISTER_XLA_OP(Name("DeviceIndex"), DeviceIndexOp);

}  // namespace
}  // namespace tensorflow

View File

@@ -2,4 +2,10 @@ op {
  graph_op_name: "DeviceIndex"
  visibility: HIDDEN
  summary: "Return the index of the device the op runs on."
  description: <<END
Given a list of device names, this operation returns the index of the device
this op runs on. The length of the list is returned in either of two cases:
(1) the device does not appear in the given device list, or
(2) the op is being compiled with XLA.
END
}
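
As a rough illustration of case (1) above (again assuming the hidden op is reachable via tf.raw_ops.DeviceIndex), an op placed on a device that does not appear in device_names also evaluates to the length of the list:

import tensorflow as tf

# Hypothetical, deliberately non-matching device list used only for this sketch.
device_names = ["TPU"]

with tf.device("/CPU:0"):
  idx = tf.raw_ops.DeviceIndex(device_names=device_names)

# The op runs on the CPU, which is not in device_names, so per case (1) it
# falls back to returning len(device_names).
print(int(idx))  # 1 == len(device_names)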

View File

@@ -1274,6 +1274,26 @@ class ExecuteFnForDeviceTest(test_util.TensorFlowTestCase):
    self.assertEqual(6., self.evaluate(result))
    self.assertEqual([2.], self.evaluate(grad))

  def testCompile(self):
    if not test_util.is_gpu_available():
      return

    def cpu_fn(x):
      return x + x

    def gpu_fn(x):
      return x * x

    @def_function.function(experimental_compile=True)
    def flexible_defun(a):
      branches = {"CPU": lambda: cpu_fn(a), "GPU": lambda: gpu_fn(a)}
      return control_flow_ops.execute_fn_for_device(branches,
                                                    lambda: cpu_fn(a))

    # Always execute the default branch in xla compilation case.
    a = array_ops.constant(3.)
    r = flexible_defun(a)
    self.assertEqual(6., self.evaluate(r))

  def testFallBack(self):

    def default_fn(x):