From 85c5f4d56604174a94e25d313bb726ee5a08d698 Mon Sep 17 00:00:00 2001 From: frreiss Date: Wed, 4 Dec 2019 13:40:53 -0800 Subject: [PATCH 001/178] Refactor int array copying code --- .../experimental/micro/micro_allocator.cc | 33 ++++++++++++++----- .../lite/experimental/micro/micro_allocator.h | 5 +++ 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/tensorflow/lite/experimental/micro/micro_allocator.cc b/tensorflow/lite/experimental/micro/micro_allocator.cc index 82b3b350c23..6f1a7ffe5d6 100644 --- a/tensorflow/lite/experimental/micro/micro_allocator.cc +++ b/tensorflow/lite/experimental/micro/micro_allocator.cc @@ -66,6 +66,29 @@ class MicroBuiltinDataAllocator : public BuiltinDataAllocator { } // namespace +// Allocate a TfLiteIntArray and copy the contents of a FlatBuffers Vector +// into it. +template +TfLiteStatus MicroAllocator::FlatBufferIntArrayToTfLiteIntArray( + const flatbuffers::Vector* flat_array, TfLiteIntArray** result) { + TfLiteIntArray* ret = + reinterpret_cast(memory_allocator_.AllocateFromTail( + TfLiteIntArrayGetSizeInBytes(flat_array->Length()), + alignof(TfLiteIntArray))); + if (nullptr == ret) { + error_reporter_->Report( + "Failed to allocate %d bytes of memory to copy an array.", + TfLiteIntArrayGetSizeInBytes(flat_array->Length())); + return kTfLiteError; + } + ret->size = flat_array->Length(); + for (int i = 0; i < flat_array->Length(); i++) { + ret->data[i] = flat_array->Get(i); + } + *result = ret; + return kTfLiteOk; +} + MicroAllocator::MicroAllocator(TfLiteContext* context, const Model* model, uint8_t* tensor_arena, size_t arena_size, ErrorReporter* error_reporter) @@ -450,14 +473,8 @@ TfLiteStatus MicroAllocator::InitializeRuntimeTensor( flatbuffer_tensor, &result->bytes, &type_size, error_reporter)); // Copy the shape of the tensor from the serialized data into the runtime // form. We have to allocate memory for this. - result->dims = - reinterpret_cast(memory_allocator_.AllocateFromTail( - TfLiteIntArrayGetSizeInBytes(flatbuffer_tensor.shape()->Length()), - alignof(TfLiteIntArray))); - result->dims->size = flatbuffer_tensor.shape()->Length(); - for (size_t n = 0; n < flatbuffer_tensor.shape()->Length(); ++n) { - result->dims->data[n] = flatbuffer_tensor.shape()->Get(n); - } + TF_LITE_ENSURE_STATUS(FlatBufferIntArrayToTfLiteIntArray( + flatbuffer_tensor.shape(), &(result->dims))); // Copy the quantization information from the serialized data. 
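// Aside on the helper added above (illustrative, not part of the patch): it
// copies element by element through flat_array->Get(i) rather than with a
// bulk memcpy. Get() converts each value from FlatBuffers' little-endian
// storage into host byte order, which is what lets the same routine serve
// big-endian targets later in this series. A standalone sketch of that
// distinction follows; every name in it is made up for illustration and does
// not exist in TF Lite.
#include <cstdint>

static int32_t ReadLittleEndian32(const uint8_t* p) {
  // Assemble the value explicitly, roughly what flatbuffers::Vector::Get()
  // does via EndianScalar(); correct on little- and big-endian hosts alike.
  uint32_t v = static_cast<uint32_t>(p[0]) |
               (static_cast<uint32_t>(p[1]) << 8) |
               (static_cast<uint32_t>(p[2]) << 16) |
               (static_cast<uint32_t>(p[3]) << 24);
  return static_cast<int32_t>(v);
}

static void CopySerializedInts(const uint8_t* serialized, int count,
                               int32_t* out) {
  for (int i = 0; i < count; ++i) {
    // A memcpy of the whole buffer would only be right when the host is
    // little-endian, i.e. when it already matches the FlatBuffers layout.
    out[i] = ReadLittleEndian32(serialized + 4 * i);
  }
}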
const auto* src_quantization = flatbuffer_tensor.quantization(); if (src_quantization && src_quantization->scale() && diff --git a/tensorflow/lite/experimental/micro/micro_allocator.h b/tensorflow/lite/experimental/micro/micro_allocator.h index 9ca76222442..3d3f3ef5210 100644 --- a/tensorflow/lite/experimental/micro/micro_allocator.h +++ b/tensorflow/lite/experimental/micro/micro_allocator.h @@ -74,6 +74,11 @@ class MicroAllocator { const OpResolver& op_resolver, NodeAndRegistration** node_and_registrations); + private: + template + TfLiteStatus FlatBufferIntArrayToTfLiteIntArray( + const flatbuffers::Vector* flat_array, TfLiteIntArray** result); + private: const Model* model_; SimpleMemoryAllocator memory_allocator_; From d4c054fcd8c97e9dbd140235a6c652cbeb9aaf8f Mon Sep 17 00:00:00 2001 From: frreiss Date: Wed, 4 Dec 2019 13:43:14 -0800 Subject: [PATCH 002/178] Copy and swap input offset arrays on big-endian machines --- .../lite/experimental/micro/micro_allocator.cc | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tensorflow/lite/experimental/micro/micro_allocator.cc b/tensorflow/lite/experimental/micro/micro_allocator.cc index 6f1a7ffe5d6..5f832656c49 100644 --- a/tensorflow/lite/experimental/micro/micro_allocator.cc +++ b/tensorflow/lite/experimental/micro/micro_allocator.cc @@ -207,12 +207,25 @@ TfLiteStatus MicroAllocator::AllocateNodeAndRegistrations( (void**)(&builtin_data))); } + // Instead of copying the input and output indices, maintain pointers + // into the FlatBuffers Vector objects in the model, which happen to have + // the same memory format as TfLiteIntArray. // Disregard const qualifier to workaround with existing API. TfLiteIntArray* inputs_array = const_cast( reinterpret_cast(op->inputs())); TfLiteIntArray* outputs_array = const_cast( reinterpret_cast(op->outputs())); + if (!FLATBUFFERS_LITTLEENDIAN) { + // Big-endian architecture. Make a copy of the input and output indices, + // because TfLiteIntArray is always in host byte order and FlatBuffers' + // Vectors are always in little-endian byte order. + TF_LITE_ENSURE_STATUS( + FlatBufferIntArrayToTfLiteIntArray(op->inputs(), &inputs_array)); + TF_LITE_ENSURE_STATUS( + FlatBufferIntArrayToTfLiteIntArray(op->outputs(), &outputs_array)); + } + TfLiteNode* node = &(output[i].node); node->inputs = inputs_array; node->outputs = outputs_array; From 13e3fed8a7ff7e98179ad2f679f5f2cd668a6bcf Mon Sep 17 00:00:00 2001 From: frreiss Date: Wed, 4 Dec 2019 13:47:03 -0800 Subject: [PATCH 003/178] Revert getter functions back to original behavior --- tensorflow/lite/kernels/kernel_util.h | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/tensorflow/lite/kernels/kernel_util.h b/tensorflow/lite/kernels/kernel_util.h index b343ec459e8..161fe5b1a30 100644 --- a/tensorflow/lite/kernels/kernel_util.h +++ b/tensorflow/lite/kernels/kernel_util.h @@ -30,24 +30,20 @@ inline int SizeOfDimension(const TfLiteTensor* t, int dim) { } inline const TfLiteTensor* GetInput(TfLiteContext* context, const TfLiteNode* node, int index) { - return &context - ->tensors[flatbuffers::EndianScalar(node->inputs->data[index])]; + return &context->tensors[node->inputs->data[index]]; } inline TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node, int index) { - TfLiteTensor* tensor = - &context->tensors[flatbuffers::EndianScalar(node->inputs->data[index])]; + TfLiteTensor* tensor = &context->tensors[node->inputs->data[index]]; return (tensor->is_variable) ? 
tensor : nullptr; } inline TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node, int index) { - return &context - ->tensors[flatbuffers::EndianScalar(node->outputs->data[index])]; + return &context->tensors[node->outputs->data[index]]; } inline TfLiteTensor* GetTemporary(TfLiteContext* context, const TfLiteNode* node, int index) { - return &context->tensors[flatbuffers::EndianScalar( - node->temporaries->data[index])]; + return &context->tensors[node->temporaries->data[index]]; } inline const TfLiteTensor* GetIntermediates(TfLiteContext* context, const TfLiteNode* node, int index) { @@ -76,8 +72,7 @@ inline const TfLiteTensor* GetOptionalInputTensor(TfLiteContext* context, int index) { const bool use_tensor = node->inputs->data[index] != kTfLiteOptionalTensor; if (use_tensor) { - return &context - ->tensors[flatbuffers::EndianScalar(node->inputs->data[index])]; + return &context->tensors[node->inputs->data[index]]; } return nullptr; } From a7c9e054723eabea4081b603462c2f0414f41aa0 Mon Sep 17 00:00:00 2001 From: Kam D Kasravi Date: Wed, 1 Apr 2020 12:02:27 -0700 Subject: [PATCH 004/178] fix tensorflow mkl script syntax error - TFDOQA-2036 --- tensorflow/tools/ci_build/linux/mkl/set-build-env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/linux/mkl/set-build-env.py b/tensorflow/tools/ci_build/linux/mkl/set-build-env.py index 6f933308d06..ba5176a4b7a 100755 --- a/tensorflow/tools/ci_build/linux/mkl/set-build-env.py +++ b/tensorflow/tools/ci_build/linux/mkl/set-build-env.py @@ -173,7 +173,7 @@ class CascadelakePlatform(IntelPlatform): if IntelPlatform.use_old_arch_names(self, 9, 1): ret_val = self.BAZEL_PREFIX_ + self.ARCH_PREFIX_ + \ CASCADELAKE_ARCH_OLD + " " - return ret_val + self.BAZEL_PREFIX_ + slef.FLAG_PREFIX_ + \ + return ret_val + self.BAZEL_PREFIX_ + self.FLAG_PREFIX_ + \ VNNI_FLAG + " " else: return self.BAZEL_PREFIX_ + self.ARCH_PREFIX_ + \ From b7b0761566a18dac0f4e57f22854d3ec75603515 Mon Sep 17 00:00:00 2001 From: Leslie-Fang Date: Sun, 26 Apr 2020 03:48:51 +0800 Subject: [PATCH 005/178] fix tflite python whl package pybind11 build fail --- tensorflow/lite/tools/pip_package/README.md | 1 + tensorflow/lite/tools/pip_package/setup.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/tools/pip_package/README.md b/tensorflow/lite/tools/pip_package/README.md index dac8ce02ca1..0a73bb364e5 100644 --- a/tensorflow/lite/tools/pip_package/README.md +++ b/tensorflow/lite/tools/pip_package/README.md @@ -9,6 +9,7 @@ To build a binary wheel run this script: ```sh sudo apt install swig libjpeg-dev zlib1g-dev python3-dev python3-numpy +pip install numpy pybind11 sh tensorflow/lite/tools/make/download_dependencies.sh sh tensorflow/lite/tools/pip_package/build_pip_package.sh ``` diff --git a/tensorflow/lite/tools/pip_package/setup.py b/tensorflow/lite/tools/pip_package/setup.py index 2f2515145c4..d5e779435e7 100644 --- a/tensorflow/lite/tools/pip_package/setup.py +++ b/tensorflow/lite/tools/pip_package/setup.py @@ -33,6 +33,7 @@ import sysconfig from distutils.command.build_ext import build_ext import numpy +import pybind11 from setuptools import Extension from setuptools import find_packages @@ -178,8 +179,9 @@ ext = Extension( 'pip_package'), numpy.get_include(), os.path.join(DOWNLOADS_DIR, 'flatbuffers', 'include'), - os.path.join(DOWNLOADS_DIR, 'absl') - ] + get_pybind_include(), + os.path.join(DOWNLOADS_DIR, 'absl'), + pybind11.get_include() + ], libraries=[LIB_TFLITE], 
library_dirs=[LIB_TFLITE_DIR]) From 84abc3216b3c2536e9922300ea9acbaa3b4a7742 Mon Sep 17 00:00:00 2001 From: Niranjan Hasabnis Date: Tue, 12 May 2020 12:38:51 -0700 Subject: [PATCH 006/178] [Intel MKL] Restructuring layout pass tests for bfloat16 - part 4 --- .../common_runtime/mkl_layout_pass_test.cc | 1066 +++++++++-------- 1 file changed, 570 insertions(+), 496 deletions(-) diff --git a/tensorflow/core/common_runtime/mkl_layout_pass_test.cc b/tensorflow/core/common_runtime/mkl_layout_pass_test.cc index c6d5331852e..dc85017d536 100644 --- a/tensorflow/core/common_runtime/mkl_layout_pass_test.cc +++ b/tensorflow/core/common_runtime/mkl_layout_pass_test.cc @@ -2395,559 +2395,633 @@ REGISTER_TEST(NodeMerge_PadWithFusedConv2D_Common_InOutput, DT_BFLOAT16, BFloat16Input, BFloat16Output2); #endif #undef REGISTER_TEST -// clang-format on -TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradFilter_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Int32Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'Conv2DBackpropFilter'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" - " input: ['A', 'B', 'C']}" - "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'D'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Int32Input);C(Input);D(_MklConv2DBackpropFilter);" - "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|" - "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;" - "A:control->DMT/_2:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;" - "DMT/_1->D:4;DMT/_2->D:5"); +#define REGISTER_TEST(NAME, T, INPUT) \ + TEST_F(MklLayoutPassTest, NAME##_##T) { \ + InitGraph( \ + "node { name: 'A' op: '" #INPUT "'}" \ + "node { name: 'B' op: 'Int32Input'}" \ + "node { name: 'C' op: '" #INPUT "'}" \ + "node { name: 'D' op: 'Conv2DBackpropFilter'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'data_format' value { s: 'NCHW' } }" \ + " attr { key: 'use_cudnn_on_gpu' value { b: false } }" \ + " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" \ + " attr { key: 'padding' value { s: 'SAME' } }" \ + " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" \ + " input: ['A', 'B', 'C']}" \ + "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: " #T " } }" \ + " input: ['A', 'D'] }"); \ + EXPECT_EQ(DoMklLayoutOptimizationPass(), \ + "A(" #INPUT ");B(Int32Input);C(" #INPUT ");D(_MklConv2DBackpropFilter);" \ + "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|" \ + "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;" \ + "A:control->DMT/_2:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;" \ + "DMT/_1->D:4;DMT/_2->D:5"); \ } +REGISTER_TEST_ALL_TYPES(NodeRewrite_Conv2DGradFilter_Positive); +#undef REGISTER_TEST -TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradInput_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Int32Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'Conv2DBackpropInput'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 
'padding' value { s: 'SAME' } }" - " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" - " input: ['B', 'A', 'C']}" - "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'D'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Int32Input);C(Input);D(_MklConv2DBackpropInput);" - "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|" - "A->D:1;A->E;B->D;B:control->DMT/_0:control;" - "B:control->DMT/_1:control;B:control->DMT/_2:control;C->D:2;" - "D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5"); +#define REGISTER_TEST(NAME, T, INPUT) \ + TEST_F(MklLayoutPassTest, NAME##_##T) { \ + InitGraph( \ + "node { name: 'A' op: '" #INPUT "'}" \ + "node { name: 'B' op: 'Int32Input'}" \ + "node { name: 'C' op: '" #INPUT "'}" \ + "node { name: 'D' op: 'Conv2DBackpropInput'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'data_format' value { s: 'NCHW' } }" \ + " attr { key: 'use_cudnn_on_gpu' value { b: false } }" \ + " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" \ + " attr { key: 'padding' value { s: 'SAME' } }" \ + " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" \ + " input: ['B', 'A', 'C']}" \ + "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: " #T " } }" \ + " input: ['A', 'D'] }"); \ + EXPECT_EQ(DoMklLayoutOptimizationPass(), \ + "A(" #INPUT ");B(Int32Input);C(" #INPUT ");D(_MklConv2DBackpropInput);" \ + "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|" \ + "A->D:1;A->E;B->D;B:control->DMT/_0:control;" \ + "B:control->DMT/_1:control;B:control->DMT/_2:control;C->D:2;" \ + "D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5"); \ } +REGISTER_TEST_ALL_TYPES(NodeRewrite_Conv2DGradInput_Positive); +#undef REGISTER_TEST -TEST_F(MklLayoutPassTest, - NodeRewrite_DepthwiseConv2dNativeGradFilter_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Int32Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'DepthwiseConv2dNativeBackpropFilter'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" - " input: ['A', 'B', 'C']}" - "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'D'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Int32Input);C(Input);D(_" - "MklDepthwiseConv2dNativeBackpropFilter);" - "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|" - "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;" - "A:control->DMT/_2:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;" - "DMT/_1->D:4;DMT/_2->D:5"); +#define REGISTER_TEST(NAME, T, INPUT) \ + TEST_F(MklLayoutPassTest, NAME##_##T) { \ + InitGraph( \ + "node { name: 'A' op: '" #INPUT "'}" \ + "node { name: 'B' op: 'Int32Input'}" \ + "node { name: 'C' op: '" #INPUT "'}" \ + "node { name: 'D' op: 'DepthwiseConv2dNativeBackpropFilter'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'data_format' value { s: 'NCHW' } }" \ + " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" \ + " attr { key: 'padding' value { s: 'SAME' } }" \ + " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" \ + " input: ['A', 'B', 'C']}" \ + "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: " #T " } }" \ + " input: ['A', 'D'] }"); \ + EXPECT_EQ(DoMklLayoutOptimizationPass(), \ + "A(" #INPUT 
");B(Int32Input);C(" #INPUT ");D(_" \ + "MklDepthwiseConv2dNativeBackpropFilter);" \ + "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|" \ + "A->D;A->E;A:control->DMT/_0:control;A:control->DMT/_1:control;" \ + "A:control->DMT/_2:control;B->D:1;C->D:2;D->E:1;DMT/_0->D:3;" \ + "DMT/_1->D:4;DMT/_2->D:5"); \ } +REGISTER_TEST_ALL_TYPES(NodeRewrite_DepthwiseConv2dNativeGradFilter_Positive); +#undef REGISTER_TEST -TEST_F(MklLayoutPassTest, NodeRewrite_DepthwiseConv2dNativeGradInput_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Int32Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'DepthwiseConv2dNativeBackpropInput'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" - " input: ['B', 'A', 'C']}" - "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'D'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Int32Input);C(Input);D(_" - "MklDepthwiseConv2dNativeBackpropInput);" - "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|" - "A->D:1;A->E;B->D;B:control->DMT/_0:control;" - "B:control->DMT/_1:control;B:control->DMT/_2:control;C->D:2;" - "D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5"); +#define REGISTER_TEST(NAME, T, INPUT) \ + TEST_F(MklLayoutPassTest, NAME##_##T) { \ + InitGraph( \ + "node { name: 'A' op: '" #INPUT "'}" \ + "node { name: 'B' op: 'Int32Input'}" \ + "node { name: 'C' op: '" #INPUT "'}" \ + "node { name: 'D' op: 'DepthwiseConv2dNativeBackpropInput'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'data_format' value { s: 'NCHW' } }" \ + " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" \ + " attr { key: 'padding' value { s: 'SAME' } }" \ + " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" \ + " input: ['B', 'A', 'C']}" \ + "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: " #T " } }" \ + " input: ['A', 'D'] }"); \ + EXPECT_EQ(DoMklLayoutOptimizationPass(), \ + "A(" #INPUT ");B(Int32Input);C(" #INPUT ");D(_" \ + "MklDepthwiseConv2dNativeBackpropInput);" \ + "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|" \ + "A->D:1;A->E;B->D;B:control->DMT/_0:control;" \ + "B:control->DMT/_1:control;B:control->DMT/_2:control;C->D:2;" \ + "D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5"); \ } +REGISTER_TEST_ALL_TYPES(NodeRewrite_DepthwiseConv2DGradInput_Positive); +#undef REGISTER_TEST // Check that we never rewrite BiasAddGrad. 
-TEST_F(MklLayoutPassTest, NodeRewrite_BiasAddGrad_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Polygamma'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B']}" - "node { name: 'D' op: 'Zeta'" - " attr {key: 'T' value { type: DT_FLOAT } }" - " input: ['C', 'A']}" - "node { name: 'E' op: 'BiasAddGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " input: ['D'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(Polygamma);D(Zeta);E(BiasAddGrad)|" - "A->C;A->D:1;B->C:1;C->D;D->E"); +#define REGISTER_TEST(NAME, T, INPUT) \ + TEST_F(MklLayoutPassTest, NAME##_##T) { \ + InitGraph( \ + "node { name: 'A' op: '" #INPUT "'}" \ + "node { name: 'B' op: '" #INPUT "'}" \ + "node { name: 'C' op: 'Polygamma'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " input: ['A', 'B']}" \ + "node { name: 'D' op: 'Zeta'" \ + " attr {key: 'T' value { type: " #T " } }" \ + " input: ['C', 'A']}" \ + "node { name: 'E' op: 'BiasAddGrad'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'data_format' value { s: 'NCHW' } }" \ + " input: ['D'] }"); \ + EXPECT_EQ(DoMklLayoutOptimizationPass(), \ + "A(" #INPUT ");B(" #INPUT ");C(Polygamma);D(Zeta);E(BiasAddGrad)|" \ + "A->C;A->D:1;B->C:1;C->D;D->E"); \ } +REGISTER_TEST_ALL_TYPES(NodeRewrite_BiasAddGrad_Positive); +#undef REGISTER_TEST // Check that we never rewrite BiasAddGrad. -TEST_F(MklLayoutPassTest, NodeRewrite_BiasAddGrad_Positive1) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'MatMul'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'transpose_a' value { b: false } }" - " attr { key: 'transpose_b' value { b: false } }" - " input: ['A', 'B']}" - "node { name: 'D' op: 'Zeta'" - " attr {key: 'T' value { type: DT_FLOAT } }" - " input: ['C', 'A']}" - "node { name: 'E' op: 'BiasAddGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " input: ['D'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(_MklMatMul);D(Zeta);E(BiasAddGrad)" - "|A->C;A->D:1;B->C:1;C->D;D->E"); +#define REGISTER_TEST(NAME, T, INPUT) \ + TEST_F(MklLayoutPassTest, NAME##_##T) { \ + InitGraph( \ + "node { name: 'A' op: '" #INPUT "'}" \ + "node { name: 'B' op: '" #INPUT "'}" \ + "node { name: 'C' op: 'MatMul'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'transpose_a' value { b: false } }" \ + " attr { key: 'transpose_b' value { b: false } }" \ + " input: ['A', 'B']}" \ + "node { name: 'D' op: 'Zeta'" \ + " attr {key: 'T' value { type: " #T " } }" \ + " input: ['C', 'A']}" \ + "node { name: 'E' op: 'BiasAddGrad'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'data_format' value { s: 'NCHW' } }" \ + " input: ['D'] }"); \ + EXPECT_EQ(DoMklLayoutOptimizationPass(), \ + "A(" #INPUT ");B(" #INPUT ");C(_MklMatMul);D(Zeta);E(BiasAddGrad)" \ + "|A->C;A->D:1;B->C:1;C->D;D->E"); \ } +REGISTER_TEST_ALL_TYPES(NodeRewrite_BiasAddGrad_Positive1); +#undef REGISTER_TEST // Check that we never rewrite BiasAddGrad. 
-TEST_F(MklLayoutPassTest, NodeRewrite_BiasAddGrad_Positive2) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'M' op: '_MklInput'}" - "node { name: 'N' op: '_MklInput'}" - "node { name: 'C' op: '_MklConv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" - " input: ['A', 'B', 'M', 'N']}" - "node { name: 'D' op: 'Zeta'" - " attr {key: 'T' value { type: DT_FLOAT } }" - " input: ['C', 'A']}" - "node { name: 'E' op: 'BiasAddGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " input: ['D'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(_MklConv2D);D(Zeta);E(BiasAddGrad);" - "M(_MklInput);N(_MklInput)|A->C;A->D:1;B->C:1;C->D;D->E;" - "M->C:2;N->C:3"); +#define REGISTER_TEST(NAME, T, INPUT) \ + TEST_F(MklLayoutPassTest, NAME##_##T) { \ + InitGraph( \ + "node { name: 'A' op: '" #INPUT "'}" \ + "node { name: 'B' op: '" #INPUT "'}" \ + "node { name: 'M' op: '_MklInput'}" \ + "node { name: 'N' op: '_MklInput'}" \ + "node { name: 'C' op: '_MklConv2D'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'data_format' value { s: 'NCHW' } }" \ + " attr { key: 'use_cudnn_on_gpu' value { b: false } }" \ + " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" \ + " attr { key: 'padding' value { s: 'SAME' } }" \ + " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" \ + " input: ['A', 'B', 'M', 'N']}" \ + "node { name: 'D' op: 'Zeta'" \ + " attr {key: 'T' value { type: " #T " } }" \ + " input: ['C', 'A']}" \ + "node { name: 'E' op: 'BiasAddGrad'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'data_format' value { s: 'NCHW' } }" \ + " input: ['D'] }"); \ + EXPECT_EQ(DoMklLayoutOptimizationPass(), \ + "A(" #INPUT ");B(" #INPUT ");C(_MklConv2D);D(Zeta);E(BiasAddGrad);" \ + "M(_MklInput);N(_MklInput)|A->C;A->D:1;B->C:1;C->D;D->E;" \ + "M->C:2;N->C:3"); \ } +REGISTER_TEST_ALL_TYPES(NodeRewrite_BiasAddGrad_Positive2); +#undef REGISTER_TEST // Concat Op test: Concat with no Mkl layer feeding it -TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Basic) { - InitGraph( - "node { name: 'A' op: 'Const' " - " attr { key: 'dtype' value { type: DT_INT32 } }" - " attr { key: 'value' value { " - " tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } " - " int_val: 0 } } } }" - "node { name: 'B' op: 'InputList'" - " attr { key: 'N' value { i: 2 } }}" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'Concat'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'N' value { i: 2 } }" - " input: ['A', 'B:0', 'B:1']}" - "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['C', 'D'] }"); - EXPECT_EQ( - DoMklLayoutOptimizationPass(), - "A(Const);B(InputList);C(Input);D(_MklConcat);DMT/_0(Const);" - "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D;A:control->DMT/_0:control;" - "A:control->DMT/_1:control;A:control->DMT/_2:control;B->D:1;" - "B:1->D:2;C->E;D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5"); +#define REGISTER_TEST(NAME, T, INPUT) \ + TEST_F(MklLayoutPassTest, NAME##_##T) { \ + InitGraph( \ + "node { name: 'A' op: 'Const' " \ + " attr { key: 'dtype' value { type: DT_INT32 } }" \ + " attr { key: 'value' 
value { " \ + " tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } " \ + " int_val: 0 } } } }" \ + "node { name: 'B' op: '" #INPUT "List'" \ + " attr { key: 'N' value { i: 2 } }}" \ + "node { name: 'C' op: '" #INPUT "'}" \ + "node { name: 'D' op: 'Concat'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'N' value { i: 2 } }" \ + " input: ['A', 'B:0', 'B:1']}" \ + "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: " #T " } }" \ + " input: ['C', 'D'] }"); \ + EXPECT_EQ( \ + DoMklLayoutOptimizationPass(), \ + "A(Const);B(" #INPUT "List);C(" #INPUT ");D(_MklConcat);DMT/_0(Const);"\ + "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D;A:control->DMT/_0:control;" \ + "A:control->DMT/_1:control;A:control->DMT/_2:control;B->D:1;" \ + "B:1->D:2;C->E;D->E:1;DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5"); \ } +REGISTER_TEST_ALL_TYPES(NodeRewrite_Concat_Basic); +#undef REGISTER_TEST // Concat with 2 Mkl layers feeding it -TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_Mkl) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'Input'}" - "node { name: 'E' op: 'Conv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" - " input: ['A', 'B']}" - "node { name: 'F' op: 'Conv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" - " input: ['C', 'D']}" - "node { name: 'G' op: 'Const' " - " attr { key: 'dtype' value { type: DT_INT32 } }" - " attr { key: 'value' value { " - " tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } " - " int_val: 0 } } } }" - "node { name: 'H' op: 'Concat'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'N' value { i: 2 } }" - " input: ['G', 'E', 'F']}" - "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'H'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);" - "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(_MklConv2D);" - "F(_MklConv2D);G(Const);H(_MklConcat);I(Zeta)|A->E;A->I;" - "A:control->DMT/_0:control;A:control->DMT/_1:control;" - "B->E:1;C->F;C:control->DMT/_2:control;C:control->DMT/_3:control;" - "D->F:1;DMT/_0->E:2;DMT/_1->E:3;DMT/_2->F:2;DMT/_3->F:3;" - "DMT/_4->H:3;E->H:1;E:2->H:4;F->H:2;F:2->H:5;G->H;" - "G:control->DMT/_4:control;H->I:1"); +#define REGISTER_TEST(NAME, T, INPUT) \ + TEST_F(MklLayoutPassTest, NAME##_##T) { \ + InitGraph( \ + "node { name: 'A' op: '" #INPUT "'}" \ + "node { name: 'B' op: '" #INPUT "'}" \ + "node { name: 'C' op: '" #INPUT "'}" \ + "node { name: 'D' op: '" #INPUT "'}" \ + "node { name: 'E' op: 'Conv2D'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'data_format' value { s: 'NCHW' } }" \ + " attr { key: 'use_cudnn_on_gpu' value { b: false } }" \ + " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" \ + " attr { key: 'padding' value { s: 'SAME' } }" \ + " attr { key: 'dilations' value { list: {i: 1, i:1, 
i:1, i:1} } }" \ + " input: ['A', 'B']}" \ + "node { name: 'F' op: 'Conv2D'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'data_format' value { s: 'NCHW' } }" \ + " attr { key: 'use_cudnn_on_gpu' value { b: false } }" \ + " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" \ + " attr { key: 'padding' value { s: 'SAME' } }" \ + " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" \ + " input: ['C', 'D']}" \ + "node { name: 'G' op: 'Const' " \ + " attr { key: 'dtype' value { type: DT_INT32 } }" \ + " attr { key: 'value' value { " \ + " tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } " \ + " int_val: 0 } } } }" \ + "node { name: 'H' op: 'Concat'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'N' value { i: 2 } }" \ + " input: ['G', 'E', 'F']}" \ + "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: " #T " } }" \ + " input: ['A', 'H'] }"); \ + EXPECT_EQ(DoMklLayoutOptimizationPass(), \ + "A(" #INPUT ");B(" #INPUT ");C(" #INPUT ");D(" #INPUT ");" \ + "DMT/_0(Const);DMT/_1(Const);" \ + "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(_MklConv2D);" \ + "F(_MklConv2D);G(Const);H(_MklConcat);I(Zeta)|A->E;A->I;" \ + "A:control->DMT/_0:control;A:control->DMT/_1:control;" \ + "B->E:1;C->F;C:control->DMT/_2:control;C:control->DMT/_3:control;" \ + "D->F:1;DMT/_0->E:2;DMT/_1->E:3;DMT/_2->F:2;DMT/_3->F:3;" \ + "DMT/_4->H:3;E->H:1;E:2->H:4;F->H:2;F:2->H:5;G->H;" \ + "G:control->DMT/_4:control;H->I:1"); \ } +REGISTER_TEST_ALL_TYPES(NodeRewrite_Concat_Input_Mkl); +#undef REGISTER_TEST // Concat with 1 Mkl and 1 non-Mkl layer feeding it -TEST_F(MklLayoutPassTest, NodeRewrite_Concat_Input_MixedMkl) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'Input'}" - "node { name: 'E' op: 'Conv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" - " input: ['A', 'B']}" - "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['C', 'D']}" - "node { name: 'G' op: 'Const' " - " attr { key: 'dtype' value { type: DT_INT32 } }" - " attr { key: 'value' value { " - " tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } " - " int_val: 0 } } } }" - "node { name: 'H' op: 'Concat'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'N' value { i: 2 } }" - " input: ['G', 'E', 'F']}" - "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'H'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);" - "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Zeta);G(Const);" - "H(_MklConcat);I(Zeta)|A->E;A->I;A:control->DMT/_0:control;" - "A:control->DMT/_1:control;B->E:1;C->F;D->F:1;DMT/_0->E:2;" - "DMT/_1->E:3;DMT/_2->H:3;DMT/_3->H:5;E->H:1;E:2->H:4;F->H:2;" - "G->H;G:control->DMT/_2:control;G:control->DMT/_3:control;H->I:1"); +#define REGISTER_TEST(NAME, T, INPUT) \ + TEST_F(MklLayoutPassTest, NAME##_##T) { \ +REGISTER_TEST_ALL_TYPES(NodeRewrite_Concat_Input_MixedMkl) { \ + InitGraph( \ + "node { name: 'A' op: '" #INPUT "'}" \ + "node { name: 'B' op: '" #INPUT "'}" \ + "node { name: 'C' op: '" #INPUT "'}" \ + "node { name: 'D' op: '" #INPUT 
"'}" \ + "node { name: 'E' op: 'Conv2D'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'data_format' value { s: 'NCHW' } }" \ + " attr { key: 'use_cudnn_on_gpu' value { b: false } }" \ + " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" \ + " attr { key: 'padding' value { s: 'SAME' } }" \ + " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" \ + " input: ['A', 'B']}" \ + "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: " #T " } }" \ + " input: ['C', 'D']}" \ + "node { name: 'G' op: 'Const' " \ + " attr { key: 'dtype' value { type: DT_INT32 } }" \ + " attr { key: 'value' value { " \ + " tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } " \ + " int_val: 0 } } } }" \ + "node { name: 'H' op: 'Concat'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'N' value { i: 2 } }" \ + " input: ['G', 'E', 'F']}" \ + "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: " #T " } }" \ + " input: ['A', 'H'] }"); \ + EXPECT_EQ(DoMklLayoutOptimizationPass(), \ + "A(" #INPUT ");B(" #INPUT ");C(" #INPUT ");D(" #INPUT ");" \ + "DMT/_0(Const);DMT/_1(Const);" \ + "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Zeta);G(Const);" \ + "H(_MklConcat);I(Zeta)|A->E;A->I;A:control->DMT/_0:control;" \ + "A:control->DMT/_1:control;B->E:1;C->F;D->F:1;DMT/_0->E:2;" \ + "DMT/_1->E:3;DMT/_2->H:3;DMT/_3->H:5;E->H:1;E:2->H:4;F->H:2;" \ + "G->H;G:control->DMT/_2:control;G:control->DMT/_3:control;H->I:1"); \ } // ConcatV2 Op test: ConcatV2 with no Mkl layer feeding it -TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Basic) { - InitGraph( - "node { name: 'A' op: 'Const' " - " attr { key: 'dtype' value { type: DT_INT32 } }" - " attr { key: 'value' value { " - " tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } " - " int_val: 0 } } } }" - "node { name: 'B' op: 'InputList'" - " attr { key: 'N' value { i: 2 } }}" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'ConcatV2'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'Tidx' value { type: DT_INT32 } }" - " attr { key: 'N' value { i: 2 } }" - " input: ['B:0', 'B:1', 'A']}" - "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['C', 'D'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Const);B(InputList);C(Input);D(_MklConcatV2);DMT/_0(Const);" - "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D:2;B->D;B:1->D:1;" - "B:control->DMT/_0:control;B:control->DMT/_1:control;" - "B:control->DMT/_2:control;C->E;D->E:1;DMT/_0->D:3;" - "DMT/_1->D:4;DMT/_2->D:5"); +#define REGISTER_TEST(NAME, T, INPUT) \ + TEST_F(MklLayoutPassTest, NAME##_##T) { \ + InitGraph( \ + "node { name: 'A' op: 'Const' " \ + " attr { key: 'dtype' value { type: DT_INT32 } }" \ + " attr { key: 'value' value { " \ + " tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } " \ + " int_val: 0 } } } }" \ + "node { name: 'B' op: '" #INPUT "List'" \ + " attr { key: 'N' value { i: 2 } }}" \ + "node { name: 'C' op: '" #INPUT "'}" \ + "node { name: 'D' op: 'ConcatV2'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'Tidx' value { type: DT_INT32 } }" \ + " attr { key: 'N' value { i: 2 } }" \ + " input: ['B:0', 'B:1', 'A']}" \ + "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: " #T " } }" \ + " input: ['C', 'D'] }"); \ + EXPECT_EQ(DoMklLayoutOptimizationPass(), \ + "A(Const);B(" #INPUT "List);C(" #INPUT ");D(_MklConcatV2);" \ + "DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D:2;B->D;" \ + "B:1->D:1;B:control->DMT/_0:control;B:control->DMT/_1:control;" \ 
+ "B:control->DMT/_2:control;C->E;D->E:1;DMT/_0->D:3;" \ + "DMT/_1->D:4;DMT/_2->D:5"); \ } +REGISTER_TEST_ALL_TYPES(NodeRewrite_ConcatV2_Basic); +#undef REGISTER_TEST // ConcatV2 with 2 Mkl layers feeding it -TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_Mkl) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'Input'}" - "node { name: 'E' op: 'Conv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" - " input: ['A', 'B']}" - "node { name: 'F' op: 'Conv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" - " input: ['C', 'D']}" - "node { name: 'G' op: 'Const' " - " attr { key: 'dtype' value { type: DT_INT32 } }" - " attr { key: 'value' value { " - " tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } " - " int_val: 0 } } } }" - "node { name: 'H' op: 'ConcatV2'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'Tidx' value { type: DT_INT32 } }" - " attr { key: 'N' value { i: 2 } }" - " input: ['E', 'F', 'G']}" - "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'H'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);" - "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(_MklConv2D);" - "F(_MklConv2D);G(Const);H(_MklConcatV2);I(Zeta)|A->E;A->I;" - "A:control->DMT/_0:control;A:control->DMT/_1:control;B->E:1;C->F;" - "C:control->DMT/_2:control;C:control->DMT/_3:control;" - "D->F:1;DMT/_0->E:2;DMT/_1->E:3;DMT/_2->F:2;DMT/_3->F:3;" - "DMT/_4->H:5;E->H;E:2->H:3;E:control->DMT/_4:control;F->H:1;" - "F:2->H:4;G->H:2;H->I:1"); +#define REGISTER_TEST(NAME, T, INPUT) \ + TEST_F(MklLayoutPassTest, NAME##_##T) { \ + InitGraph( \ + "node { name: 'A' op: '" #INPUT "'}" \ + "node { name: 'B' op: '" #INPUT "'}" \ + "node { name: 'C' op: '" #INPUT "'}" \ + "node { name: 'D' op: '" #INPUT "'}" \ + "node { name: 'E' op: 'Conv2D'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'data_format' value { s: 'NCHW' } }" \ + " attr { key: 'use_cudnn_on_gpu' value { b: false } }" \ + " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" \ + " attr { key: 'padding' value { s: 'SAME' } }" \ + " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" \ + " input: ['A', 'B']}" \ + "node { name: 'F' op: 'Conv2D'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'data_format' value { s: 'NCHW' } }" \ + " attr { key: 'use_cudnn_on_gpu' value { b: false } }" \ + " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" \ + " attr { key: 'padding' value { s: 'SAME' } }" \ + " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" \ + " input: ['C', 'D']}" \ + "node { name: 'G' op: 'Const' " \ + " attr { key: 'dtype' value { type: DT_INT32 } }" \ + " attr { key: 'value' value { " \ + " tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } " \ + " int_val: 0 } } } 
}" \ + "node { name: 'H' op: 'ConcatV2'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'Tidx' value { type: DT_INT32 } }" \ + " attr { key: 'N' value { i: 2 } }" \ + " input: ['E', 'F', 'G']}" \ + "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: " #T " } }" \ + " input: ['A', 'H'] }"); \ + EXPECT_EQ(DoMklLayoutOptimizationPass(), \ + "A(" #INPUT ");B(" #INPUT ");C(" #INPUT ");D(" #INPUT ");" \ + "DMT/_0(Const);DMT/_1(Const);" \ + "DMT/_2(Const);DMT/_3(Const);DMT/_4(Const);E(_MklConv2D);" \ + "F(_MklConv2D);G(Const);H(_MklConcatV2);I(Zeta)|A->E;A->I;" \ + "A:control->DMT/_0:control;A:control->DMT/_1:control;B->E:1;C->F;" \ + "C:control->DMT/_2:control;C:control->DMT/_3:control;" \ + "D->F:1;DMT/_0->E:2;DMT/_1->E:3;DMT/_2->F:2;DMT/_3->F:3;" \ + "DMT/_4->H:5;E->H;E:2->H:3;E:control->DMT/_4:control;F->H:1;" \ + "F:2->H:4;G->H:2;H->I:1"); \ } +REGISTER_TEST_ALL_TYPES(NodeRewrite_ConcatV2_Input_Mkl); +#undef REGISTER_TEST // ConcatV2 with 1 Mkl and 1 non-Mkl layer feeding it -TEST_F(MklLayoutPassTest, NodeRewrite_ConcatV2_Input_MixedMkl) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Input'}" - "node { name: 'D' op: 'Input'}" - "node { name: 'E' op: 'Conv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NCHW' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" - " input: ['A', 'B']}" - "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['C', 'D']}" - "node { name: 'G' op: 'Const' " - " attr { key: 'dtype' value { type: DT_INT32 } }" - " attr { key: 'value' value { " - " tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } " - " int_val: 0 } } } }" - "node { name: 'H' op: 'ConcatV2'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'Tidx' value { type: DT_INT32 } }" - " attr { key: 'N' value { i: 2 } }" - " input: ['E', 'F', 'G']}" - "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'H'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(Input);D(Input);DMT/_0(Const);DMT/_1(Const);" - "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Zeta);G(Const);" - "H(_MklConcatV2);I(Zeta)|A->E;A->I;A:control->DMT/_0:control;" - "A:control->DMT/_1:control;B->E:1;C->F;D->F:1;DMT/_0->E:2;" - "DMT/_1->E:3;DMT/_2->H:4;DMT/_3->H:5;E->H;E:2->H:3;" - "E:control->DMT/_2:control;E:control->DMT/_3:control;F->H:1;" - "G->H:2;H->I:1"); +#define REGISTER_TEST(NAME, T, INPUT) \ + TEST_F(MklLayoutPassTest, NAME##_##T) { \ + InitGraph( \ + "node { name: 'A' op: '" #INPUT "'}" \ + "node { name: 'B' op: '" #INPUT "'}" \ + "node { name: 'C' op: '" #INPUT "'}" \ + "node { name: 'D' op: '" #INPUT "'}" \ + "node { name: 'E' op: 'Conv2D'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'data_format' value { s: 'NCHW' } }" \ + " attr { key: 'use_cudnn_on_gpu' value { b: false } }" \ + " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" \ + " attr { key: 'padding' value { s: 'SAME' } }" \ + " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" \ + " input: ['A', 'B']}" \ + "node { name: 'F' op: 'Zeta' attr { key: 'T' value { type: " #T " } }" \ + " input: ['C', 'D']}" \ + "node { name: 'G' op: 'Const' " \ + " attr { key: 'dtype' value { type: DT_INT32 } }" \ + 
" attr { key: 'value' value { " \ + " tensor { dtype: DT_INT32 tensor_shape { dim { size: 1 } } " \ + " int_val: 0 } } } }" \ + "node { name: 'H' op: 'ConcatV2'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'Tidx' value { type: DT_INT32 } }" \ + " attr { key: 'N' value { i: 2 } }" \ + " input: ['E', 'F', 'G']}" \ + "node { name: 'I' op: 'Zeta' attr { key: 'T' value { type: " #T " } }" \ + " input: ['A', 'H'] }"); \ + EXPECT_EQ(DoMklLayoutOptimizationPass(), \ + "A(" #INPUT ");B(" #INPUT ");C(" #INPUT ");D(" #INPUT ");" \ + "DMT/_0(Const);DMT/_1(Const);" \ + "DMT/_2(Const);DMT/_3(Const);E(_MklConv2D);F(Zeta);G(Const);" \ + "H(_MklConcatV2);I(Zeta)|A->E;A->I;A:control->DMT/_0:control;" \ + "A:control->DMT/_1:control;B->E:1;C->F;D->F:1;DMT/_0->E:2;" \ + "DMT/_1->E:3;DMT/_2->H:4;DMT/_3->H:5;E->H;E:2->H:3;" \ + "E:control->DMT/_2:control;E:control->DMT/_3:control;F->H:1;" \ + "G->H:2;H->I:1"); \ } +REGISTER_TEST_ALL_TYPES(NodeRewrite_ConcatV2_Input_MixedMkl); +#undef REGISTER_TEST -TEST_F(MklLayoutPassTest, NodeRewrite_Relu_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Relu'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A'] }" - "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(_MklRelu);C(Zeta);DMT/_0(Const)|A->B;A->C;" - "A:control->DMT/_0:control;B->C:1;DMT/_0->B:1"); +#define REGISTER_TEST(NAME, T, INPUT) \ + TEST_F(MklLayoutPassTest, NAME##_##T) { \ + InitGraph( \ + "node { name: 'A' op: '" #INPUT "'}" \ + "node { name: 'B' op: 'Relu'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " input: ['A'] }" \ + "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: " #T " } }" \ + " input: ['A', 'B'] }"); \ + EXPECT_EQ(DoMklLayoutOptimizationPass(), \ + "A(" #INPUT ");B(_MklRelu);C(Zeta);DMT/_0(Const)|A->B;A->C;" \ + "A:control->DMT/_0:control;B->C:1;DMT/_0->B:1"); \ } +REGISTER_TEST_ALL_TYPES(NodeRewrite_Relu_Positive); +#undef REGISTER_TEST -TEST_F(MklLayoutPassTest, NodeRewrite_ReluGrad_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'ReluGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }" - "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'C'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(_MklReluGrad);D(Zeta);DMT/_0(Const);" - "DMT/_1(Const)|A->C;A->D;A:control->DMT/_0:control;" - "A:control->DMT/_1:control;B->C:1;C->D:1;DMT/_0->C:2;DMT/_1->C:3"); +#define REGISTER_TEST(NAME, T, INPUT) \ + TEST_F(MklLayoutPassTest, NAME##_##T) { \ + InitGraph( \ + "node { name: 'A' op: '" #INPUT "'}" \ + "node { name: 'B' op: '" #INPUT "'}" \ + "node { name: 'C' op: 'ReluGrad'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " input: ['A', 'B'] }" \ + "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: " #T " } }" \ + " input: ['A', 'C'] }"); \ + EXPECT_EQ(DoMklLayoutOptimizationPass(), \ + "A(" #INPUT ");B(" #INPUT ");C(_MklReluGrad);D(Zeta);DMT/_0(Const);" \ + "DMT/_1(Const)|A->C;A->D;A:control->DMT/_0:control;" \ + "A:control->DMT/_1:control;B->C:1;C->D:1;DMT/_0->C:2;DMT/_1->C:3"); \ } +REGISTER_TEST_ALL_TYPES(NodeRewrite_ReluGrad_Positive); +#undef REGISTER_TEST -TEST_F(MklLayoutPassTest, NodeRewrite_ReluReluGrad_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Relu'" - " attr { key: 'T' value { type: DT_FLOAT } 
}" - " input: ['A'] }" - "node { name: 'C' op: 'ReluGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }" - "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'C'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(_MklRelu);C(_MklReluGrad);D(Zeta);DMT/_0(Const);" - "DMT/_1(Const)|A->B;A->C;A->D;A:control->DMT/_0:control;" - "A:control->DMT/_1:control;B->C:1;B:1->C:3;C->D:1;DMT/_0->B:1;" - "DMT/_1->C:2"); +#define REGISTER_TEST(NAME, T, INPUT) \ + TEST_F(MklLayoutPassTest, NAME##_##T) { \ + InitGraph( \ + "node { name: 'A' op: '" #INPUT "'}" \ + "node { name: 'B' op: 'Relu'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " input: ['A'] }" \ + "node { name: 'C' op: 'ReluGrad'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " input: ['A', 'B'] }" \ + "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: " #T " } }" \ + " input: ['A', 'C'] }"); \ + EXPECT_EQ(DoMklLayoutOptimizationPass(), \ + "A(" #INPUT ");B(_MklRelu);C(_MklReluGrad);D(Zeta);DMT/_0(Const);" \ + "DMT/_1(Const)|A->B;A->C;A->D;A:control->DMT/_0:control;" \ + "A:control->DMT/_1:control;B->C:1;B:1->C:3;C->D:1;DMT/_0->B:1;" \ + "DMT/_1->C:2"); \ } +REGISTER_TEST_ALL_TYPES(NodeRewrite_ReluReluGrad_Positive); +#undef REGISTER_TEST -TEST_F(MklLayoutPassTest, NodeRewrite_Relu6_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Relu6'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A'] }" - "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(_MklRelu6);C(Zeta);DMT/_0(Const)|A->B;A->C;" - "A:control->DMT/_0:control;B->C:1;DMT/_0->B:1"); +#define REGISTER_TEST(NAME, T, INPUT) \ + TEST_F(MklLayoutPassTest, NAME##_##T) { \ + InitGraph( \ + "node { name: 'A' op: '" #INPUT "'}" \ + "node { name: 'B' op: 'Relu6'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " input: ['A'] }" \ + "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: " #T " } }" \ + " input: ['A', 'B'] }"); \ + EXPECT_EQ(DoMklLayoutOptimizationPass(), \ + "A(" #INPUT ");B(_MklRelu6);C(Zeta);DMT/_0(Const)|A->B;A->C;" \ + "A:control->DMT/_0:control;B->C:1;DMT/_0->B:1"); \ } +REGISTER_TEST_ALL_TYPES(NodeRewrite_Relu6_Positive); +#undef REGISTER_TEST -TEST_F(MklLayoutPassTest, NodeRewrite_Relu6Grad_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'Relu6Grad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }" - "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'C'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(_MklRelu6Grad);D(Zeta);DMT/_0(Const);" - "DMT/_1(Const)|A->C;A->D;A:control->DMT/_0:control;" - "A:control->DMT/_1:control;B->C:1;C->D:1;DMT/_0->C:2;DMT/_1->C:3"); +#define REGISTER_TEST(NAME, T, INPUT) \ + TEST_F(MklLayoutPassTest, NAME##_##T) { \ + InitGraph( \ + "node { name: 'A' op: '" #INPUT "'}" \ + "node { name: 'B' op: '" #INPUT "'}" \ + "node { name: 'C' op: 'Relu6Grad'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " input: ['A', 'B'] }" \ + "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: " #T " } }" \ + " input: ['A', 'C'] }"); \ + EXPECT_EQ(DoMklLayoutOptimizationPass(), \ + "A(" #INPUT ");B(" #INPUT ");C(_MklRelu6Grad);D(Zeta);DMT/_0(Const);" \ + "DMT/_1(Const)|A->C;A->D;A:control->DMT/_0:control;" \ + 
"A:control->DMT/_1:control;B->C:1;C->D:1;DMT/_0->C:2;DMT/_1->C:3"); \ } +REGISTER_TEST_ALL_TYPES(NodeRewrite_Relu6Grad_Positive); +#undef REGISTER_TEST -TEST_F(MklLayoutPassTest, NodeRewrite_Relu6Relu6Grad_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Relu6'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A'] }" - "node { name: 'C' op: 'Relu6Grad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }" - "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'C'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(_MklRelu6);C(_MklRelu6Grad);D(Zeta);DMT/_0(Const);" - "DMT/_1(Const)|A->B;A->C;A->D;A:control->DMT/_0:control;" - "A:control->DMT/_1:control;B->C:1;B:1->C:3;C->D:1;DMT/_0->B:1;" - "DMT/_1->C:2"); +#define REGISTER_TEST(NAME, T, INPUT) \ + TEST_F(MklLayoutPassTest, NAME##_##T) { \ + InitGraph( \ + "node { name: 'A' op: '" #INPUT "'}" \ + "node { name: 'B' op: 'Relu6'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " input: ['A'] }" \ + "node { name: 'C' op: 'Relu6Grad'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " input: ['A', 'B'] }" \ + "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: " #T " } }" \ + " input: ['A', 'C'] }"); \ + EXPECT_EQ(DoMklLayoutOptimizationPass(), \ + "A(" #INPUT ");B(_MklRelu6);C(_MklRelu6Grad);D(Zeta);DMT/_0(Const);" \ + "DMT/_1(Const)|A->B;A->C;A->D;A:control->DMT/_0:control;" \ + "A:control->DMT/_1:control;B->C:1;B:1->C:3;C->D:1;DMT/_0->B:1;" \ + "DMT/_1->C:2"); \ } +REGISTER_TEST_ALL_TYPES(NodeRewrite_Relu6Relu6Grad_Positive); +#undef REGISTER_TEST -TEST_F(MklLayoutPassTest, NodeRewrite_LeakyRelu_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'LeakyRelu'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'alpha' value { f: 0.1 } }" - " input: ['A'] }" - "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(_MklLeakyRelu);C(Zeta);DMT/_0(Const)|A->B;A->C;" - "A:control->DMT/_0:control;B->C:1;DMT/_0->B:1"); +#define REGISTER_TEST(NAME, T, INPUT) \ + TEST_F(MklLayoutPassTest, NAME##_##T) { \ + InitGraph( \ + "node { name: 'A' op: '" #INPUT "'}" \ + "node { name: 'B' op: 'LeakyRelu'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'alpha' value { f: 0.1 } }" \ + " input: ['A'] }" \ + "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: " #T " } }" \ + " input: ['A', 'B'] }"); \ + EXPECT_EQ(DoMklLayoutOptimizationPass(), \ + "A(" #INPUT ");B(_MklLeakyRelu);C(Zeta);DMT/_0(Const)|A->B;A->C;" \ + "A:control->DMT/_0:control;B->C:1;DMT/_0->B:1"); \ } +REGISTER_TEST_ALL_TYPES(NodeRewrite_LeakyRelu_Positive); +#undef REGISTER_TEST -TEST_F(MklLayoutPassTest, NodeRewrite_LeakyRelu_Negative) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'LeakyRelu'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'alpha' value { f: 2.0 } }" - " input: ['A'] }" - "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'B'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(LeakyRelu);C(Zeta)|A->B;A->C;B->C:1"); +#define REGISTER_TEST(NAME, T, INPUT) \ + TEST_F(MklLayoutPassTest, NAME##_##T) { \ + InitGraph( \ + "node { name: 'A' op: '" #INPUT "'}" \ + "node { name: 'B' op: 'LeakyRelu'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'alpha' value { f: 2.0 } }" \ + " 
input: ['A'] }" \ + "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: " #T " } }" \ + " input: ['A', 'B'] }"); \ + EXPECT_EQ(DoMklLayoutOptimizationPass(), \ + "A(" #INPUT ");B(LeakyRelu);C(Zeta)|A->B;A->C;B->C:1"); \ } +REGISTER_TEST_ALL_TYPES(NodeRewrite_LeakyRelu_Negative); +#undef REGISTER_TEST -TEST_F(MklLayoutPassTest, NodeRewrite_LeakyReluGrad_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'LeakyReluGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'alpha' value { f: 0.1 } }" - " input: ['A', 'B'] }" - "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'C'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(_MklLeakyReluGrad);D(Zeta);DMT/_0(Const);" - "DMT/_1(Const)|A->C;A->D;A:control->DMT/_0:control;" - "A:control->DMT/_1:control;B->C:1;C->D:1;DMT/_0->C:2;DMT/_1->C:3"); +#define REGISTER_TEST(NAME, T, INPUT) \ + TEST_F(MklLayoutPassTest, NAME##_##T) { \ + InitGraph( \ + "node { name: 'A' op: '" #INPUT "'}" \ + "node { name: 'B' op: '" #INPUT "'}" \ + "node { name: 'C' op: 'LeakyReluGrad'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'alpha' value { f: 0.1 } }" \ + " input: ['A', 'B'] }" \ + "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: " #T " } }" \ + " input: ['A', 'C'] }"); \ + EXPECT_EQ(DoMklLayoutOptimizationPass(), \ + "A(" #INPUT ");B(" #INPUT ");C(_MklLeakyReluGrad);D(Zeta);DMT/_0(Const);" \ + "DMT/_1(Const)|A->C;A->D;A:control->DMT/_0:control;" \ + "A:control->DMT/_1:control;B->C:1;C->D:1;DMT/_0->C:2;DMT/_1->C:3"); \ } +REGISTER_TEST_ALL_TYPES(NodeRewrite_LeakyReluGrad_Positive); +#undef REGISTER_TEST -TEST_F(MklLayoutPassTest, NodeRewrite_LeakyReluGrad_Negative) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Input'}" - "node { name: 'C' op: 'LeakyReluGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'alpha' value { f: 2.0 } }" - " input: ['A', 'B'] }" - "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'C'] }"); - EXPECT_EQ( - DoMklLayoutOptimizationPass(), - "A(Input);B(Input);C(LeakyReluGrad);D(Zeta)|A->C;A->D;B->C:1;C->D:1"); +#define REGISTER_TEST(NAME, T, INPUT) \ + TEST_F(MklLayoutPassTest, NAME##_##T) { \ + InitGraph( \ + "node { name: 'A' op: '" #INPUT "'}" \ + "node { name: 'B' op: '" #INPUT "'}" \ + "node { name: 'C' op: 'LeakyReluGrad'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'alpha' value { f: 2.0 } }" \ + " input: ['A', 'B'] }" \ + "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: " #T " } }" \ + " input: ['A', 'C'] }"); \ + EXPECT_EQ( \ + DoMklLayoutOptimizationPass(), \ + "A(" #INPUT ");B(" #INPUT ");C(LeakyReluGrad);D(Zeta)|A->C;A->D;B->C:1;C->D:1"); \ } +REGISTER_TEST_ALL_TYPES(NodeRewrite_LeakyReluGrad_Negative); +#undef REGISTER_TEST -TEST_F(MklLayoutPassTest, NodeRewrite_LeakyReluLeakyReluGrad_Positive) { - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'LeakyRelu'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'alpha' value { f: 0.1 } }" - " input: ['A'] }" - "node { name: 'C' op: 'LeakyReluGrad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'alpha' value { f: 0.1 } }" - " input: ['A', 'B'] }" - "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" - " input: ['A', 'C'] }"); - EXPECT_EQ( - DoMklLayoutOptimizationPass(), - 
"A(Input);B(_MklLeakyRelu);C(_MklLeakyReluGrad);D(Zeta);DMT/_0(Const);" - "DMT/_1(Const)|A->B;A->C;A->D;A:control->DMT/_0:control;" - "A:control->DMT/_1:control;B->C:1;B:1->C:3;C->D:1;DMT/_0->B:1;" - "DMT/_1->C:2"); +#define REGISTER_TEST(NAME, T, INPUT) \ + TEST_F(MklLayoutPassTest, NAME##_##T) { \ + InitGraph( \ + "node { name: 'A' op: '" #INPUT "'}" \ + "node { name: 'B' op: 'LeakyRelu'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'alpha' value { f: 0.1 } }" \ + " input: ['A'] }" \ + "node { name: 'C' op: 'LeakyReluGrad'" \ + " attr { key: 'T' value { type: " #T " } }" \ + " attr { key: 'alpha' value { f: 0.1 } }" \ + " input: ['A', 'B'] }" \ + "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: " #T " } }" \ + " input: ['A', 'C'] }"); \ + EXPECT_EQ( \ + DoMklLayoutOptimizationPass(), \ + "A(" #INPUT ");B(_MklLeakyRelu);C(_MklLeakyReluGrad);D(Zeta);DMT/_0(Const);" \ + "DMT/_1(Const)|A->B;A->C;A->D;A:control->DMT/_0:control;" \ + "A:control->DMT/_1:control;B->C:1;B:1->C:3;C->D:1;DMT/_0->B:1;" \ + "DMT/_1->C:2"); \ } +REGISTER_TEST_ALL_TYPES(NodeRewrite_LeakyReluLeakyReluGrad_Positive); +#undef REGISTER_TEST +// clang-format on TEST_F(MklLayoutPassTest, NodeRewrite_AvgPool_Positive) { InitGraph( From 984d1da7626e788c01de46616b59888ef48b9fc3 Mon Sep 17 00:00:00 2001 From: Vikram Dattu Date: Fri, 15 May 2020 13:39:16 +0530 Subject: [PATCH 007/178] Fix person detection example esp32-camera repo is progressing and the example code now fails to build. Fixed example and modified instructions to clone specific version of esp32-camera. Signed-off-by: Vikram Dattu --- .../lite/micro/examples/person_detection/esp/README_ESP.md | 5 ++++- .../micro/examples/person_detection/esp/app_camera_esp.h | 5 +++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tensorflow/lite/micro/examples/person_detection/esp/README_ESP.md b/tensorflow/lite/micro/examples/person_detection/esp/README_ESP.md index 78a7561d5b5..3d79d24d6bc 100644 --- a/tensorflow/lite/micro/examples/person_detection/esp/README_ESP.md +++ b/tensorflow/lite/micro/examples/person_detection/esp/README_ESP.md @@ -23,10 +23,13 @@ example`(below) As the `person_detection` example requires an external component `esp32-camera` for functioning hence we will have to manually clone it in `components/` -directory of the example with following command. +directory of the example with following commands. ``` git clone https://github.com/espressif/esp32-camera.git components/esp32-camera + cd components/esp32-camera/ + git checkout eacd640b8d379883bff1251a1005ebf3cf1ed95c + cd ../../ ``` To build this, run: diff --git a/tensorflow/lite/micro/examples/person_detection/esp/app_camera_esp.h b/tensorflow/lite/micro/examples/person_detection/esp/app_camera_esp.h index 403fb4defb1..fd5f1a8295d 100644 --- a/tensorflow/lite/micro/examples/person_detection/esp/app_camera_esp.h +++ b/tensorflow/lite/micro/examples/person_detection/esp/app_camera_esp.h @@ -16,6 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_ESP_APP_CAMERA_ESP_H_ #define TENSORFLOW_LITE_MICRO_EXAMPLES_PERSON_DETECTION_ESP_APP_CAMERA_ESP_H_ +#include "sensor.h" #include "esp_camera.h" #include "esp_log.h" #include "esp_system.h" @@ -30,7 +31,7 @@ limitations under the License. 
#define CAMERA_PIXEL_FORMAT PIXFORMAT_GRAYSCALE /* - * FRAMESIZE_96x96, // 96x96 + * FRAMESIZE_96X96, // 96x96 * FRAMESIZE_QQVGA, // 160x120 * FRAMESIZE_QQVGA2, // 128x160 * FRAMESIZE_QCIF, // 176x144 @@ -43,7 +44,7 @@ limitations under the License. * FRAMESIZE_SXGA, // 1280x1024 * FRAMESIZE_UXGA, // 1600x1200 */ -#define CAMERA_FRAME_SIZE FRAMESIZE_96x96 +#define CAMERA_FRAME_SIZE FRAMESIZE_96X96 #if CONFIG_CAMERA_MODEL_WROVER_KIT #define PWDN_GPIO_NUM -1 From 2575e9ae8606a67e286b1274e332c5c7d1a1c6b5 Mon Sep 17 00:00:00 2001 From: frreiss Date: Tue, 26 May 2020 14:24:37 -0700 Subject: [PATCH 008/178] Switch to using TF_LITE_REPORT_ERROR --- tensorflow/lite/micro/micro_allocator.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/micro/micro_allocator.cc b/tensorflow/lite/micro/micro_allocator.cc index 67fef7635ad..a4920bf113b 100644 --- a/tensorflow/lite/micro/micro_allocator.cc +++ b/tensorflow/lite/micro/micro_allocator.cc @@ -269,7 +269,7 @@ TfLiteStatus FlatBufferIntArrayToTfLiteIntArray( TfLiteIntArrayGetSizeInBytes(flat_array->Length()), alignof(TfLiteIntArray))); if (nullptr == ret) { - error_reporter->Report( + TF_LITE_REPORT_ERROR(error_reporter, "Failed to allocate %d bytes of memory to copy an array.", TfLiteIntArrayGetSizeInBytes(flat_array->Length())); return kTfLiteError; From d3dbe347fdd5c82fdd643fd231877832d4ec940c Mon Sep 17 00:00:00 2001 From: sshiddib Date: Tue, 26 May 2020 15:03:42 -0700 Subject: [PATCH 009/178] Removing OpenMP dependency from Mkl-dnn supporting threadpool --- .../mkl_threadpool_device_test.cc | 4 +-- tensorflow/core/kernels/mkl_conv_ops.cc | 5 ++-- tensorflow/core/kernels/mkl_conv_ops_test.cc | 2 +- tensorflow/core/kernels/mkl_qmatmul_op.cc | 27 +++++++++++++++++-- .../core/kernels/mkl_quantized_conv_ops.h | 21 +++++++++++++-- ...mkl_requantization_range_per_channel_op.cc | 22 +++++++++++++++ tensorflow/tensorflow.bzl | 4 +-- 7 files changed, 73 insertions(+), 12 deletions(-) diff --git a/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc b/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc index 5d583a8360b..c29752d3c2c 100644 --- a/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc +++ b/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc @@ -25,7 +25,7 @@ limitations under the License. 
namespace tensorflow { -#ifdef _OPENMP +#if defined(_OPENMP) && !defined(ENABLE_MKLDNN_THREADPOOL) TEST(MKLThreadPoolDeviceTest, TestOmpDefaults) { SessionOptions options; unsetenv("OMP_NUM_THREADS"); @@ -46,7 +46,7 @@ TEST(MKLThreadPoolDeviceTest, TestOmpPreSets) { EXPECT_EQ(omp_get_max_threads(), 314); } -#endif // _OPENMP +#endif // defined(_OPENMP) && !defined(ENABLE_MKLDNN_THREADPOOL) } // namespace tensorflow diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index 59de3229211..2208401c7b3 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -1493,7 +1493,8 @@ class MklQuantizedConv2DOp float max_output_value; MklQuantizationRangeForMultiplication( min_input, max_input, min_filter.flat()(0), - max_filter.flat()(0), &min_output_value, &max_output_value); + max_filter.flat()(0), &min_output_value, &max_output_value, + context); AllocateOutputSetMklShape(context, 1, &output_min, {}, output_min_mkl_shape); AllocateOutputSetMklShape(context, 2, &output_max, {}, @@ -1510,7 +1511,7 @@ class MklQuantizedConv2DOp output_max_mkl_shape); MklQuantizationRangeForMultiplication( min_input, max_input, min_filter, max_filter, &output_min, - &output_max); + &output_max, context); } } } diff --git a/tensorflow/core/kernels/mkl_conv_ops_test.cc b/tensorflow/core/kernels/mkl_conv_ops_test.cc index a055351337c..9d11b0fb006 100644 --- a/tensorflow/core/kernels/mkl_conv_ops_test.cc +++ b/tensorflow/core/kernels/mkl_conv_ops_test.cc @@ -28,7 +28,7 @@ limitations under the License. #include "tensorflow/core/public/session.h" #if defined(INTEL_MKL_DNN_ONLY) -#include "third_party/intel_mkl_dnn/include/mkldnn.h" +#include "mkldnn.hpp" #include "tensorflow/core/util/mkl_util.h" #endif diff --git a/tensorflow/core/kernels/mkl_qmatmul_op.cc b/tensorflow/core/kernels/mkl_qmatmul_op.cc index cc7127e0559..382566c4ab5 100644 --- a/tensorflow/core/kernels/mkl_qmatmul_op.cc +++ b/tensorflow/core/kernels/mkl_qmatmul_op.cc @@ -91,12 +91,15 @@ limitations under the License. 
// https://software.intel.com/en-us/articles/lower-numerical-precision-deep-learning-inference-and-training #ifdef INTEL_MKL +#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/kernels/fill_functor.h" #include "tensorflow/core/kernels/mkl_matmul_ops_common.h" #include "tensorflow/core/kernels/mkl_quantized_conv_ops.h" #include "tensorflow/core/kernels/no_op.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/util/mkl_threadpool.h" +#include "tensorflow/core/util/work_sharder.h" namespace { enum { @@ -342,7 +345,7 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase { const float max_weight = context->input(6).flat()(0); MklQuantizationRangeForMultiplication( min_input, max_input, min_weight, max_weight, min_output_value, - max_output_value); + max_output_value, context); } virtual void ExtendMklDnnMatMulFwdParams(OpKernelContext* context, @@ -428,6 +431,26 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase { ((max_input - min_input) * std::max(std::abs(max_weight), std::abs(min_weight))); +#ifdef ENABLE_MKLDNN_THREADPOOL + auto parallel_func = [&](int64 start, int64 end) { + for (int64 j = start ; j < end; j++) { + int x = 0; + for (int64 i = 0; i < k; ++i) { + x += wt_buf[i * n + j]; + } + comp_bias[j] = + ((bias_buf[j] * out_scale) + static_cast(x * qa_amin)); + } + }; + + const float kArithCost = 2.5f; + const float kMovCost = 1.0f; + float shard_cost = 4*kArithCost + kMovCost; + const DeviceBase::CpuWorkerThreads& worker_threads = + *(context->device()->tensorflow_cpu_worker_threads()); + Shard(worker_threads.num_threads, worker_threads.workers, n, shard_cost, + parallel_func); +#else #pragma omp parallel for schedule(static) for (int j = 0; j < n; ++j) { int x = 0; @@ -437,7 +460,7 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase { comp_bias[j] = ((bias_buf[j] * out_scale) + static_cast(x * qa_amin)); } - +#endif // ENABLE_MKLDNN_THREADPOOL return reinterpret_cast(comp_bias_); } else if (mode_ == QUANTIZE_MODE_SCALED) { diff --git a/tensorflow/core/kernels/mkl_quantized_conv_ops.h b/tensorflow/core/kernels/mkl_quantized_conv_ops.h index fef2d837cf2..a95f8d29bfc 100644 --- a/tensorflow/core/kernels/mkl_quantized_conv_ops.h +++ b/tensorflow/core/kernels/mkl_quantized_conv_ops.h @@ -39,7 +39,8 @@ float MklFloatForOneQuantizedLevel(float range_min, float range_max) { template void MklQuantizationRangeForMultiplication(float min_a, float max_a, float min_b, float max_b, - float* min_c, float* max_c) { + float* min_c, float* max_c, + OpKernelContext* context) { const float a_float_for_one_quant_level = MklFloatForOneQuantizedLevel(min_a, max_a); const float b_float_for_one_quant_level = @@ -59,7 +60,8 @@ void MklQuantizationRangeForMultiplication(float min_a, float max_a, const Tensor& min_b_vector, const Tensor& max_b_vector, Tensor** min_c_vector, - Tensor** max_c_vector) { + Tensor** max_c_vector, + OpKernelContext* context) { DCHECK(min_b_vector.NumElements() == (*min_c_vector)->NumElements()); DCHECK(max_b_vector.NumElements() == (*max_c_vector)->NumElements()); size_t n_channel = min_b_vector.NumElements(); @@ -69,6 +71,20 @@ void MklQuantizationRangeForMultiplication(float min_a, float max_a, const float* max_b = max_b_vector.flat().data(); float* min_c = (*min_c_vector)->flat().data(); float* max_c = (*max_c_vector)->flat().data(); +#ifdef ENABLE_MKLDNN_THREADPOOL + auto eigen_tp = + MklDnnThreadPoolWrapper::GetInstance().CreateThreadPoolPtr(context); + 
eigen_tp->parallel_for(n_channel, [&](int n, int n_channel) { + float a_float_for_one_quant_level = + MklFloatForOneQuantizedLevel(min_a, max_a); + float b_float_for_one_quant_level = + MklFloatForOneQuantizedLevel(min_b[n], max_b[n]); + float c_float_for_one_quant_level = + a_float_for_one_quant_level * b_float_for_one_quant_level; + min_c[n] = c_float_for_one_quant_level * c_lowest; + max_c[n] = c_float_for_one_quant_level * c_highest; + }); +#else #pragma omp parallel for for (size_t n = 0; n < n_channel; ++n) { float a_float_for_one_quant_level = @@ -80,6 +96,7 @@ void MklQuantizationRangeForMultiplication(float min_a, float max_a, min_c[n] = c_float_for_one_quant_level * c_lowest; max_c[n] = c_float_for_one_quant_level * c_highest; } +#endif // ENABLE_MKLDNN_THREADPOOL } } // namespace tensorflow diff --git a/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc b/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc index 767a6f1c397..0a19573d901 100644 --- a/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc +++ b/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc @@ -28,6 +28,7 @@ limitations under the License. #include "tensorflow/core/kernels/meta_support.h" #include "tensorflow/core/kernels/no_op.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/util/mkl_threadpool.h" #include "tensorflow/core/util/mkl_util.h" namespace tensorflow { @@ -73,6 +74,26 @@ class MklRequantizationRangePerChannelOp : public OpKernel { // Find the ranges of each channel in parallel. float out_min_max = std::numeric_limits::min(); +#ifdef ENABLE_MKLDNN_THREADPOOL + // TODO: Add eigen parallel_for + for(size_t i = 0; i < depth; ++i) { + Eigen::Tensor min = + transposed_input.chip<0>(i).minimum(); + Eigen::Tensor max = + transposed_input.chip<0>(i).maximum(); + const int32_t min_per_channel = min(); + const int32_t max_per_channel = max(); + const int32_t abs_max = + std::max(std::abs(min_per_channel), std::abs(max_per_channel)); + float scale = + std::max(std::abs(input_min_data[i]), std::abs(input_max_data[i])); + ranges[i] = + scale * static_cast(abs_max) / static_cast(1L << 31); + if (min_per_channel < 0) is_non_negative = false; + + out_min_max = std::max(out_min_max, ranges[i]); + } +#else #pragma omp parallel for reduction(max : out_min_max) for (size_t i = 0; i < depth; ++i) { Eigen::Tensor min = @@ -92,6 +113,7 @@ class MklRequantizationRangePerChannelOp : public OpKernel { // Thread-local out_min_max. out_min_max = std::max(out_min_max, ranges[i]); } +#endif // ENABLE_MKLDNN_THREADPOOL // All local out_min_max gets max-reduced into one global out_min_max at // the end of the loop by specifying reduction(max:out_min_max) along with // omp parallel for. diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 9a780839be3..5dc5877367b 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -354,9 +354,7 @@ def tf_copts( ) def tf_openmp_copts(): - # TODO(intel-mkl): Remove -fopenmp for threadpool after removing all - # omp pragmas in tensorflow/core. 
- return if_mkl_lnx_x64(["-fopenmp"]) + if_mkldnn_threadpool(["-fopenmp"]) + return (if_mkl_lnx_x64(["-fopenmp"]) + if_mkldnn_threadpool(["-fno-openmp"])) def tfe_xla_copts(): return select({ From 4da2360572103d436f9873f50cef8c940dc963b7 Mon Sep 17 00:00:00 2001 From: sshiddib Date: Tue, 26 May 2020 15:03:42 -0700 Subject: [PATCH 010/178] Removing OpenMP dependency from Mkl-dnn supporting threadpool --- .../mkl_threadpool_device_test.cc | 4 +-- tensorflow/core/kernels/mkl_conv_ops_test.cc | 2 +- tensorflow/core/kernels/mkl_qmatmul_op.cc | 25 ++++++++++++++++++- .../core/kernels/mkl_quantized_conv_ops.h | 14 +++++++++++ ...mkl_requantization_range_per_channel_op.cc | 22 ++++++++++++++++ tensorflow/tensorflow.bzl | 4 +-- 6 files changed, 64 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc b/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc index 5d583a8360b..c29752d3c2c 100644 --- a/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc +++ b/tensorflow/core/common_runtime/mkl_threadpool_device_test.cc @@ -25,7 +25,7 @@ limitations under the License. namespace tensorflow { -#ifdef _OPENMP +#if defined(_OPENMP) && !defined(ENABLE_MKLDNN_THREADPOOL) TEST(MKLThreadPoolDeviceTest, TestOmpDefaults) { SessionOptions options; unsetenv("OMP_NUM_THREADS"); @@ -46,7 +46,7 @@ TEST(MKLThreadPoolDeviceTest, TestOmpPreSets) { EXPECT_EQ(omp_get_max_threads(), 314); } -#endif // _OPENMP +#endif // defined(_OPENMP) && !defined(ENABLE_MKLDNN_THREADPOOL) } // namespace tensorflow diff --git a/tensorflow/core/kernels/mkl_conv_ops_test.cc b/tensorflow/core/kernels/mkl_conv_ops_test.cc index a055351337c..9d11b0fb006 100644 --- a/tensorflow/core/kernels/mkl_conv_ops_test.cc +++ b/tensorflow/core/kernels/mkl_conv_ops_test.cc @@ -28,7 +28,7 @@ limitations under the License. #include "tensorflow/core/public/session.h" #if defined(INTEL_MKL_DNN_ONLY) -#include "third_party/intel_mkl_dnn/include/mkldnn.h" +#include "mkldnn.hpp" #include "tensorflow/core/util/mkl_util.h" #endif diff --git a/tensorflow/core/kernels/mkl_qmatmul_op.cc b/tensorflow/core/kernels/mkl_qmatmul_op.cc index cc7127e0559..d8bbc130c55 100644 --- a/tensorflow/core/kernels/mkl_qmatmul_op.cc +++ b/tensorflow/core/kernels/mkl_qmatmul_op.cc @@ -91,12 +91,15 @@ limitations under the License. 
// https://software.intel.com/en-us/articles/lower-numerical-precision-deep-learning-inference-and-training #ifdef INTEL_MKL +#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/kernels/fill_functor.h" #include "tensorflow/core/kernels/mkl_matmul_ops_common.h" #include "tensorflow/core/kernels/mkl_quantized_conv_ops.h" #include "tensorflow/core/kernels/no_op.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/util/mkl_threadpool.h" +#include "tensorflow/core/util/work_sharder.h" namespace { enum { @@ -428,6 +431,26 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase { ((max_input - min_input) * std::max(std::abs(max_weight), std::abs(min_weight))); +#ifdef ENABLE_MKLDNN_THREADPOOL + auto parallel_func = [&](int64 start, int64 end) { + for (int64 j = start ; j < end; j++) { + int x = 0; + for (int64 i = 0; i < k; ++i) { + x += wt_buf[i * n + j]; + } + comp_bias[j] = + ((bias_buf[j] * out_scale) + static_cast(x * qa_amin)); + } + }; + + const float kArithCost = 2.5f; + const float kMovCost = 1.0f; + float shard_cost = 4*kArithCost + kMovCost; + const DeviceBase::CpuWorkerThreads& worker_threads = + *(context->device()->tensorflow_cpu_worker_threads()); + Shard(worker_threads.num_threads, worker_threads.workers, n, shard_cost, + parallel_func); +#else #pragma omp parallel for schedule(static) for (int j = 0; j < n; ++j) { int x = 0; @@ -437,7 +460,7 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase { comp_bias[j] = ((bias_buf[j] * out_scale) + static_cast(x * qa_amin)); } - +#endif // ENABLE_MKLDNN_THREADPOOL return reinterpret_cast(comp_bias_); } else if (mode_ == QUANTIZE_MODE_SCALED) { diff --git a/tensorflow/core/kernels/mkl_quantized_conv_ops.h b/tensorflow/core/kernels/mkl_quantized_conv_ops.h index fef2d837cf2..037a3a5f3ff 100644 --- a/tensorflow/core/kernels/mkl_quantized_conv_ops.h +++ b/tensorflow/core/kernels/mkl_quantized_conv_ops.h @@ -69,6 +69,19 @@ void MklQuantizationRangeForMultiplication(float min_a, float max_a, const float* max_b = max_b_vector.flat().data(); float* min_c = (*min_c_vector)->flat().data(); float* max_c = (*max_c_vector)->flat().data(); +#ifdef ENABLE_MKLDNN_THREADPOOL + // TODO: Add eigen parallel_for + for(size_t n = 0; n < n_channel; ++n) { + float a_float_for_one_quant_level = + MklFloatForOneQuantizedLevel(min_a, max_a); + float b_float_for_one_quant_level = + MklFloatForOneQuantizedLevel(min_b[n], max_b[n]); + float c_float_for_one_quant_level = + a_float_for_one_quant_level * b_float_for_one_quant_level; + min_c[n] = c_float_for_one_quant_level * c_lowest; + max_c[n] = c_float_for_one_quant_level * c_highest; + } +#else #pragma omp parallel for for (size_t n = 0; n < n_channel; ++n) { float a_float_for_one_quant_level = @@ -80,6 +93,7 @@ void MklQuantizationRangeForMultiplication(float min_a, float max_a, min_c[n] = c_float_for_one_quant_level * c_lowest; max_c[n] = c_float_for_one_quant_level * c_highest; } +#endif // ENABLE_MKLDNN_THREADPOOL } } // namespace tensorflow diff --git a/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc b/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc index 767a6f1c397..0a19573d901 100644 --- a/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc +++ b/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc @@ -28,6 +28,7 @@ limitations under the License. 
#include "tensorflow/core/kernels/meta_support.h" #include "tensorflow/core/kernels/no_op.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/util/mkl_threadpool.h" #include "tensorflow/core/util/mkl_util.h" namespace tensorflow { @@ -73,6 +74,26 @@ class MklRequantizationRangePerChannelOp : public OpKernel { // Find the ranges of each channel in parallel. float out_min_max = std::numeric_limits::min(); +#ifdef ENABLE_MKLDNN_THREADPOOL + // TODO: Add eigen parallel_for + for(size_t i = 0; i < depth; ++i) { + Eigen::Tensor min = + transposed_input.chip<0>(i).minimum(); + Eigen::Tensor max = + transposed_input.chip<0>(i).maximum(); + const int32_t min_per_channel = min(); + const int32_t max_per_channel = max(); + const int32_t abs_max = + std::max(std::abs(min_per_channel), std::abs(max_per_channel)); + float scale = + std::max(std::abs(input_min_data[i]), std::abs(input_max_data[i])); + ranges[i] = + scale * static_cast(abs_max) / static_cast(1L << 31); + if (min_per_channel < 0) is_non_negative = false; + + out_min_max = std::max(out_min_max, ranges[i]); + } +#else #pragma omp parallel for reduction(max : out_min_max) for (size_t i = 0; i < depth; ++i) { Eigen::Tensor min = @@ -92,6 +113,7 @@ class MklRequantizationRangePerChannelOp : public OpKernel { // Thread-local out_min_max. out_min_max = std::max(out_min_max, ranges[i]); } +#endif // ENABLE_MKLDNN_THREADPOOL // All local out_min_max gets max-reduced into one global out_min_max at // the end of the loop by specifying reduction(max:out_min_max) along with // omp parallel for. diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 9a780839be3..5dc5877367b 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -354,9 +354,7 @@ def tf_copts( ) def tf_openmp_copts(): - # TODO(intel-mkl): Remove -fopenmp for threadpool after removing all - # omp pragmas in tensorflow/core. - return if_mkl_lnx_x64(["-fopenmp"]) + if_mkldnn_threadpool(["-fopenmp"]) + return (if_mkl_lnx_x64(["-fopenmp"]) + if_mkldnn_threadpool(["-fno-openmp"])) def tfe_xla_copts(): return select({ From 335ecb29ea4dc18a59eafa154ba179dd8fde1426 Mon Sep 17 00:00:00 2001 From: Gregory Keith Date: Wed, 3 Jun 2020 00:12:33 +1000 Subject: [PATCH 011/178] Change accumulator count dtype to int64 --- tensorflow/python/keras/layers/preprocessing/normalization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/layers/preprocessing/normalization.py b/tensorflow/python/keras/layers/preprocessing/normalization.py index be04e9947b8..ded049e0013 100644 --- a/tensorflow/python/keras/layers/preprocessing/normalization.py +++ b/tensorflow/python/keras/layers/preprocessing/normalization.py @@ -177,7 +177,7 @@ class _NormalizingCombiner(Combiner): reduction_counts = np.delete(values.shape, self.axis) # We get the number of elements that will be reduced by multiplying all # values of 'shape' corresponding to the reduced axes. - count = np.prod(reduction_counts, dtype=np.int32) + count = np.prod(reduction_counts, dtype=np.int64) # We want to reduce across dimensions except those specified in 'axis' # when using np.mean or np.variance; create the tuple of axes to reduce From 0616e14c5446982299407f86efc4d4751605621d Mon Sep 17 00:00:00 2001 From: Srinivasan Narayanamoorthy Date: Wed, 3 Jun 2020 10:34:36 -0700 Subject: [PATCH 012/178] Batch Matmul enhancements. 
--- .../core/common_runtime/mkl_layout_pass.cc | 4 +- .../core/kernels/mkl_batch_matmul_op.cc | 161 +++++++----------- .../core/kernels/mkl_matmul_ops_common.h | 4 +- tensorflow/core/ops/math_ops.cc | 8 +- 4 files changed, 70 insertions(+), 107 deletions(-) diff --git a/tensorflow/core/common_runtime/mkl_layout_pass.cc b/tensorflow/core/common_runtime/mkl_layout_pass.cc index fbec7059743..b3e7262f72f 100644 --- a/tensorflow/core/common_runtime/mkl_layout_pass.cc +++ b/tensorflow/core/common_runtime/mkl_layout_pass.cc @@ -394,10 +394,10 @@ class MklLayoutRewritePass : public GraphOptimizationPass { kRewriteForLayoutPropagation}); rinfo_.push_back({csinfo_.batch_matmul, mkl_op_registry::GetMklOpName(csinfo_.batch_matmul), - CopyAttrsAll, AlwaysRewrite, kRewriteForOpNameChange}); + CopyAttrsAll, MatMulRewrite, kRewriteForOpNameChange}); rinfo_.push_back({csinfo_.batch_matmul_v2, mkl_op_registry::GetMklOpName(csinfo_.batch_matmul_v2), - CopyAttrsAll, AlwaysRewrite, kRewriteForOpNameChange}); + CopyAttrsAll, MatMulRewrite, kRewriteForOpNameChange}); rinfo_.push_back( {csinfo_.concat, mkl_op_registry::GetMklOpName(csinfo_.concat), CopyAttrsAll, AlwaysRewrite, kRewriteForLayoutPropagation}); diff --git a/tensorflow/core/kernels/mkl_batch_matmul_op.cc b/tensorflow/core/kernels/mkl_batch_matmul_op.cc index 37888656020..dd31c1a6123 100644 --- a/tensorflow/core/kernels/mkl_batch_matmul_op.cc +++ b/tensorflow/core/kernels/mkl_batch_matmul_op.cc @@ -25,11 +25,10 @@ limitations under the License. #define EIGEN_USE_THREADS -#if defined(INTEL_MKL) && !defined(INTEL_MKL_DNN_ONLY) +#if defined(INTEL_MKL) #include #include "mkl_cblas.h" -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" @@ -44,6 +43,7 @@ limitations under the License. #include "tensorflow/core/platform/types.h" #include "tensorflow/core/util/matmul_bcast.h" #include "tensorflow/core/util/mkl_util.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" namespace tensorflow { @@ -54,7 +54,8 @@ typedef Eigen::ThreadPoolDevice CPUDevice; template class BatchMatMulMkl : public OpKernel { public: - explicit BatchMatMulMkl(OpKernelConstruction* context) : OpKernel(context) { + explicit BatchMatMulMkl(OpKernelConstruction* context) + : OpKernel(context), eigen_batch_mm_v2_(context) { OP_REQUIRES_OK(context, context->GetAttr("adj_x", &adj_x_)); OP_REQUIRES_OK(context, context->GetAttr("adj_y", &adj_y_)); } @@ -78,10 +79,10 @@ class BatchMatMulMkl : public OpKernel { errors::InvalidArgument("lhs and rhs ndims must be >= 2: ", ndims)); for (int i = 0; i < ndims - 2; ++i) { OP_REQUIRES(ctx, lhs.dim_size(i) == rhs.dim_size(i), - errors::InvalidArgument( - "lhs.dim(", i, ") and rhs.dim(", i, - ") must be the same: ", lhs.shape().DebugString(), - " vs ", rhs.shape().DebugString())); + errors::InvalidArgument("lhs.dim(", i, ") and rhs.dim(", i, + ") must be the same: ", + lhs.shape().DebugString(), " vs ", + rhs.shape().DebugString())); } } else { OP_REQUIRES( @@ -104,6 +105,14 @@ class BatchMatMulMkl : public OpKernel { "In[0] and In[1] must have compatible batch dimensions: ", lhs.shape().DebugString(), " vs. ", rhs.shape().DebugString())); +#ifdef ENABLE_MKLDNN_THREADPOOL + if (bcast.IsBroadcastingRequired()) { + // Calling Eigen Kernel for broadcasting case and return. Eigen does + // not have BF16 support, so we have to fail graciously in that case. 
+ eigen_batch_mm_v2_.Compute(ctx); + return; + } +#endif // ENABLE_MKLDNN_THREADPOOL TensorShape out_shape = bcast.output_batch_shape(); auto batch_size = bcast.output_batch_size(); @@ -148,23 +157,27 @@ class BatchMatMulMkl : public OpKernel { std::vector ldb_array(batch_size, adj_y_ ? K : N); std::vector ldc_array(batch_size, N); std::vector group_size(1, batch_size); - - if (std::is_same::value) { + const Scalar* a = nullptr; + const Scalar* b = nullptr; + Scalar* c = nullptr; + bool threadpool_enabled = false; +#ifdef ENABLE_MKLDNN_THREADPOOL + threadpool_enabled = true; +#endif // ENABLE_MKLDNN_THREADPOOL + if (std::is_same::value || threadpool_enabled) { // DNNL bfloat16 API requires a, b, and c as pointers to tensors // represented as flat-byte array. - const Scalar* a = nullptr; - const Scalar* b = nullptr; - OP_REQUIRES(ctx, !bcast.IsBroadcastingRequired(), - errors::Unimplemented("Broadcasting is not supported for " - "BFloat16 _MklBatchMatMul yet.")); a = &lhs_reshaped(0, 0, 0); b = &rhs_reshaped(0, 0, 0); - Scalar* c = &out_reshaped(0, 0, 0); + OP_REQUIRES(ctx, !bcast.IsBroadcastingRequired(), + errors::Unimplemented("Broadcasting is not supported for " + "_MklBatchMatMul yet.")); + c = &out_reshaped(0, 0, 0); // TODO(nhasabni): Use appropriate cast instead of passing addresses of // a,b and c. MklCblasGemmBatch(CblasRowMajor, adj_x_, adj_y_, m_array, n_array, k_array, &a, lda_array, &b, ldb_array, &c, ldc_array, 1, - group_size); + group_size, ctx); } else { std::vector a_array; std::vector b_array; @@ -196,86 +209,48 @@ class BatchMatMulMkl : public OpKernel { // pointer is to 2D matrix. MklCblasGemmBatch(CblasRowMajor, adj_x_, adj_y_, m_array, n_array, k_array, &a_array[0], lda_array, &b_array[0], ldb_array, - &c_array[0], ldc_array, 1, group_size); + &c_array[0], ldc_array, 1, group_size, ctx); } } private: bool adj_x_; bool adj_y_; + BatchMatMulV2Op eigen_batch_mm_v2_; - template ::value || - std::is_same::value), - int>::type = 0> - void MklCblasGemmBatch(const CBLAS_LAYOUT Layout, const bool TransA, - const bool TransB, const std::vector& M_Array, - const std::vector& N_Array, - const std::vector& K_Array, const T** A_Array, - const std::vector& lda_Array, - const T** B_Array, - const std::vector& ldb_Array, T** C_Array, - const std::vector& ldc_Array, - const MKL_INT group_count, - const std::vector& group_size) { + void MklCblasGemmBatch( + const CBLAS_LAYOUT Layout, const bool TransA, const bool TransB, + const std::vector& M_Array, const std::vector& N_Array, + const std::vector& K_Array, const float** A_Array, + const std::vector& lda_Array, const float** B_Array, + const std::vector& ldb_Array, float** C_Array, + const std::vector& ldc_Array, const MKL_INT group_count, + const std::vector& group_size, OpKernelContext* ctx) { +#ifndef ENABLE_MKLDNN_THREADPOOL std::vector TransA_Array( group_size[0], TransA ? CblasTrans : CblasNoTrans); std::vector TransB_Array( group_size[0], TransB ? 
CblasTrans : CblasNoTrans); - if (std::is_same::value) { - std::vector alpha_Array(group_size[0], 1.0); - std::vector beta_Array(group_size[0], 0.0); - cblas_sgemm_batch(Layout, &TransA_Array[0], &TransB_Array[0], &M_Array[0], - &N_Array[0], &K_Array[0], &alpha_Array[0], - reinterpret_cast(A_Array), &lda_Array[0], - reinterpret_cast(B_Array), &ldb_Array[0], - &beta_Array[0], reinterpret_cast(C_Array), - &ldc_Array[0], group_count, &group_size[0]); - } else { - std::vector alpha_Array(group_size[0], 1.0); - std::vector beta_Array(group_size[0], 0.0); - cblas_dgemm_batch( - Layout, &TransA_Array[0], &TransB_Array[0], &M_Array[0], &N_Array[0], - &K_Array[0], &alpha_Array[0], - reinterpret_cast(A_Array), &lda_Array[0], - reinterpret_cast(B_Array), &ldb_Array[0], - &beta_Array[0], reinterpret_cast(C_Array), &ldc_Array[0], - group_count, &group_size[0]); - } + std::vector alpha_Array(group_size[0], 1.0); + std::vector beta_Array(group_size[0], 0.0); + cblas_sgemm_batch(Layout, &TransA_Array[0], &TransB_Array[0], &M_Array[0], + &N_Array[0], &K_Array[0], &alpha_Array[0], + reinterpret_cast(A_Array), &lda_Array[0], + reinterpret_cast(B_Array), &ldb_Array[0], + &beta_Array[0], reinterpret_cast(C_Array), + &ldc_Array[0], group_count, &group_size[0]); +#else + DCHECK(Layout == CblasRowMajor); + std::vector TransA_Array(group_size[0], TransA); + std::vector TransB_Array(group_size[0], TransB); + std::vector alpha_Array(group_size[0], 1.0); + std::vector beta_Array(group_size[0], 0.0); + dnnl_gemm_batch(TransA_Array, TransB_Array, M_Array, N_Array, + K_Array, alpha_Array, *A_Array, *B_Array, beta_Array, + *C_Array, group_count, group_size, ctx); +#endif // !ENABLE_MKLDNN_THREADPOOL } - - template ::value || - std::is_same::value), - int>::type = 0> - void MklCblasGemmBatch(const CBLAS_LAYOUT Layout, const bool TransA, - const bool TransB, const std::vector& M_Array, - const std::vector& N_Array, - const std::vector& K_Array, const T** A_Array, - const std::vector& lda_Array, - const T** B_Array, - const std::vector& ldb_Array, T** C_Array, - const std::vector& ldc_Array, - const MKL_INT group_count, - const std::vector& group_size) { - std::vector TransA_array( - group_size[0], TransA ? CblasConjTrans : CblasNoTrans); - std::vector TransB_array( - group_size[0], TransB ? CblasConjTrans : CblasNoTrans); - std::vector alpha_Array(group_size[0], {1.0f, 0.0f}); - std::vector beta_Array(group_size[0], {0.0f, 0.0f}); - auto gemm_fn = (std::is_same::value) ? cblas_cgemm_batch - : cblas_zgemm_batch; - gemm_fn(Layout, &TransA_array[0], &TransB_array[0], &M_Array[0], - &N_Array[0], &K_Array[0], static_cast(&alpha_Array[0]), - reinterpret_cast(A_Array), &lda_Array[0], - reinterpret_cast(B_Array), &ldb_Array[0], - static_cast(&beta_Array[0]), - reinterpret_cast(C_Array), &ldc_Array[0], group_count, - &group_size[0]); - } - - // BatchMatMul BFloat16 support only exists in DNNL 1.2 onwards. +// BatchMatMul BFloat16 support only exists in DNNL 1.2 onwards. 
#if defined(ENABLE_MKLDNN_V1) && defined(ENABLE_INTEL_MKL_BFLOAT16) void MklCblasGemmBatch( const CBLAS_LAYOUT Layout, const bool TransA, const bool TransB, @@ -284,7 +259,7 @@ class BatchMatMulMkl : public OpKernel { const std::vector& lda_Array, const bfloat16** B_Array, const std::vector& ldb_Array, bfloat16** C_Array, const std::vector& ldc_Array, const MKL_INT group_count, - const std::vector& group_size) { + const std::vector& group_size, OpKernelContext* ctx) { DCHECK(Layout == CblasRowMajor); std::vector TransA_Array(group_size[0], TransA); std::vector TransB_Array(group_size[0], TransB); @@ -292,9 +267,9 @@ class BatchMatMulMkl : public OpKernel { std::vector beta_Array(group_size[0], 0.0); // TODO(nhasabni): Remove *A when we pass a, b, and c correctly. // MKLDNN API does not require lda, ldb, and ldc. - dnnl_gemm_batch(TransA_Array, TransB_Array, M_Array, N_Array, - K_Array, alpha_Array, *A_Array, *B_Array, - beta_Array, *C_Array, group_count, group_size); + dnnl_gemm_batch( + TransA_Array, TransB_Array, M_Array, N_Array, K_Array, alpha_Array, + *A_Array, *B_Array, beta_Array, *C_Array, group_count, group_size, ctx); } #endif // ENABLE_MKLDNN_V1 && ENABLE_INTEL_MKL_BFLOAT16 }; @@ -314,16 +289,8 @@ class BatchMatMulMkl : public OpKernel { BatchMatMulMkl) #ifdef ENABLE_MKL -TF_CALL_float(REGISTER_BATCH_MATMUL_MKL); -TF_CALL_double(REGISTER_BATCH_MATMUL_MKL); -TF_CALL_complex64(REGISTER_BATCH_MATMUL_MKL); -TF_CALL_complex128(REGISTER_BATCH_MATMUL_MKL); - TF_CALL_float(REGISTER_BATCH_MATMUL_MKL_V2); -TF_CALL_double(REGISTER_BATCH_MATMUL_MKL_V2); -TF_CALL_complex64(REGISTER_BATCH_MATMUL_MKL_V2); -TF_CALL_complex128(REGISTER_BATCH_MATMUL_MKL_V2); - +TF_CALL_float(REGISTER_BATCH_MATMUL_MKL); #if defined(ENABLE_MKLDNN_V1) && defined(ENABLE_INTEL_MKL_BFLOAT16) TF_CALL_bfloat16(REGISTER_BATCH_MATMUL_MKL); TF_CALL_bfloat16(REGISTER_BATCH_MATMUL_MKL_V2); diff --git a/tensorflow/core/kernels/mkl_matmul_ops_common.h b/tensorflow/core/kernels/mkl_matmul_ops_common.h index d3a05a4a6d2..490afd55932 100644 --- a/tensorflow/core/kernels/mkl_matmul_ops_common.h +++ b/tensorflow/core/kernels/mkl_matmul_ops_common.h @@ -707,8 +707,8 @@ void dnnl_gemm_batch(const std::vector& transa, const std::vector& n, const std::vector& k, const std::vector& alpha, const T* a, const T* b, const std::vector& beta, T* c, - const int group_count, - const std::vector& group_size) { + const int group_count, const std::vector& group_size, + OpKernelContext* ctx = nullptr) { // Current BatchMatMul support in Tensorflow is narrower than the one offered // by MKL and MKL-DNN. 
Current BatchMatMul support in Tensorflow uses only 1 // group of size equal to batch_size, and all MatMul parameters (m, n, k, diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index dfc2463915c..cc20bc7b4d6 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -142,9 +142,7 @@ REGISTER_OP("_MklBatchMatMul") .Input("x: T") .Input("y: T") .Output("output: T") - .Attr( - "T: {bfloat16, half, float, double, int32, int64, complex64, " - "complex128}") + .Attr("T: {bfloat16, float}") .Attr("adj_x: bool = false") .Attr("adj_y: bool = false") .SetShapeFn(shape_inference::BatchMatMulShape); @@ -153,9 +151,7 @@ REGISTER_OP("_MklBatchMatMulV2") .Input("x: T") .Input("y: T") .Output("output: T") - .Attr( - "T: {bfloat16, half, float, double, int32, int64, complex64, " - "complex128}") + .Attr("T: {bfloat16, float}") .Attr("adj_x: bool = false") .Attr("adj_y: bool = false") .SetShapeFn(shape_inference::BatchMatMulV2Shape); From 856f899e4e7bf0c7490ed12f6ad7f2c852871459 Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Fri, 5 Jun 2020 00:36:29 +0000 Subject: [PATCH 013/178] [ROCm] Fix for the ROCm CSB breakage - 200604 This PR addresses 2 unit test failures introduced by the following commit https://github.com/tensorflow/tensorflow/commit/bd20260350de4d0bc870fdc2bda39e62844a3e7b failing unit tests ``` //tensorflow/core/common_runtime/gpu:gpu_device_test //tensorflow/core/common_runtime/gpu:gpu_device_unified_memory_test ``` It is different from PR #40164, which fixes the build error on the ROCm platform. The commit above adds unit tests that check the assignment of priority values to GPU streams. Because ROCm support for assigning priority values to GPU streams is missing, those unit tests fail. This PR/commit adds the missing ROCm support, and updates the unit test to work with AMD GPUs too. The valid priority value range seems to be different for AMD GPUs (0,2) as compared to NVidia GPUs (-1, 0), and hence the changes requrired in the testcases too. --- .../core/common_runtime/gpu/gpu_device.cc | 23 ++++---- .../common_runtime/gpu/gpu_device_test.cc | 59 +++++++++++++++++-- .../stream_executor/rocm/rocm_driver.cc | 15 ++--- .../rocm/rocm_driver_wrapper.h | 1 + 4 files changed, 76 insertions(+), 22 deletions(-) diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc index e47f56b2624..04b7f9d6082 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc @@ -847,7 +847,7 @@ Status VerifyVirtualDeviceSettings( " #valid GPUs: ", valid_platform_gpu_ids.size(), " virtual_devices.size(): ", virtual_devices.size()); } -#if GOOGLE_CUDA +#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM // Check memory_limt_mb and priority sizes match if priority is non-empty. bool priority_exists = !virtual_devices.Get(0).priority().empty(); for (int i = 0; i < virtual_devices.size(); ++i) { @@ -893,15 +893,6 @@ Status VerifyVirtualDeviceSettings( } } } -#elif TENSORFLOW_USE_ROCM - for (int i = 0; i < virtual_devices.size(); ++i) { - if (!virtual_devices.Get(i).priority().empty()) { - return errors::InvalidArgument( - "Priority is supported only on Nvidia GPUs." - " However, priority is set for virtual device ", - i, ", which corresponds to a non Nvidia GPU"); - } - } #endif return Status::OK(); @@ -1185,6 +1176,18 @@ Status BaseGPUDeviceFactory::CreateDevices( platform_gpu_id.value(), " failed. 
Status: ", hipGetErrorString(err)); } + int priority_low, priority_high; + hipDeviceGetStreamPriorityRange(&priority_low, &priority_high); + if (err != hipSuccess) { + return errors::Internal( + "hipDeviceGetStreamPriorityRange() on GPU:", original_device, + " failed. Status: ", hipGetErrorString(err)); + } + VLOG(1) << "HIP stream priority range on GPU(" << original_device + << "): " << priority_high << "," << priority_low; + supported_priority_ranges.insert( + std::make_pair(platform_gpu_id.value(), + std::make_pair(priority_low, priority_high))); #endif } // Reset to the original device. diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc index 26312d35af6..1703d926f9f 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device_test.cc @@ -229,52 +229,89 @@ TEST_F(GPUDeviceTest, SingleVirtualDeviceWithMemoryLimitAndNoPriority) { TEST_F(GPUDeviceTest, SingleVirtualDeviceWithInvalidPriority) { { - // Priority outside the range (-1, 0). +#if TENSORFLOW_USE_ROCM + // Priority outside the range (0, 2) for AMD GPUs + SessionOptions opts = + MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 2}}); +#else + // Priority outside the range (-1, 0) for NVidia GPUs SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-2, 0}}); +#endif std::vector> devices; Status status = DeviceFactory::GetFactory("GPU")->CreateDevices( opts, kDeviceNamePrefix, &devices); EXPECT_EQ(status.code(), error::INVALID_ARGUMENT); +#if TENSORFLOW_USE_ROCM + ExpectErrorMessageSubstr( + status, + "Priority -1 is outside the range of supported priorities [0,2] for" + " virtual device 0 on GPU# 0"); +#else ExpectErrorMessageSubstr( status, "Priority -2 is outside the range of supported priorities [-1,0] for" " virtual device 0 on GPU# 0"); +#endif } { - // Priority outside the range (-1, 0). 
+#if TENSORFLOW_USE_ROCM + // Priority outside the range (0, 2) for AMD GPUs + SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 3}}); +#else + // Priority outside the range (-1, 0) for NVidia GPUs SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 1}}); +#endif std::vector> devices; Status status = DeviceFactory::GetFactory("GPU")->CreateDevices( opts, kDeviceNamePrefix, &devices); EXPECT_EQ(status.code(), error::INVALID_ARGUMENT); +#if TENSORFLOW_USE_ROCM + ExpectErrorMessageSubstr( + status, + "Priority 3 is outside the range of supported priorities [0,2] for" + " virtual device 0 on GPU# 0"); +#else ExpectErrorMessageSubstr( status, "Priority 1 is outside the range of supported priorities [-1,0] for" " virtual device 0 on GPU# 0"); +#endif } } TEST_F(GPUDeviceTest, SingleVirtualDeviceWithMemoryLimitAndPriority) { - SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123}}, {{-1}}); + // 0 is a valid priority value for both AMD and NVidia GPUs + SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123}}, {{0}}); std::vector> devices; TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices( opts, kDeviceNamePrefix, &devices)); EXPECT_EQ(1, devices.size()); EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit()); - EXPECT_EQ(-1, static_cast(devices[0].get())->priority()); + EXPECT_EQ(0, static_cast(devices[0].get())->priority()); } TEST_F(GPUDeviceTest, MultipleVirtualDevices) { +#if TENSORFLOW_USE_ROCM + // Valid range for priority values on AMD GPUs in (0,2) + SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, 1}}); +#else + // Valid range for priority values on NVidia GPUs in (-1, 0) SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0, -1}}); +#endif std::vector> devices; TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices( opts, kDeviceNamePrefix, &devices)); EXPECT_EQ(2, devices.size()); EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit()); EXPECT_EQ(456 << 20, devices[1]->attributes().memory_limit()); +#if TENSORFLOW_USE_ROCM + EXPECT_EQ(0, static_cast(devices[0].get())->priority()); + EXPECT_EQ(1, static_cast(devices[1].get())->priority()); +#else EXPECT_EQ(0, static_cast(devices[0].get())->priority()); EXPECT_EQ(-1, static_cast(devices[1].get())->priority()); +#endif ASSERT_EQ(1, devices[0]->attributes().locality().links().link_size()); ASSERT_EQ(1, devices[1]->attributes().locality().links().link_size()); EXPECT_EQ(1, devices[0]->attributes().locality().links().link(0).device_id()); @@ -292,7 +329,8 @@ TEST_F(GPUDeviceTest, MultipleVirtualDevices) { TEST_F(GPUDeviceTest, MultipleVirtualDevicesWithPriority) { { // Multile virtual devices with fewer priorities. - SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1}}); + // 0 is a valid priority value for both AMD and NVidia GPUs + SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{0}}); std::vector> devices; Status status = DeviceFactory::GetFactory("GPU")->CreateDevices( opts, kDeviceNamePrefix, &devices); @@ -305,16 +343,27 @@ TEST_F(GPUDeviceTest, MultipleVirtualDevicesWithPriority) { } { // Multile virtual devices with matching priority. 
+#if TENSORFLOW_USE_ROCM + // Valid range for priority values on AMD GPUs in (0,2) + SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{2, 1}}); +#else + // Valid range for priority values on NVidia GPUs in (-1, 0) SessionOptions opts = MakeSessionOptions("0", 0, 1, {{123, 456}}, {{-1, 0}}); +#endif std::vector> devices; TF_CHECK_OK(DeviceFactory::GetFactory("GPU")->CreateDevices( opts, kDeviceNamePrefix, &devices)); EXPECT_EQ(2, devices.size()); EXPECT_EQ(123 << 20, devices[0]->attributes().memory_limit()); EXPECT_EQ(456 << 20, devices[1]->attributes().memory_limit()); +#if TENSORFLOW_USE_ROCM + EXPECT_EQ(2, static_cast(devices[0].get())->priority()); + EXPECT_EQ(1, static_cast(devices[1].get())->priority()); +#else EXPECT_EQ(-1, static_cast(devices[0].get())->priority()); EXPECT_EQ(0, static_cast(devices[1].get())->priority()); +#endif } } diff --git a/tensorflow/stream_executor/rocm/rocm_driver.cc b/tensorflow/stream_executor/rocm/rocm_driver.cc index 5a8154f1df8..a070979e71d 100644 --- a/tensorflow/stream_executor/rocm/rocm_driver.cc +++ b/tensorflow/stream_executor/rocm/rocm_driver.cc @@ -560,14 +560,15 @@ GpuDriver::ContextGetSharedMemConfig(GpuContext* context) { /* static */ bool GpuDriver::CreateStream(GpuContext* context, GpuStreamHandle* stream, int priority) { - if (priority != 0) { - LOG(ERROR) << "ROCM stream doesn't support priority. " - << " Should be set to 0 but given: " << priority; - return false; - } ScopedActivateContext activated{context}; - hipError_t res = tensorflow::wrap::hipStreamCreateWithFlags( - stream, hipStreamDefault); // switch to hipStreamNonBlocking? + hipError_t res; + if (priority == 0) { + res = tensorflow::wrap::hipStreamCreateWithFlags( + stream, hipStreamDefault); // switch to hipStreamNonBlocking? + } else { + res = tensorflow::wrap::hipStreamCreateWithPriority( + stream, hipStreamDefault, priority); // switch to hipStreamNonBlocking? 
+ } if (res != hipSuccess) { LOG(ERROR) << "could not allocate ROCM stream for device " << context->device_ordinal() << ": " << ToString(res); diff --git a/tensorflow/stream_executor/rocm/rocm_driver_wrapper.h b/tensorflow/stream_executor/rocm/rocm_driver_wrapper.h index bc5b6a87888..e09f7eb0eb2 100644 --- a/tensorflow/stream_executor/rocm/rocm_driver_wrapper.h +++ b/tensorflow/stream_executor/rocm/rocm_driver_wrapper.h @@ -117,6 +117,7 @@ namespace wrap { __macro(hipSetDevice) \ __macro(hipStreamAddCallback) \ __macro(hipStreamCreateWithFlags) \ + __macro(hipStreamCreateWithPriority) \ __macro(hipStreamDestroy) \ __macro(hipStreamQuery) \ __macro(hipStreamSynchronize) \ From b9fef3463e98aabf2c5c18f82891accf0cd9c238 Mon Sep 17 00:00:00 2001 From: zilinzhu Date: Fri, 5 Jun 2020 10:53:23 +0800 Subject: [PATCH 014/178] return unknown cardinality when preserve_cardinality is false --- .../kernels/data/experimental/map_and_batch_dataset_op.cc | 2 ++ .../core/kernels/data/experimental/scan_dataset_op.cc | 7 ++++++- tensorflow/core/kernels/data/map_dataset_op.cc | 7 ++++++- tensorflow/core/kernels/data/parallel_map_dataset_op.cc | 7 ++++++- 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc index 38550730a0f..a5d88e89d7f 100644 --- a/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc @@ -125,6 +125,8 @@ class MapAndBatchDatasetOp::Dataset : public DatasetBase { } int64 Cardinality() const override { + if (!preserve_cardinality_) + return kUnknownCardinality; int64 n = input_->Cardinality(); if (n == kInfiniteCardinality || n == kUnknownCardinality) { return n; diff --git a/tensorflow/core/kernels/data/experimental/scan_dataset_op.cc b/tensorflow/core/kernels/data/experimental/scan_dataset_op.cc index 2b7ece1661b..e78e9cecec5 100644 --- a/tensorflow/core/kernels/data/experimental/scan_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/scan_dataset_op.cc @@ -106,7 +106,12 @@ class ScanDatasetOp : public UnaryDatasetOpKernel { string DebugString() const override { return "ScanDatasetOp::Dataset"; } - int64 Cardinality() const override { return input_->Cardinality(); } + int64 Cardinality() const override { + if (preserve_cardinality_) + return input_->Cardinality(); + else + return kUnknownCardinality; + } Status CheckExternalState() const override { TF_RETURN_IF_ERROR(captured_func_->CheckExternalState()); diff --git a/tensorflow/core/kernels/data/map_dataset_op.cc b/tensorflow/core/kernels/data/map_dataset_op.cc index cbd0aa093e5..c29ba83f10d 100644 --- a/tensorflow/core/kernels/data/map_dataset_op.cc +++ b/tensorflow/core/kernels/data/map_dataset_op.cc @@ -72,7 +72,12 @@ class MapDatasetOp::Dataset : public DatasetBase { return name_utils::DatasetDebugString(kDatasetType); } - int64 Cardinality() const override { return input_->Cardinality(); } + int64 Cardinality() const override { + if (preserve_cardinality_) + return input_->Cardinality(); + else + return kUnknownCardinality; + } Status CheckExternalState() const override { TF_RETURN_IF_ERROR(captured_func_->CheckExternalState()); diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc index c01f8e7fc30..87af525c4d3 100644 --- a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc +++ 
b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc @@ -109,7 +109,12 @@ class ParallelMapDatasetOp::Dataset : public DatasetBase { params); } - int64 Cardinality() const override { return input_->Cardinality(); } + int64 Cardinality() const override { + if (preserve_cardinality_) + return input_->Cardinality(); + else + return kUnknownCardinality; + } Status CheckExternalState() const override { TF_RETURN_IF_ERROR(captured_func_->CheckExternalState()); From a8dcaa2b8fa2c95d53f9cb121948e383c3f5b76d Mon Sep 17 00:00:00 2001 From: zilinzhu Date: Fri, 5 Jun 2020 14:04:16 +0800 Subject: [PATCH 015/178] fix code style --- .../kernels/data/experimental/map_and_batch_dataset_op.cc | 3 ++- tensorflow/core/kernels/data/experimental/scan_dataset_op.cc | 5 +++-- tensorflow/core/kernels/data/map_dataset_op.cc | 5 +++-- tensorflow/core/kernels/data/parallel_map_dataset_op.cc | 5 +++-- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc index a5d88e89d7f..09783161091 100644 --- a/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc @@ -125,8 +125,9 @@ class MapAndBatchDatasetOp::Dataset : public DatasetBase { } int64 Cardinality() const override { - if (!preserve_cardinality_) + if (!preserve_cardinality_) { return kUnknownCardinality; + } int64 n = input_->Cardinality(); if (n == kInfiniteCardinality || n == kUnknownCardinality) { return n; diff --git a/tensorflow/core/kernels/data/experimental/scan_dataset_op.cc b/tensorflow/core/kernels/data/experimental/scan_dataset_op.cc index e78e9cecec5..723f32311d0 100644 --- a/tensorflow/core/kernels/data/experimental/scan_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/scan_dataset_op.cc @@ -107,10 +107,11 @@ class ScanDatasetOp : public UnaryDatasetOpKernel { string DebugString() const override { return "ScanDatasetOp::Dataset"; } int64 Cardinality() const override { - if (preserve_cardinality_) + if (preserve_cardinality_) { return input_->Cardinality(); - else + } else { return kUnknownCardinality; + } } Status CheckExternalState() const override { diff --git a/tensorflow/core/kernels/data/map_dataset_op.cc b/tensorflow/core/kernels/data/map_dataset_op.cc index c29ba83f10d..d34e4f2b041 100644 --- a/tensorflow/core/kernels/data/map_dataset_op.cc +++ b/tensorflow/core/kernels/data/map_dataset_op.cc @@ -73,10 +73,11 @@ class MapDatasetOp::Dataset : public DatasetBase { } int64 Cardinality() const override { - if (preserve_cardinality_) + if (preserve_cardinality_) { return input_->Cardinality(); - else + } else { return kUnknownCardinality; + } } Status CheckExternalState() const override { diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc index 87af525c4d3..bae90549841 100644 --- a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc +++ b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc @@ -110,10 +110,11 @@ class ParallelMapDatasetOp::Dataset : public DatasetBase { } int64 Cardinality() const override { - if (preserve_cardinality_) + if (preserve_cardinality_) { return input_->Cardinality(); - else + } else { return kUnknownCardinality; + } } Status CheckExternalState() const override { From b0e08f68d84476b941e8a821d653ab08129c92ce Mon Sep 17 00:00:00 2001 From: Milan Straka Date: Fri, 5 Jun 2020 12:53:08 +0200 
Subject: [PATCH 016/178] Convert IndexedSlices to Tensors for backward ConcreteFunction calls. Fixes #36236. Currently no warning is produced. --- tensorflow/python/eager/function.py | 6 +++++- tensorflow/python/eager/function_test.py | 17 +++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 37c802b9aa6..a8c55db383a 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -737,7 +737,11 @@ class _DelayedRewriteGradientFunctions(object): cleaned_doutputs = [] for doutput, placeholder in zip(doutputs, self._func_graph.outputs): if backprop_util.IsTrainable(placeholder): - if doutput is not None: + if isinstance(doutput, ops.IndexedSlices): + # Gradient passed to a backward ConcreteFunction must be tf.Tensor, + # so we convert tf.IndexedSlices to tf.Tensor. + cleaned_doutputs.append(ops.convert_to_tensor(doutput)) + elif doutput is not None: cleaned_doutputs.append(doutput) else: cleaned_doutputs.append(default_gradient.zeros_like(placeholder)) diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 078ca8b8878..4a0ade71d8c 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -3800,6 +3800,23 @@ class FunctionTest(test.TestCase, parameterized.TestCase): c5_summary = 'func2(x=8, y)' self.assertEqual(c5.pretty_printed_signature(verbose=False), c5_summary) + @test_util.run_in_graph_and_eager_modes + def testIndexedSlicesAsGradientsForConcreteFunctions(self): + @def_function.function + def summing_rnn(inputs): + return math_ops.reduce_sum(inputs, axis=1) + + @def_function.function + def gradients(inputs): + with backprop.GradientTape() as tape: + tape.watch(inputs) + hidden = summing_rnn(inputs) + hidden = array_ops.gather(hidden, constant_op.constant([0])) + loss = math_ops.reduce_mean(hidden) + return tape.gradient(loss, inputs) + + gradients(constant_op.constant([[[1.0], [2.0]]])) # No error is raised + class MultiDeviceTest(test.TestCase, parameterized.TestCase): From c85618733e4d15cde81e44d4641bd9ed379153d5 Mon Sep 17 00:00:00 2001 From: bhack Date: Fri, 5 Jun 2020 13:34:02 +0200 Subject: [PATCH 017/178] Install Tensorflow CPU package for CPU images --- tensorflow/tools/ci_build/Dockerfile.custom_op_ubuntu_16 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/Dockerfile.custom_op_ubuntu_16 b/tensorflow/tools/ci_build/Dockerfile.custom_op_ubuntu_16 index c540d03df7f..3f23124ff67 100644 --- a/tensorflow/tools/ci_build/Dockerfile.custom_op_ubuntu_16 +++ b/tensorflow/tools/ci_build/Dockerfile.custom_op_ubuntu_16 @@ -75,7 +75,7 @@ RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.6 0 # Install given tensorflow or tf-nightly version, if not specified, install the # latest official release ARG TF_PACKAGE=tensorflow ARG TF_PACKAGE_VERSION= -RUN pip install ${TF_PACKAGE}${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} +RUN pip install ${TF_PACKAGE}-cpu${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} # TODO(klimek): Figure out a better way to get the right include paths # forwarded when we install new packages. 
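
A note on PATCH 016 above (IndexedSlices as gradients for backward
ConcreteFunction calls): the gradient flowing into a backward ConcreteFunction
can arrive as a tf.IndexedSlices (for example, from the gather in the new test),
while the backward function expects dense tensors, hence the convert_to_tensor
call in that fix. A small illustrative sketch of the densification step,
assuming a TF 2.x eager environment (not code from the patch itself):

    import tensorflow as tf

    params = tf.Variable([[1.0], [2.0]])
    with tf.GradientTape() as tape:
        hidden = tf.gather(params, [0])   # gradient w.r.t. params is sparse
        loss = tf.reduce_mean(hidden)
    grad = tape.gradient(loss, params)
    print(type(grad).__name__)            # IndexedSlices
    dense = tf.convert_to_tensor(grad)    # densify before the backward call
    print(dense.shape)                    # (2, 1)
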
From 8d6fe0f7c5cd9048970944252b7c2a366b96563b Mon Sep 17 00:00:00 2001 From: Uday Bondhugula Date: Tue, 26 May 2020 01:14:52 +0530 Subject: [PATCH 018/178] [MLIR] Add ConvOp conversion from xla_hlo -> xla_lhlo -> linalg Add conversion for the convolution op from the xla_hlo dialect to xla_lhlo, and from xla_lhlo to linalg. The two conversion passes in context here are -hlo-legalize-to-lhlo and -lhlo-legalize-to-linalg. A part of the code for the LHLO to Linalg conversion is borrowed and adapted from IREE's xla_hlo to linalg conversion (https://github.com/google/iree/). Signed-off-by: Uday Bondhugula --- tensorflow/compiler/mlir/xla/BUILD | 5 + tensorflow/compiler/mlir/xla/ir/lhlo_ops.cc | 1 + tensorflow/compiler/mlir/xla/ir/lhlo_ops.h | 2 + tensorflow/compiler/mlir/xla/ir/lhlo_ops.td | 30 ++++- .../mlir/xla/tests/hlo-legalize-to-lhlo.mlir | 40 ++++++- .../xla/tests/lhlo-legalize-to-linalg.mlir | 26 ++++- .../xla/transforms/hlo_legalize_to_lhlo.cc | 1 + .../mlir/xla/transforms/map_hlo_to_lhlo_op.h | 1 + .../xla/transforms/xla_legalize_to_linalg.cc | 103 ++++++++++++++++++ 9 files changed, 203 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/mlir/xla/BUILD b/tensorflow/compiler/mlir/xla/BUILD index 736651b5022..972d5ea2c51 100644 --- a/tensorflow/compiler/mlir/xla/BUILD +++ b/tensorflow/compiler/mlir/xla/BUILD @@ -28,6 +28,8 @@ package_group( exports_files(["ir/hlo_ops.td"]) +exports_files(["ir/lhlo_ops.td"]) + filegroup( name = "hlo_ops_td_files", srcs = [ @@ -87,6 +89,8 @@ gentbl( tbl_outs = [ ("-gen-op-decls", "ir/lhlo_ops.h.inc"), ("-gen-op-defs", "ir/lhlo_ops.cc.inc"), + ("-gen-struct-attr-decls", "ir/lhlo_structs.h.inc"), + ("-gen-struct-attr-defs", "ir/lhlo_structs.cc.inc"), ], tblgen = "@llvm-project//mlir:mlir-tblgen", td_file = "ir/lhlo_ops.td", @@ -362,6 +366,7 @@ cc_library( ":map_hlo_to_lhlo_op", "@com_google_absl//absl/memory", "@llvm-project//mlir:IR", + "@llvm-project//mlir:LinalgOps", "@llvm-project//mlir:Pass", "@llvm-project//mlir:StandardOps", "@llvm-project//mlir:Transforms", diff --git a/tensorflow/compiler/mlir/xla/ir/lhlo_ops.cc b/tensorflow/compiler/mlir/xla/ir/lhlo_ops.cc index 6f9b39377af..24cffa756ec 100644 --- a/tensorflow/compiler/mlir/xla/ir/lhlo_ops.cc +++ b/tensorflow/compiler/mlir/xla/ir/lhlo_ops.cc @@ -45,6 +45,7 @@ limitations under the License. #include "tensorflow/compiler/mlir/xla/ir/lhlo_ops.h.inc" namespace mlir { +#include "tensorflow/compiler/mlir/xla/ir/lhlo_structs.cc.inc" namespace xla_lhlo { XlaLhloDialect::XlaLhloDialect(MLIRContext *context) diff --git a/tensorflow/compiler/mlir/xla/ir/lhlo_ops.h b/tensorflow/compiler/mlir/xla/ir/lhlo_ops.h index 3827e8a7a4e..6ea5e2522c2 100644 --- a/tensorflow/compiler/mlir/xla/ir/lhlo_ops.h +++ b/tensorflow/compiler/mlir/xla/ir/lhlo_ops.h @@ -33,6 +33,8 @@ limitations under the License. namespace mlir { class OpBuilder; +#include "tensorflow/compiler/mlir/xla/ir/lhlo_structs.h.inc" + namespace xla_lhlo { class XlaLhloDialect : public Dialect { diff --git a/tensorflow/compiler/mlir/xla/ir/lhlo_ops.td b/tensorflow/compiler/mlir/xla/ir/lhlo_ops.td index d9f3648bb09..6ba9935d85e 100644 --- a/tensorflow/compiler/mlir/xla/ir/lhlo_ops.td +++ b/tensorflow/compiler/mlir/xla/ir/lhlo_ops.td @@ -407,11 +407,39 @@ def LHLO_ConcatenateOp : LHLO_Op<"concatenate", []>, BASE_HLO_ConcatenateOp { ); } +// TODO(bondhugula): Make this struct dialect independent so that it can be +// shared between the HLO and LHLO dialects. 
+def ConvDimensionNumbers : StructAttr<"ConvDimensionNumbers", LHLO_Dialect, [ + StructFieldAttr<"input_batch_dimension",I64Attr>, + StructFieldAttr<"input_feature_dimension", I64Attr>, + StructFieldAttr<"input_spatial_dimensions", I64ElementsAttr>, + StructFieldAttr<"kernel_input_feature_dimension", I64Attr>, + StructFieldAttr<"kernel_output_feature_dimension", I64Attr>, + StructFieldAttr<"kernel_spatial_dimensions", I64ElementsAttr>, + StructFieldAttr<"output_batch_dimension", I64Attr>, + StructFieldAttr<"output_feature_dimension", I64Attr>, + StructFieldAttr<"output_spatial_dimensions", I64ElementsAttr>] > { + + let description = "Structure of dimension information for conv op"; +} + def LHLO_ConvOp : LHLO_Op<"convolution", []>, BASE_HLO_ConvOp { let arguments = (ins Arg:$lhs, Arg:$rhs, - Arg:$output + Arg:$output, + // Default value: one for each of the spatial dimension. + OptionalAttr:$window_strides, + // Default value: zero for each of the spatial dimension. + OptionalAttr:$padding, + // Default value: one for each of the spatial dimension. + OptionalAttr:$lhs_dilation, + // Default value: one for each of the spatial dimension. + OptionalAttr:$rhs_dilation, + ConvDimensionNumbers:$dimension_numbers, + I64Attr:$feature_group_count, + I64Attr:$batch_group_count, + HLO_PrecisionConfigAttr:$precision_config ); } diff --git a/tensorflow/compiler/mlir/xla/tests/hlo-legalize-to-lhlo.mlir b/tensorflow/compiler/mlir/xla/tests/hlo-legalize-to-lhlo.mlir index 38ea818aea8..aca4bf5865d 100644 --- a/tensorflow/compiler/mlir/xla/tests/hlo-legalize-to-lhlo.mlir +++ b/tensorflow/compiler/mlir/xla/tests/hlo-legalize-to-lhlo.mlir @@ -432,7 +432,39 @@ func @dot(%arg0: tensor<1024x1024xf32>) -> tensor<1024x1024xf32> { // CHECK-SAME: (%[[ARG0:.*]]: [[TYPE:.*]], // CHECK-SAME: %[[RESULT:.*]]: [[TYPE]]) // CHECK: "xla_lhlo.dot"(%[[ARG0]], %[[ARG0]], %{{.*}}) : ([[TYPE]], [[TYPE]], [[TYPE]]) -> () - %dot = "xla_hlo.dot"(%arg0, %arg0) - : (tensor<1024x1024xf32>, tensor<1024x1024xf32>) -> tensor<1024x1024xf32> - return %dot : tensor<1024x1024xf32> - } + %dot = "xla_hlo.dot"(%arg0, %arg0) + : (tensor<1024x1024xf32>, tensor<1024x1024xf32>) -> tensor<1024x1024xf32> + return %dot : tensor<1024x1024xf32> +} + +// ----- + +// CHECK-LABEL: func @conv +func @conv(%input: tensor<3x5x5x3xf32>, %filter : tensor<2x2x3x4xf32>) -> tensor<3x5x5x4xf32> { + %c0 = constant 0 : index + // CHECK: %[[OUT:.*]] = alloc() : memref<3x5x5x4xf32> + // CHECK: "xla_lhlo.convolution"(%{{.+}}, %{{.+}}, %[[OUT]]) + // CHECK-SAME: padding = dense<[ + // CHECK-SAME: [0, 1], [0, 1]]> : tensor<2x2xi64> + // CHECK-SAME: rhs_dilation = dense<[1, 2]> + // CHECK-SAME: window_strides = dense<[2, 1]> + %out = "xla_hlo.convolution"(%filter, %input) { + batch_group_count = 1 : i64, + dimension_numbers = { + input_batch_dimension = 0 : i64, + input_feature_dimension = 3 : i64, + input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, + kernel_input_feature_dimension = 2 : i64, + kernel_output_feature_dimension = 3 : i64, + kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, + output_batch_dimension = 0 : i64, + output_feature_dimension = 3 : i64, + output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64> + }, + feature_group_count = 1 : i64, + padding = dense<[[0, 1], [0, 1]]> : tensor<2x2xi64>, + rhs_dilation = dense<[1, 2]> : tensor<2xi64>, + window_strides = dense<[2, 1]> : tensor<2xi64> + } : (tensor<2x2x3x4xf32>, tensor<3x5x5x3xf32>) -> tensor<3x5x5x4xf32> + return %out : tensor<3x5x5x4xf32> +} diff --git 
a/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-linalg.mlir b/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-linalg.mlir index 626e905695c..ce5d0d28076 100644 --- a/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-linalg.mlir +++ b/tensorflow/compiler/mlir/xla/tests/lhlo-legalize-to-linalg.mlir @@ -469,7 +469,7 @@ func @negi(%input: memref<2x2xi32>, %result: memref<2x2xi32>) { } // CHECK: linalg.generic // CHECK-NEXT: ^bb0(%[[OPERAND_IN:.*]]: i32, %[[RESULT_OUT:.*]]): -// CHECK-NEXT: %[[L0:.*]] = constant 0 : i32 +// CHECK-NEXT: %[[L0:.*]] = constant 0 : i32 // CHECK-NEXT: %[[RESULT:.*]] = subi %[[L0]], %[[OPERAND_IN]] : i32 // CHECK-NEXT: linalg.yield %[[RESULT]] : i32 @@ -649,3 +649,27 @@ func @reverse(%arg0: memref<2x3xf32>, %arg1: memref<2x3xf32>) { return } // CHECK: linalg.generic {{{.*}}indexing_maps = [#[[OPERAND_MAP]], #[[RESULT_MAP]]] + + +// ----- + +func @conv(%input: memref<3x5x5x3xf32>, %filter: memref<2x2x3x4xf32>, %output: memref<3x5x5x4xf32>) { + %c0 = constant 0 : index + %0 = alloc() : memref<3x5x5x4xf32> + // CHECK: linalg.conv(%{{.+}}, %{{.+}}, %{{.+}}) + // CHECK-SAME: dilations = [1, 2] + // CHECK-SAME: padding = dense<{{\[\[}}0, 1], [0, 1]]> : tensor<2x2xi64> + // CHECK-SAME: strides = [2, 1]} + // With all atributes explicitly specified. + "xla_lhlo.convolution"(%filter, %input, %0) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, padding = dense<[[0, 1], [0, 1]]> : tensor<2x2xi64>, rhs_dilation = dense<[1, 2]> : tensor<2xi64>, window_strides = dense<[2, 1]> : tensor<2xi64>} : (memref<2x2x3x4xf32>, memref<3x5x5x3xf32>, memref<3x5x5x4xf32>) -> () + + // Dilation left unspecified, sets default dilation since linalg expects it. + // CHECK: linalg.conv(%{{.+}}, %{{.+}}, %{{.+}}) + // CHECK-SAME: dilations = [1, 1] + // Padding is not set if it's zero. 
+ // CHECK-NOT: padding + "xla_lhlo.convolution"(%filter, %input, %0) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64, input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64, kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64, output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, window_strides = dense<[2, 1]> : tensor<2xi64>} : (memref<2x2x3x4xf32>, memref<3x5x5x3xf32>, memref<3x5x5x4xf32>) -> () + + "xla_lhlo.copy"(%0, %output) : (memref<3x5x5x4xf32>, memref<3x5x5x4xf32>) -> () + "xla_lhlo.terminator"() : () -> () +} diff --git a/tensorflow/compiler/mlir/xla/transforms/hlo_legalize_to_lhlo.cc b/tensorflow/compiler/mlir/xla/transforms/hlo_legalize_to_lhlo.cc index 6f5bafef4c0..45aed7e10ff 100644 --- a/tensorflow/compiler/mlir/xla/transforms/hlo_legalize_to_lhlo.cc +++ b/tensorflow/compiler/mlir/xla/transforms/hlo_legalize_to_lhlo.cc @@ -424,6 +424,7 @@ void populateHLOToLHLOConversionPattern( HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, + HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, diff --git a/tensorflow/compiler/mlir/xla/transforms/map_hlo_to_lhlo_op.h b/tensorflow/compiler/mlir/xla/transforms/map_hlo_to_lhlo_op.h index 21b954a3eb4..4b9397795a1 100644 --- a/tensorflow/compiler/mlir/xla/transforms/map_hlo_to_lhlo_op.h +++ b/tensorflow/compiler/mlir/xla/transforms/map_hlo_to_lhlo_op.h @@ -45,6 +45,7 @@ MAP_HLO_TO_LHLO(CeilOp); MAP_HLO_TO_LHLO(ConstOp); MAP_HLO_TO_LHLO(CompareOp); MAP_HLO_TO_LHLO(ComplexOp); +MAP_HLO_TO_LHLO(ConvOp); MAP_HLO_TO_LHLO(ConvertOp); MAP_HLO_TO_LHLO(CopyOp); MAP_HLO_TO_LHLO(CosOp); diff --git a/tensorflow/compiler/mlir/xla/transforms/xla_legalize_to_linalg.cc b/tensorflow/compiler/mlir/xla/transforms/xla_legalize_to_linalg.cc index 2b496677d62..fd0c9541e7c 100644 --- a/tensorflow/compiler/mlir/xla/transforms/xla_legalize_to_linalg.cc +++ b/tensorflow/compiler/mlir/xla/transforms/xla_legalize_to_linalg.cc @@ -192,6 +192,108 @@ class ScalarPointwiseToStandardConverter : public OpConversionPattern { } }; +//===----------------------------------------------------------------------===// +// xla_lhlo.convolution conversion pattern. +//===----------------------------------------------------------------------===// + +/// Converts xla_lhlo.convolution operation to a linalg.conv op. +struct ConvToLinalgConverter : public OpConversionPattern { + public: + using OpConversionPattern::OpConversionPattern; + + // This code has been adapted from IREE's + // (https://github.com/google/iree/) xla_hlo -> linalg conversion. + LogicalResult matchAndRewrite( + xla_lhlo::ConvOp op, ArrayRef args, + ConversionPatternRewriter& rewriter) const final { + // Check validity of dimension information. + if (const xla_lhlo::ConvDimensionNumbers& dimensionNumbers = + op.dimension_numbers()) { + const int inputSpatialRank = + llvm::size(dimensionNumbers.input_spatial_dimensions()); + // The dimensions for input should follow the order of + // batch_count, spatial_dims..., input_feature_count. 
+ if (dimensionNumbers.input_batch_dimension().getInt() != 0 || + dimensionNumbers.input_feature_dimension().getInt() != + (inputSpatialRank + 1)) + return failure(); + + const int kernelSpatialRank = + llvm::size(dimensionNumbers.kernel_spatial_dimensions()); + // The dimensions for filter should follow the order of + // spatial_dims..., input_feature_count, num_output_feature_count. + if (dimensionNumbers.kernel_input_feature_dimension().getInt() != + kernelSpatialRank || + dimensionNumbers.kernel_output_feature_dimension().getInt() != + (kernelSpatialRank + 1)) + return failure(); + + const int outputSpatialRank = + llvm::size(dimensionNumbers.output_spatial_dimensions()); + // The dimensions for output should follow the order of + // batch_count, spatial_dims.., output_feature_count. + if (dimensionNumbers.output_batch_dimension().getInt() != 0 || + dimensionNumbers.output_feature_dimension().getInt() != + (outputSpatialRank + 1)) + return failure(); + + if (inputSpatialRank != outputSpatialRank || + inputSpatialRank != kernelSpatialRank) + return failure(); + + auto inputSpatialDim = + dimensionNumbers.input_spatial_dimensions().begin(); + auto kernelSpatialDim = + dimensionNumbers.kernel_spatial_dimensions().begin(); + auto outputSpatialDim = + dimensionNumbers.output_spatial_dimensions().begin(); + // Check if spatial dims are ordered correctly. + for (int i = 0; i < inputSpatialRank; ++i) { + const int dim = i + 1; + if ((*inputSpatialDim++).getZExtValue() != dim || + (*outputSpatialDim++).getZExtValue() != dim || + (*kernelSpatialDim++).getZExtValue() != i) + return failure(); + } + } + + // TODO: LHS dilation for deconvolution not supported yet. + if (op.lhs_dilation()) { + return failure(); + } + + llvm::SmallVector strides; + if (auto windowStrides = op.window_strides()) { + auto range = windowStrides->getAttributeValues(); + strides.assign(range.begin(), range.end()); + } + auto stridesArg = ArrayAttr::get(strides, op.getContext()); + + llvm::SmallVector dilation; + if (auto rhsDilation = op.rhs_dilation()) { + auto range = rhsDilation->getAttributeValues(); + dilation.assign(range.begin(), range.end()); + } else { + // Default dilation of 1. + dilation.resize(2, IntegerAttr::get(rewriter.getIntegerType(64), 1)); + } + auto dilationArg = ArrayAttr::get(dilation, op.getContext()); + + // Set padding only if it is non-zero. + DenseIntElementsAttr padding = op.paddingAttr(); + if (!padding || !llvm::any_of(padding.getValues(), [](APInt intVal) { + return !intVal.isNullValue(); + })) { + padding = nullptr; + } + + // The order of input and filter are switched with linalg.conv. + rewriter.replaceOpWithNewOp( + op, args[1], args[0], args[2], stridesArg, dilationArg, padding); + return success(); + } +}; + /// Base class for lowering xla operations that have one operand and one result, /// and are semantically equivalent to a copy of the input to the output (like /// transpose, some reshape, etc.). 
The derived classes need to provide a method @@ -641,6 +743,7 @@ void populateLHLOToLinalgConversionPattern(MLIRContext* context, patterns->insert, BroadcastInDimConverter, ConstConverter, + ConvToLinalgConverter, IotaConverter, PointwiseToLinalgConverter, PointwiseToLinalgConverter, From f7301c125ce8ecde7d5736edb838172fe23272d4 Mon Sep 17 00:00:00 2001 From: bhack Date: Fri, 5 Jun 2020 20:38:07 +0200 Subject: [PATCH 019/178] Update Dockerfile.custom_op_ubuntu_16 --- tensorflow/tools/ci_build/Dockerfile.custom_op_ubuntu_16 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/Dockerfile.custom_op_ubuntu_16 b/tensorflow/tools/ci_build/Dockerfile.custom_op_ubuntu_16 index 3f23124ff67..0f02c34b057 100644 --- a/tensorflow/tools/ci_build/Dockerfile.custom_op_ubuntu_16 +++ b/tensorflow/tools/ci_build/Dockerfile.custom_op_ubuntu_16 @@ -75,7 +75,7 @@ RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.6 0 # Install given tensorflow or tf-nightly version, if not specified, install the # latest official release ARG TF_PACKAGE=tensorflow ARG TF_PACKAGE_VERSION= -RUN pip install ${TF_PACKAGE}-cpu${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} +RUN pip3 install ${TF_PACKAGE}-cpu${TF_PACKAGE_VERSION:+==${TF_PACKAGE_VERSION}} # TODO(klimek): Figure out a better way to get the right include paths # forwarded when we install new packages. From 52e2de09b7b06289db0c6ab1a5b4eb84a9c0cf71 Mon Sep 17 00:00:00 2001 From: bhack Date: Fri, 5 Jun 2020 23:30:11 +0200 Subject: [PATCH 020/178] Delete Dockerfile.custom_op --- .../tools/ci_build/Dockerfile.custom_op | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 tensorflow/tools/ci_build/Dockerfile.custom_op diff --git a/tensorflow/tools/ci_build/Dockerfile.custom_op b/tensorflow/tools/ci_build/Dockerfile.custom_op deleted file mode 100644 index 4493b88348b..00000000000 --- a/tensorflow/tools/ci_build/Dockerfile.custom_op +++ /dev/null @@ -1,19 +0,0 @@ -FROM ubuntu:14.04 - -LABEL maintainer="Yifei Feng " - -# Copy and run the install scripts. -COPY install/*.sh /install/ -RUN /install/install_bootstrap_deb_packages.sh -RUN add-apt-repository -y ppa:openjdk-r/ppa && \ - add-apt-repository -y ppa:george-edison55/cmake-3.x -RUN /install/install_deb_packages.sh -RUN /install/install_pip_packages.sh -RUN /install/install_bazel.sh -RUN /install/install_proto3.sh -RUN /install/install_buildifier.sh -RUN /install/install_auditwheel.sh -RUN /install/install_golang.sh - -# Set up the master bazelrc configuration file. 
-COPY install/.bazelrc /etc/bazel.bazelrc From a894d7a9369d454d004138b2639232bda49381a5 Mon Sep 17 00:00:00 2001 From: nammbash Date: Fri, 5 Jun 2020 16:09:52 -0700 Subject: [PATCH 021/178] remove duplicate registration for softmax bf16 op --- tensorflow/core/kernels/mkl_tmp_bf16_ops.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tensorflow/core/kernels/mkl_tmp_bf16_ops.cc b/tensorflow/core/kernels/mkl_tmp_bf16_ops.cc index ed5fec677e8..9b2d09fb827 100644 --- a/tensorflow/core/kernels/mkl_tmp_bf16_ops.cc +++ b/tensorflow/core/kernels/mkl_tmp_bf16_ops.cc @@ -58,9 +58,7 @@ namespace tensorflow { REGISTER_KERNEL_BUILDER( \ Name("_FusedMatMul").Device(DEVICE_CPU).TypeConstraint("T"), NoOp); \ REGISTER_KERNEL_BUILDER( \ - Name("BatchMatMulV2").Device(DEVICE_CPU).TypeConstraint("T"), NoOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("Softmax").Device(DEVICE_CPU).TypeConstraint("T"), NoOp); + Name("BatchMatMulV2").Device(DEVICE_CPU).TypeConstraint("T"), NoOp); TF_CALL_bfloat16(REGISTER_CPU); #undef REGISTER_CPU From 6bae8af83b492cfdc11289de24626da4a74f48bd Mon Sep 17 00:00:00 2001 From: nammbash Date: Fri, 5 Jun 2020 16:15:49 -0700 Subject: [PATCH 022/178] Make bf16 default with mkl and print out proper error message for legacy systems such as broadwell --- tensorflow/core/graph/mkl_graph_util.h | 25 +++++++++++++++++++++++-- tensorflow/tensorflow.bzl | 2 +- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/graph/mkl_graph_util.h b/tensorflow/core/graph/mkl_graph_util.h index 22a4814f3f8..3ea23250fc5 100644 --- a/tensorflow/core/graph/mkl_graph_util.h +++ b/tensorflow/core/graph/mkl_graph_util.h @@ -17,9 +17,12 @@ limitations under the License. #define TENSORFLOW_CORE_GRAPH_MKL_GRAPH_UTIL_H_ #ifdef INTEL_MKL +#include "absl/base/call_once.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/cpu_info.h" namespace tensorflow { // Since our ops are going to produce and also consume N addition tensors @@ -122,6 +125,23 @@ inline string GetMklEagerOpName(const string& name) { return string(kMklEagerOpPrefix) + name; } +#ifdef ENABLE_INTEL_MKL_BFLOAT16 +static inline bool CheckBfloat16Support(DataType T) { + static absl::once_flag cpu_bfloat16_warn_once_flag; + // Restrict bfloat16 ops to platforms with at least AVX512 support, fall back + // to Eigen implementation. + if (!(port::TestCPUFeature(port::CPUFeature::AVX512F)) && T == DT_BFLOAT16) { + absl::call_once(cpu_bfloat16_warn_once_flag, [] { + LOG(ERROR) + << "oneDNN BFloat16 support are only on platforms with AVX512. " + "Falling back to default implementation if present."; + }); + return false; + } + return true; +} +#endif + // Check whether opname with type T is registered as MKL operator // that can accept input tensors in MKL layout. 
// @@ -139,7 +159,7 @@ static inline bool IsMklLayoutDependentOp(const string& op_name, DataType T) { #ifdef ENABLE_INTEL_MKL_BFLOAT16 // Restrict regular ops to FLOAT and BFLOAT16 if (kernel.find(kMklLayoutDependentOpLabelPattern) != string::npos) { - return (T == DT_FLOAT || T == DT_BFLOAT16); + return (T == DT_FLOAT || (T == DT_BFLOAT16 && CheckBfloat16Support(T))); } #else // Restrict regular ops to FLOAT @@ -196,7 +216,8 @@ static inline bool IsMklNameChangeOp(const string& op_name, DataType T) { isTypeAllowed = (T == DT_COMPLEX128 || T == DT_COMPLEX64 || T == DT_DOUBLE || T == DT_FLOAT); #ifdef ENABLE_INTEL_MKL_BFLOAT16 - isTypeAllowed = isTypeAllowed || (T == DT_BFLOAT16); + isTypeAllowed = + isTypeAllowed || (T == DT_BFLOAT16 && CheckBfloat16Support(T)); #endif return isTypeAllowed; } diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 03c561f4fc1..b36a180bbf3 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -327,7 +327,7 @@ def tf_copts( if_tensorrt(["-DGOOGLE_TENSORRT=1"]) + if_mkl(["-DINTEL_MKL=1", "-DEIGEN_USE_VML"]) + if_mkl_open_source_only(["-DINTEL_MKL_DNN_ONLY"]) + - if_mkl_v1_open_source_only(["-DENABLE_MKLDNN_V1"]) + + if_mkl_v1_open_source_only(["-DENABLE_MKLDNN_V1", "-DENABLE_INTEL_MKL_BFLOAT16"]) + if_mkldnn_threadpool([ "-DENABLE_MKLDNN_THREADPOOL", "-DENABLE_MKLDNN_V1", From ff07590ce1a32851f49127aeaac70aa47d94160d Mon Sep 17 00:00:00 2001 From: zilinzhu Date: Sat, 6 Jun 2020 10:38:45 +0800 Subject: [PATCH 023/178] add test --- .../data/kernel_tests/cardinality_test.py | 46 +++++++++++++++---- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/data/kernel_tests/cardinality_test.py b/tensorflow/python/data/kernel_tests/cardinality_test.py index bbc8eac6b0c..0785aa464b9 100644 --- a/tensorflow/python/data/kernel_tests/cardinality_test.py +++ b/tensorflow/python/data/kernel_tests/cardinality_test.py @@ -26,6 +26,11 @@ from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import combinations from tensorflow.python.platform import test +def reduce_fn(x, y): + name, dataset_fn, expected_result = y + return x + combinations.combine( + dataset_fn=combinations.NamedObject(name, dataset_fn), + expected_result=expected_result) def _test_combinations(): # pylint: disable=g-long-lambda @@ -83,9 +88,6 @@ def _test_combinations(): lambda _: dataset_ops.Dataset.from_tensors(0), cycle_length=1, num_parallel_calls=1), dataset_ops.UNKNOWN), - ("Map1", lambda: dataset_ops.Dataset.range(5).map(lambda x: x), 5), - ("Map2", lambda: dataset_ops.Dataset.range(5).map( - lambda x: x, num_parallel_calls=1), 5), ("PaddedBatch1", lambda: dataset_ops.Dataset.range(5).padded_batch( 2, [], drop_remainder=True), 2), ("PaddedBatch2", lambda: dataset_ops.Dataset.range(5).padded_batch( @@ -150,14 +152,27 @@ def _test_combinations(): lambda _: True))), dataset_ops.UNKNOWN), ] - def reduce_fn(x, y): - name, dataset_fn, expected_result = y - return x + combinations.combine( - dataset_fn=combinations.NamedObject(name, dataset_fn), - expected_result=expected_result) + return functools.reduce(reduce_fn, cases, []) + +def _v1_only_test_combinations(): + # pylint: disable=g-long-lambda + cases = [ + ("Map1", lambda: dataset_ops.Dataset.range(5).map(lambda x: x), dataset_ops.UNKNOWN), + ("Map2", lambda: dataset_ops.Dataset.range(5).map( + lambda x: x, num_parallel_calls=1), dataset_ops.UNKNOWN), + ] return functools.reduce(reduce_fn, cases, []) +def _v2_only_test_combinations(): + # pylint: 
disable=g-long-lambda + cases = [ + ("Map1", lambda: dataset_ops.Dataset.range(5).map(lambda x: x), 5), + ("Map2", lambda: dataset_ops.Dataset.range(5).map( + lambda x: x, num_parallel_calls=1), 5), + ] + + return functools.reduce(reduce_fn, cases, []) class CardinalityTest(test_base.DatasetTestBase, parameterized.TestCase): """Tests for `tf.data.Dataset.cardinality()`.""" @@ -169,6 +184,21 @@ class CardinalityTest(test_base.DatasetTestBase, parameterized.TestCase): dataset = dataset_fn() self.assertEqual(self.evaluate(dataset.cardinality()), expected_result) + @combinations.generate( + combinations.times(combinations.combine(tf_api_version=1, + mode=["eager", "graph"]), + _v1_only_test_combinations())) + def testCardinalityV1Only(self, dataset_fn, expected_result): + dataset = dataset_fn() + self.assertEqual(self.evaluate(dataset.cardinality()), expected_result) + + @combinations.generate( + combinations.times(combinations.combine(tf_api_version=2, + mode=["eager", "graph"]), + _v2_only_test_combinations())) + def testCardinalityV2Only(self, dataset_fn, expected_result): + dataset = dataset_fn() + self.assertEqual(self.evaluate(dataset.cardinality()), expected_result) if __name__ == "__main__": test.main() From b0375e267c55ae97e65b50ce51200e012ac3e8ad Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 6 Jun 2020 01:24:20 +0000 Subject: [PATCH 024/178] Fix floating point exception with tf.unravel_index This PR tries to address the issue raised in 40204 where `tf.unravel_index` caused floating point exception when any one element is 0 in `dims`. The issue is that `indices` in `tf.unravel_index` should make sure it is not out of boundary compared to `dims`. This PR fixes the issue by adding a check before hand, though Eigen. This PR fixes 40204. Signed-off-by: Yong Tang --- tensorflow/core/kernels/unravel_index_op.cc | 18 ++++++++++++++++++ .../python/kernel_tests/array_ops_test.py | 10 ++++++++++ 2 files changed, 28 insertions(+) diff --git a/tensorflow/core/kernels/unravel_index_op.cc b/tensorflow/core/kernels/unravel_index_op.cc index 8d839ba85a7..d41915e5c14 100644 --- a/tensorflow/core/kernels/unravel_index_op.cc +++ b/tensorflow/core/kernels/unravel_index_op.cc @@ -54,6 +54,24 @@ class UnravelIndexOp : public OpKernel { auto dims = dims_tensor.vec(); + // Chek to make sure indices is not out of boundary + Eigen::Tensor check; + if (TensorShapeUtils::IsScalar(indices_tensor.shape())) { + auto indices = indices_tensor.scalar(); + auto dims_prod = dims.prod(); + check = (indices < dims_prod).all(); + } else { + auto indices = indices_tensor.vec(); + auto dims_prod = dims.prod() + .reshape(Eigen::array({1})) + .broadcast( + Eigen::array({indices_tensor.NumElements()})); + check = (indices < dims_prod).all(); + } + OP_REQUIRES( + ctx, check(), + errors::InvalidArgument("index is out of bound as with dims")); + Eigen::array reverse({true}); Tensor strides_tensor; diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index ec3ed932996..caf05042557 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -1384,6 +1384,16 @@ class UnravelIndexTest(test_util.TensorFlowTestCase): out_3 = array_ops.unravel_index(indices_3, dims_3) self.assertAllEqual(out_3.eval(), [[3, 6, 6], [4, 5, 1]]) + # Test case for GitHub issue 40204. 
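+  # With dims = [3, 0] the requested shape holds zero elements, so every index
+  # is out of range; previously this crashed with a floating point exception
+  # instead of returning an error. The new kernel check above rejects it with
+  # InvalidArgumentError.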
+ def testUnravelIndexZeroDim(self): + with self.cached_session(): + for dtype in [dtypes.int32, dtypes.int64]: + with self.assertRaisesRegexp( + errors.InvalidArgumentError, "index is out of bound as with dims"): + indices = constant_op.constant([2, 5, 7], dtype=dtype) + dims = constant_op.constant([3, 0], dtype=dtype) + self.evaluate(array_ops.unravel_index(indices=indices, dims=dims)) + class GuaranteeConstOpTest(test_util.TensorFlowTestCase): From c949172f5d5ac2f8c582c978dcfd3ef87773a5e3 Mon Sep 17 00:00:00 2001 From: wyzhao Date: Tue, 14 Apr 2020 17:22:39 +0800 Subject: [PATCH 025/178] add a pass to outline kLoop/kInput fusion pattern in xla_hlo dialect --- tensorflow/compiler/mlir/xla/BUILD | 74 +++ tensorflow/compiler/mlir/xla/ir/hlo_ops.h | 1 + tensorflow/compiler/mlir/xla/ir/hlo_ops.td | 55 +- .../xla/ir/infer_fusibility_op_interface.cc | 22 + .../xla/ir/infer_fusibility_op_interface.h | 28 + .../xla/ir/infer_fusibility_op_interface.td | 161 +++++ .../compiler/mlir/xla/mlir_hlo_to_hlo.cc | 5 + .../mlir/xla/tests/xla-hlo-fusion.mlir | 97 +++ .../mlir/xla/transforms/cycle_detector.cc | 339 +++++++++++ .../mlir/xla/transforms/cycle_detector.h | 164 +++++ .../xla/transforms/cycle_detector_test.cc | 91 +++ .../compiler/mlir/xla/transforms/passes.h | 3 + .../mlir/xla/transforms/xla_hlo_fusion.cc | 568 ++++++++++++++++++ 13 files changed, 1605 insertions(+), 3 deletions(-) create mode 100644 tensorflow/compiler/mlir/xla/ir/infer_fusibility_op_interface.cc create mode 100644 tensorflow/compiler/mlir/xla/ir/infer_fusibility_op_interface.h create mode 100644 tensorflow/compiler/mlir/xla/ir/infer_fusibility_op_interface.td create mode 100644 tensorflow/compiler/mlir/xla/tests/xla-hlo-fusion.mlir create mode 100644 tensorflow/compiler/mlir/xla/transforms/cycle_detector.cc create mode 100644 tensorflow/compiler/mlir/xla/transforms/cycle_detector.h create mode 100644 tensorflow/compiler/mlir/xla/transforms/cycle_detector_test.cc create mode 100644 tensorflow/compiler/mlir/xla/transforms/xla_hlo_fusion.cc diff --git a/tensorflow/compiler/mlir/xla/BUILD b/tensorflow/compiler/mlir/xla/BUILD index 736651b5022..d002a498c90 100644 --- a/tensorflow/compiler/mlir/xla/BUILD +++ b/tensorflow/compiler/mlir/xla/BUILD @@ -35,6 +35,7 @@ filegroup( "ir/hlo_ops.td", "ir/hlo_ops_base.td", "ir/hlo_utils.td", + "ir/infer_fusibility_op_interface.td", "ir/lhlo_ops.td", "@llvm-project//mlir:OpBaseTdFiles", "@llvm-project//mlir:include/mlir/Interfaces/InferTypeOpInterface.td", @@ -118,6 +119,42 @@ gentbl( td_srcs = [":hlo_ops_td_files"], ) +gentbl( + name = "infer_fusibility_op_interface_gen", + tbl_outs = [ + ( + "-gen-op-interface-decls", + "ir/infer_fusibility_op_interface.h.inc", + ), + ( + "-gen-op-interface-defs", + "ir/infer_fusibility_op_interface.cc.inc", + ), + ], + tblgen = "@llvm-project//mlir:mlir-tblgen", + td_file = "ir/infer_fusibility_op_interface.td", + td_srcs = [ + ":hlo_ops_td_files", + ], +) + +cc_library( + name = "infer_fusibility_op_interface", + srcs = [ + "ir/infer_fusibility_op_interface.cc", + ], + hdrs = [ + "ir/infer_fusibility_op_interface.h", + "ir/infer_fusibility_op_interface.h.inc", + ], + deps = [ + ":infer_fusibility_op_interface_gen", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Support", + ], + alwayslink = 1, +) + cc_library( name = "xla_legalize_tf", srcs = [ @@ -369,6 +406,40 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "cycle_detector", + srcs = ["transforms/cycle_detector.cc"], + hdrs = ["transforms/cycle_detector.h"], + deps = [ + 
"@llvm-project//llvm:core", + ], + alwayslink = 1, +) + +tf_cc_test( + name = "cycle_detector_test", + srcs = ["transforms/cycle_detector_test.cc"], + deps = [ + ":cycle_detector", + "//tensorflow/compiler/xla:test", + "//tensorflow/core:test_main", + ], +) + +cc_library( + name = "xla_hlo_fusion", + srcs = ["transforms/xla_hlo_fusion.cc"], + deps = [ + ":cycle_detector", + ":hlo", + "@llvm-project//llvm:core", + "@llvm-project//mlir:Pass", + "@llvm-project//mlir:StandardOps", + "@llvm-project//mlir:Support", + ], + alwayslink = 1, +) + gentbl( name = "xla_legalize_to_standard_inc_gen", tbl_outs = [ @@ -555,6 +626,7 @@ cc_library( ":convert_op_folder", ":hlo_ops_base_inc_gen", ":hlo_ops_inc_gen", + ":infer_fusibility_op_interface", ":xla_canonicalize_inc_gen", "@com_google_absl//absl/container:flat_hash_set", "@llvm-project//llvm:support", @@ -824,6 +896,7 @@ genrule( ":ir/hlo_ops.td", ":ir/hlo_ops_base.td", ":ir/hlo_utils.td", + ":ir/infer_fusibility_op_interface.td", ], outs = ["operator_writers.inc"], cmd = ("$(location :operator_writer_gen) " + @@ -859,6 +932,7 @@ cc_library( ":lhlo_legalize_to_gpu", ":lhlo_legalize_to_parallel_loops", ":xla_dialect_registration", + ":xla_hlo_fusion", ":xla_hlo_to_lhlo_with_xla", ":xla_legalize_control_flow", ":xla_legalize_tf", diff --git a/tensorflow/compiler/mlir/xla/ir/hlo_ops.h b/tensorflow/compiler/mlir/xla/ir/hlo_ops.h index 9725a0684f6..d3e9e8d111e 100644 --- a/tensorflow/compiler/mlir/xla/ir/hlo_ops.h +++ b/tensorflow/compiler/mlir/xla/ir/hlo_ops.h @@ -30,6 +30,7 @@ limitations under the License. #include "mlir/IR/Types.h" // from @llvm-project #include "mlir/Interfaces/InferTypeOpInterface.h" // from @llvm-project #include "mlir/Interfaces/SideEffectInterfaces.h" // from @llvm-project +#include "tensorflow/compiler/mlir/xla/ir/infer_fusibility_op_interface.h" namespace mlir { class OpBuilder; diff --git a/tensorflow/compiler/mlir/xla/ir/hlo_ops.td b/tensorflow/compiler/mlir/xla/ir/hlo_ops.td index c82322b798f..72a369120c1 100644 --- a/tensorflow/compiler/mlir/xla/ir/hlo_ops.td +++ b/tensorflow/compiler/mlir/xla/ir/hlo_ops.td @@ -26,6 +26,7 @@ include "mlir/Interfaces/InferTypeOpInterface.td" include "mlir/Interfaces/SideEffectInterfaces.td" include "tensorflow/compiler/mlir/xla/ir/hlo_ops_base.td" include "tensorflow/compiler/mlir/xla/ir/hlo_utils.td" +include "tensorflow/compiler/mlir/xla/ir/infer_fusibility_op_interface.td" def HLO_Dialect : Dialect { let name = "xla_hlo"; @@ -117,7 +118,7 @@ def HLO_CreateTokenOp : HLO_Op<"create_token", [NoSideEffect]> { class HLO_UnaryElementwiseOp traits, Type TensorType>: HLO_Op { + !listconcat(traits, [InferShapedTypeOpInterface, InferFusibilityOpInterface])> { let arguments = (ins TensorType:$operand); let results = (outs TensorType); let extraClassDeclaration = [{ @@ -132,6 +133,12 @@ class HLO_UnaryElementwiseOp traits, return deriveShapeFromFirstOperand(&builder, getOperation(), &reifiedReturnShapes); } + bool inferInputOutputShapeEquality(int input, int output) { + return true; + } + llvm::Optional inferEffectiveWorkloadShape() { + return getOperation()->getResult(0); + } }]; } @@ -257,7 +264,7 @@ def HLO_TanhOp: HLO_UnaryElementwiseOp<"tanh", // See https://www.tensorflow.org/xla/operation_semantics#element-wise_binary_arithmetic_operations class HLO_BinaryElementwiseOp traits> : - HLO_Op { + HLO_Op { let arguments = (ins HLO_Tensor:$lhs, HLO_Tensor:$rhs @@ -275,6 +282,15 @@ class HLO_BinaryElementwiseOp traits> : return deriveShapeFromFirstOperand(&builder, getOperation(), 
&reifiedReturnShapes); } + bool inferInputsShapeEquality(int lhs, int rhs) { + return true; + } + bool inferInputOutputShapeEquality(int input, int output) { + return true; + } + llvm::Optional inferEffectiveWorkloadShape() { + return getOperation()->getResult(0); + } }]; let results = (outs HLO_Tensor); @@ -598,7 +614,8 @@ def HLO_AllToAllOp : HLO_Op<"all_to_all", def HLO_ReduceOp: HLO_Op<"reduce", [ RecursiveSideEffects, SameVariadicOperandSize, - SingleBlockImplicitTerminator<"ReturnOp"> + SingleBlockImplicitTerminator<"ReturnOp">, + InferFusibilityOpInterface ]>, BASE_HLO_ReduceOp { let arguments = (ins Variadic:$operands, @@ -613,6 +630,15 @@ def HLO_ReduceOp: HLO_Op<"reduce", [ "ValueRange init_values, DenseIntElementsAttr dimensions" >]; + let extraClassDeclaration = [{ + bool isFusibleWithConsumer() { + return false; + } + llvm::Optional inferEffectiveWorkloadShape() { + return getOperation()->getOperand(0); + } + }]; + let hasFolder = 1; // TODO(hinsu): Verify that the attached body arguments and results are @@ -1360,4 +1386,27 @@ def HLO_DequantizeOp : HLO_Op<"dequantize", [NoSideEffect]>, let hasCustomHLOConverter = 1; } +def HLO_FusionOp : HLO_Op<"fusion", []> { + let summary = "Fusion operator"; + let description = [{ + Models the fusion instruction. + + A fusion op is consists of a group of basic ops (represented as a region + attached to it). It serves as a hint to the backend that it is beneficial + to emit the contained ops into a single loop nest or kernel. + }]; + let regions = (region SizedRegion<1>:$fused_computation); + + let arguments = (ins + Variadic:$operands + ); + + let results = (outs + Variadic:$results + ); + + // FusionOp has special conversion logic to HLO. + let hasCustomHLOConverter = 1; +} + #endif // HLO_OPS diff --git a/tensorflow/compiler/mlir/xla/ir/infer_fusibility_op_interface.cc b/tensorflow/compiler/mlir/xla/ir/infer_fusibility_op_interface.cc new file mode 100644 index 00000000000..10727aeac3f --- /dev/null +++ b/tensorflow/compiler/mlir/xla/ir/infer_fusibility_op_interface.cc @@ -0,0 +1,22 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/mlir/xla/ir/infer_fusibility_op_interface.h" + +namespace mlir { + +#include "tensorflow/compiler/mlir/xla/ir/infer_fusibility_op_interface.cc.inc" + +} // namespace mlir diff --git a/tensorflow/compiler/mlir/xla/ir/infer_fusibility_op_interface.h b/tensorflow/compiler/mlir/xla/ir/infer_fusibility_op_interface.h new file mode 100644 index 00000000000..9bb6e7dc9e8 --- /dev/null +++ b/tensorflow/compiler/mlir/xla/ir/infer_fusibility_op_interface.h @@ -0,0 +1,28 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_MLIR_XLA_IR_INFER_FUSIBILITY_OP_INTERFACE_H_ +#define TENSORFLOW_COMPILER_MLIR_XLA_IR_INFER_FUSIBILITY_OP_INTERFACE_H_ + +#include "mlir/IR/OpDefinition.h" +#include "mlir/IR/StandardTypes.h" + +namespace mlir { + +#include "tensorflow/compiler/mlir/xla/ir/infer_fusibility_op_interface.h.inc" + +} // namespace mlir + +#endif // TENSORFLOW_COMPILER_MLIR_XLA_IR_INFER_FUSIBILITY_OP_INTERFACE_H_ diff --git a/tensorflow/compiler/mlir/xla/ir/infer_fusibility_op_interface.td b/tensorflow/compiler/mlir/xla/ir/infer_fusibility_op_interface.td new file mode 100644 index 00000000000..eb2c1ba3ffe --- /dev/null +++ b/tensorflow/compiler/mlir/xla/ir/infer_fusibility_op_interface.td @@ -0,0 +1,161 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// This file contains inferFusiblityOpInterface, which is used to guide +// fusion decision. + +#ifndef MLIR_INFER_FUSIBILITY_OP_INTERFACE +#define MLIR_INFER_FUSIBILITY_OP_INTERFACE + +include "mlir/IR/OpBase.td" + +// OpInterface to query if an op is fusible and to query the shape equality +// constraint among the inputs and outputs of an op. +def InferFusibilityOpInterface : OpInterface<"InferFusibilityOpInterface"> { + let description = [{ + Interface to query if an op is fusible and to query the shape equality + constraint among the inputs and outputs of an op. + }]; + + let methods = [ + InterfaceMethod< + /*desc=*/[{If true, this op can be fused with its operands + }], + /*retTy=*/"bool", + /*methodName=*/"isFusibleWithOperand", + /*args=*/(ins), + /*methodBody=*/[{}], + /*defaultImplementation=*/[{ + /// Returns whether this op can be fused with its operands + return true; + }] + >, + InterfaceMethod< + /*desc=*/[{If true, this op can be fused with its consumers + }], + /*retTy=*/"bool", + /*methodName=*/"isFusibleWithConsumer", + /*args=*/(ins), + /*methodBody=*/[{}], + /*defaultImplementation=*/[{ + /// Return whether this op can be fused withh its consumers + return true; + }] + >, + InterfaceMethod< + /*desc=*/"Return whether two inputs have the same shape (assuming no" + "implicit broadcasting).", + /*retTy=*/"bool", + /*methodName=*/"inferInputsShapeEquality", + /*args=*/(ins "int":$lhs, "int":$rhs), + /*methodBody=*/[{}], + /*defaultImplementation=*/[{ + /// Return whether two inputs have the same shape. 
+ Operation *op = this->getOperation(); + assert(lhs < op->getNumOperands() && lhs >= 0 && + rhs < op->getNumOperands() && rhs >= 0); + if (lhs == rhs) return true; + + // if both lhs and rhs have static shapes, check them directly + Type lhs_ty = op->getOperand(lhs).getType(); + Type rhs_ty = op->getOperand(rhs).getType(); + auto lhs_shape_type = lhs_ty.dyn_cast_or_null(); + auto rhs_shape_type = rhs_ty.dyn_cast_or_null(); + if (!lhs_shape_type || !lhs_shape_type.hasStaticShape() || + !rhs_shape_type || !rhs_shape_type.hasStaticShape() || + lhs_shape_type.getRank() != rhs_shape_type.getRank()) { + return false; + } + return lhs_shape_type.getShape() == rhs_shape_type.getShape(); + }] + >, + InterfaceMethod< + /*desc=*/"Return whether two outputs have the same shape (assuming no" + " implicit broadcasting).", + /*retTy=*/"bool", + /*methodName=*/"inferOutputsShapeEquality", + /*args=*/(ins "int":$lhs, "int":$rhs), + /*methodBody=*/[{}], + /*defaultImplementation=*/[{ + /// Return whether two outputs have the same shape. + Operation *op = this->getOperation(); + assert(lhs < op->getNumResults() && lhs >= 0 && + rhs < op->getNumResults() && rhs >= 0); + if (lhs == rhs) return true; + + // if both lhs and rhs have static shapes, check them directly + Type lhs_ty = op->getResult(lhs).getType(); + Type rhs_ty = op->getResult(rhs).getType(); + auto lhs_shape_type = lhs_ty.dyn_cast_or_null(); + auto rhs_shape_type = rhs_ty.dyn_cast_or_null(); + if (!lhs_shape_type || !lhs_shape_type.hasStaticShape() || + !rhs_shape_type || !rhs_shape_type.hasStaticShape() || + lhs_shape_type.getRank() != rhs_shape_type.getRank()) { + return false; + } + return lhs_shape_type.getShape() == rhs_shape_type.getShape(); + }] + >, + InterfaceMethod< + /*desc=*/"Return whether the input and the output have the same" + " shape (assuming no implicit broadcasting).", + /*retTy=*/"bool", + /*methodName=*/"inferInputOutputShapeEquality", + /*args=*/(ins "int":$input, "int":$output), + /*methodBody=*/[{}], + /*defaultImplementation=*/[{ + /// Return whether the input and the output have the same shape. + Operation *op = this->getOperation(); + assert(input < op->getNumOperands() && input >= 0 && + output < op->getNumResults() && output >= 0); + + // if both input and output have static shapes, check them directly + Type input_ty = op->getOperand(input).getType(); + Type output_ty = op->getResult(output).getType(); + auto input_shape_type = input_ty.dyn_cast_or_null(); + auto output_shape_type = output_ty.dyn_cast_or_null(); + if (!input_shape_type || !input_shape_type.hasStaticShape() || + !output_shape_type || !output_shape_type.hasStaticShape() || + input_shape_type.getRank() != output_shape_type.getRank()) { + return false; + } + return input_shape_type.getShape() == output_shape_type.getShape(); + }] + >, + InterfaceMethod< + /*desc=*/[{Return the effective workload shape for the operation. + + Here the effective workload shape roughly represents the maximum + parallelism can be used during the codegen stage. It's used to check + the shape-compatibility of the operation. During fusion, we only + try to fuse shape-compatible ops for performace. + For example, the effective workload shape of an elementwise op is its + output shape, while the effective workload shape of a reduction op may + be its operand shape. + Return None if such an inference is not possible. 
+ }], + /*retTy=*/"llvm::Optional", + /*methodName=*/"inferEffectiveWorkloadShape", + /*args=*/(ins), + /*methodBody=*/[{}], + /*defaultImplementation=*/[{ + /// Return effective workload size if possible, otherwise None. + return {}; + }] + >, + ]; +} + +#endif diff --git a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc index 1c25625802f..e384b74da97 100644 --- a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc +++ b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc @@ -937,6 +937,11 @@ LogicalResult ExportXlaOp(WhileOp op, OpLoweringContext ctx) { return success(); } +LogicalResult ExportXlaOp(FusionOp op, OpLoweringContext ctx) { + // TODO: currently not supported. + return failure(); +} + } // namespace } // namespace xla_hlo } // namespace mlir diff --git a/tensorflow/compiler/mlir/xla/tests/xla-hlo-fusion.mlir b/tensorflow/compiler/mlir/xla/tests/xla-hlo-fusion.mlir new file mode 100644 index 00000000000..72df18695e1 --- /dev/null +++ b/tensorflow/compiler/mlir/xla/tests/xla-hlo-fusion.mlir @@ -0,0 +1,97 @@ +// RUN: tf-opt %s -xla-hlo-fusion -split-input-file | FileCheck %s --dump-input-on-failure + +// CHECK-LABEL: func @multi_outputs_same +func @multi_outputs_same(%arg0: tensor, %arg1: tensor) -> (tensor, tensor) { + %0 = "xla_hlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + %1 = "xla_hlo.subtract"(%arg0, %0) : (tensor, tensor) -> tensor + %2 = "xla_hlo.add"(%1, %1) : (tensor, tensor) -> tensor + // CHECK: %[[RET:.*]]:2 = "xla_hlo.fusion" + // CHECK-NEXT: xla_hlo.add + // CHECK-NEXT: xla_hlo.subtract + // CHECK-NEXT: xla_hlo.add + // CHECK-NEXT: xla_hlo.return + return %1, %2 : tensor, tensor +} + +// ----- + +// CHECK-LABEL: func @multi_outputs_same_2 +func @multi_outputs_same_2(%arg0: tensor, %arg1: tensor) -> (tensor, tensor, tensor) { + %0 = "xla_hlo.abs"(%arg0) : (tensor) -> tensor + %1 = "xla_hlo.abs"(%arg1) : (tensor) -> tensor + %2 = "xla_hlo.add"(%0, %1) : (tensor, tensor) -> tensor + %3 = "xla_hlo.abs"(%0) : (tensor) -> tensor + %4 = "xla_hlo.abs"(%1) : (tensor) -> tensor + // CHECK: %[[RET:.*]]:3 = "xla_hlo.fusion" + // CHECK-NEXT: xla_hlo.abs + // CHECK-NEXT: xla_hlo.abs + // CHECK-NEXT: xla_hlo.add + // CHECK-NEXT: xla_hlo.abs + // CHECK-NEXT: xla_hlo.abs + // CHECK-NEXT: xla_hlo.return + return %2, %3, %4 : tensor, tensor, tensor +} + +// ----- + +// CHECK-LABEL: func @multi_outputs_not_sure_same +func @multi_outputs_not_sure_same(%arg0: tensor, %arg1: tensor) -> (tensor, tensor) { + %0 = "xla_hlo.add"(%arg0, %arg0) : (tensor, tensor) -> tensor + // CHECK-NOT: xla_hlo.fusion + %1 = "xla_hlo.subtract"(%arg1, %arg1) : (tensor, tensor) -> tensor + return %0, %1 : tensor, tensor +} + +// ----- + +// CHECK-LABEL: func @reduce +func @reduce(%arg0: tensor, %arg1: tensor) -> (tensor, tensor) { + %0 = "xla_hlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + %1 = "xla_hlo.subtract"(%arg0, %0) : (tensor, tensor) -> tensor + // CHECK: %[[RET0:.*]] = "xla_hlo.fusion" + // CHECK-NEXT: xla_hlo.add + // CHECK-NEXT: xla_hlo.subtract + // CHECK-NEXT: xla_hlo.return + // Currently we do not support fuse arguments and ops without direct producer-consumer + // relationship. Thus Reduce Op should not be fused with above two ops. 
+ + %2 = xla_hlo.constant dense<0.000000e+00> : tensor + %3 = "xla_hlo.reduce"(%arg0, %2) ( { + ^bb0(%arg2: tensor, %arg3: tensor): + %4 = "xla_hlo.add"(%arg2, %arg3) : (tensor, tensor) -> tensor + "xla_hlo.return"(%4) : (tensor) -> () + }) {dimensions = dense<[1]> : tensor<1xi64>} : (tensor, tensor) -> tensor + %4 = "xla_hlo.add"(%3, %3) : (tensor, tensor) -> tensor + // Above two ops should not be fused since reduce op can not be + // fused with its consumer. + // CHECK-NOT: xla_hlo.fusion + + return %1, %4 : tensor, tensor +} + +// ----- + +// CHECK-LABEL: func @reduce_2 +func @reduce_2(%arg0: tensor, %arg1: tensor) -> (tensor, tensor) { + %0 = "xla_hlo.add"(%arg0, %arg1) : (tensor, tensor) -> tensor + %1 = "xla_hlo.subtract"(%arg0, %0) : (tensor, tensor) -> tensor + + %2 = xla_hlo.constant dense<0.000000e+00> : tensor + %3 = "xla_hlo.reduce"(%1, %2) ( { + ^bb0(%arg2: tensor, %arg3: tensor): + %4 = "xla_hlo.add"(%arg2, %arg3) : (tensor, tensor) -> tensor + "xla_hlo.return"(%4) : (tensor) -> () + }) {dimensions = dense<[1]> : tensor<1xi64>} : (tensor, tensor) -> tensor + // CHECK: %[[RET0:.*]]:2 = "xla_hlo.fusion" + // CHECK-NEXT: xla_hlo.add + // CHECK-NEXT: xla_hlo.subtract + // CHECK-NEXT: xla_hlo.constant + // CHECK-NEXT: xla_hlo.reduce + // CHECK: xla_hlo.return + + // Following op should not be fused with the above ops since reduce op can not be + // fused with its consumer. + // CHECK-NOT: xla_hlo.fusion + %4 = "xla_hlo.add"(%3, %3) : (tensor, tensor) -> tensor + return %1, %4 : tensor, tensor +} diff --git a/tensorflow/compiler/mlir/xla/transforms/cycle_detector.cc b/tensorflow/compiler/mlir/xla/transforms/cycle_detector.cc new file mode 100644 index 00000000000..31389f80ba6 --- /dev/null +++ b/tensorflow/compiler/mlir/xla/transforms/cycle_detector.cc @@ -0,0 +1,339 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/mlir/xla/transforms/cycle_detector.h" + +#include +#include "llvm/ADT/DenseSet.h" + +namespace mlir { + +namespace { + +using NodeSet = llvm::DenseSet; +using OrderedNodeSet = OrderedSet; + +template +struct VecStruct { + typedef llvm::SmallVector type; +}; +template +using Vec = typename VecStruct::type; + +struct Node { + // rank number assigned by Pearce-Kelly algorithm + int32_t rank; + // Temporary marker used by depth-first-search + bool visited; + // User-supplied data + void* data; + // List of immediate predecessor nodes in graph + OrderedNodeSet in; + // List of immediate successor nodes in graph + OrderedNodeSet out; +}; + +} // namespace + + +struct GraphCycles::Rep { + Vec nodes; + // Indices for unused entries in nodes + Vec free_nodes; + + // Temporary state. 
+ // Results of forward DFS + Vec deltaf; + // Results of backward DFS + Vec deltab; + // All nodes to reprocess + Vec list; + // Rank values to assign to list entries + Vec merged; + // Emulates recursion stack when doing depth first search + Vec stack; +}; + +GraphCycles::GraphCycles(int32_t num_nodes) : rep_(new Rep) { + rep_->nodes.reserve(num_nodes); + for (int32_t i = 0; i < num_nodes; ++i) { + Node* n = new Node; + n->visited = false; + n->data = nullptr; + n->rank = rep_->nodes.size(); + rep_->nodes.push_back(n); + } +} + +GraphCycles::~GraphCycles() { + for (Vec::size_type i = 0, e = rep_->nodes.size(); i < e; ++i) { + delete rep_->nodes[i]; + } + delete rep_; +} + +bool GraphCycles::HasEdge(int32_t x, int32_t y) const { + return rep_->nodes[x]->out.Contains(y); +} + +void GraphCycles::RemoveEdge(int32_t x, int32_t y) { + rep_->nodes[x]->out.Erase(y); + rep_->nodes[y]->in.Erase(x); + // No need to update the rank assignment since a previous valid + // rank assignment remains valid after an edge deletion. +} + +static bool ForwardDFS(GraphCycles::Rep* r, int32_t n, int32_t upper_bound); +static void BackwardDFS(GraphCycles::Rep* r, int32_t n, int32_t lower_bound); +static void Reorder(GraphCycles::Rep* r); +static void Sort(const Vec&, Vec* delta); +static void MoveToList(GraphCycles::Rep* r, Vec* src, Vec* dst); +static void ClearVisitedBits(GraphCycles::Rep* r, const Vec& nodes); + +bool GraphCycles::InsertEdge(int32_t x, int32_t y) { + if (x == y) return false; + Rep* r = rep_; + Node* nx = r->nodes[x]; + if (!nx->out.Insert(y)) { + // Edge already exists. + return true; + } + + Node* ny = r->nodes[y]; + ny->in.Insert(x); + + if (nx->rank <= ny->rank) { + // New edge is consistent with existing rank assignment. + return true; + } + + // Current rank assignments are incompatible with the new edge. Recompute. + // We only need to consider nodes that fall in the range [ny->rank,nx->rank]. + if (ForwardDFS(r, y, nx->rank)) { + // Found a cycle. Undo the insertion and tell caller. + nx->out.Erase(y); + ny->in.Erase(x); + // Since we do not call Reorder() on this path, clear any visited + // markers left by ForwardDFS. + ClearVisitedBits(r, r->deltaf); + return false; + } + BackwardDFS(r, x, ny->rank); + Reorder(r); + return true; +} + +// Follows the edges from producer to consumer and searchs if the node having +// rank `n` can reach the node having rank `upper_bound` using a DFS search. +// When doing DFS search, We only consider the pathes that satisfy the ranks +// of the nodes of the path are all smaller than `upper_bound`. +// +// Returns true if such path exists. +static bool ForwardDFS(GraphCycles::Rep* r, int32_t n, int32_t upper_bound) { + // Avoid recursion since stack space might be limited. + // We instead keep a stack of nodes to visit. + r->deltaf.clear(); + r->stack.clear(); + r->stack.push_back(n); + while (!r->stack.empty()) { + n = r->stack.back(); + r->stack.pop_back(); + Node* nn = r->nodes[n]; + if (nn->visited) continue; + + nn->visited = true; + r->deltaf.push_back(n); + + for (auto w : nn->out.GetSequence()) { + Node* nw = r->nodes[w]; + if (nw->rank == upper_bound) { + return true; + } + if (!nw->visited && nw->rank < upper_bound) { + r->stack.push_back(w); + } + } + } + return false; +} + +// Follows the edges from consumer to producer and visit all the nodes that +// is reachable from node `n` and have rank larger than `lower_bound`. 
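+//
+// Together with ForwardDFS() and Reorder() this implements the Pearce-Kelly
+// scheme for maintaining a topological order incrementally. As an
+// illustration: with ranks a:0, b:1, c:2 and existing edges a->b->c, inserting
+// c->a runs ForwardDFS(a, rank(c)), which reaches c, so the edge is rejected
+// as a cycle. Inserting an edge x->y with rank(x) > rank(y) that does not
+// close a cycle collects the forward region from y and the backward region
+// from x, and Reorder() reassigns ranks within that region only, so producers
+// keep smaller ranks than their consumers.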
+static void BackwardDFS(GraphCycles::Rep* r, int32_t n, int32_t lower_bound) { + r->deltab.clear(); + r->stack.clear(); + r->stack.push_back(n); + while (!r->stack.empty()) { + n = r->stack.back(); + r->stack.pop_back(); + Node* nn = r->nodes[n]; + if (nn->visited) continue; + + nn->visited = true; + r->deltab.push_back(n); + + for (auto w : nn->in.GetSequence()) { + Node* nw = r->nodes[w]; + if (!nw->visited && lower_bound < nw->rank) { + r->stack.push_back(w); + } + } + } +} + +// Recomputes rank assignments to make them compatible with the edges (producer +// has smaller rank than its consumer) +static void Reorder(GraphCycles::Rep* r) { + Sort(r->nodes, &r->deltab); + Sort(r->nodes, &r->deltaf); + + // Adds contents of delta lists to list (backwards deltas first). + r->list.clear(); + MoveToList(r, &r->deltab, &r->list); + MoveToList(r, &r->deltaf, &r->list); + + // Produce sorted list of all ranks that will be reassigned. + r->merged.resize(r->deltab.size() + r->deltaf.size()); + std::merge(r->deltab.begin(), r->deltab.end(), r->deltaf.begin(), + r->deltaf.end(), r->merged.begin()); + + // Assign the ranks in order to the collected list. + for (Vec::size_type i = 0, e = r->list.size(); i < e; ++i) { + r->nodes[r->list[i]]->rank = r->merged[i]; + } +} + +// Sorts nodes in the vector according to their ranks. Small rank first. +static void Sort(const Vec& nodes, Vec* delta) { + struct ByRank { + const Vec* nodes; + bool operator()(int32_t a, int32_t b) const { + return (*nodes)[a]->rank < (*nodes)[b]->rank; + } + }; + ByRank cmp; + cmp.nodes = &nodes; + std::sort(delta->begin(), delta->end(), cmp); +} + +// Collects ranks of nodes in vector `src` to vector `dst` +static void MoveToList(GraphCycles::Rep* r, Vec* src, Vec* dst) { + for (Vec::size_type i = 0, e = src->size(); i < e; i++) { + int32_t w = (*src)[i]; + // Replace src entry with its rank + (*src)[i] = r->nodes[w]->rank; + // Prepare for future DFS calls + r->nodes[w]->visited = false; + dst->push_back(w); + } +} + +// Clears bookkeeping fileds used during the last DFS process. +static void ClearVisitedBits(GraphCycles::Rep* r, const Vec& nodes) { + for (Vec::size_type i = 0, e = nodes.size(); i < e; i++) { + r->nodes[nodes[i]]->visited = false; + } +} + +bool GraphCycles::IsReachable(int32_t x, int32_t y) { + if (x == y) return true; + Rep* r = rep_; + Node* nx = r->nodes[x]; + Node* ny = r->nodes[y]; + + if (nx->rank >= ny->rank) { + // x cannot reach y since it is after it in the topological ordering + return false; + } + + // See if x can reach y using a DFS search that is limited to y's rank + bool reachable = ForwardDFS(r, x, ny->rank); + + // Clear any visited markers left by ForwardDFS. + ClearVisitedBits(r, r->deltaf); + return reachable; +} + +llvm::Optional GraphCycles::ContractEdge(int32_t a, int32_t b) { + assert(HasEdge(a, b)); + RemoveEdge(a, b); + + if (IsReachable(a, b)) { + // Restore the graph to its original state. + InsertEdge(a, b); + return {}; + } + + if (rep_->nodes[b]->in.Size() + rep_->nodes[b]->out.Size() > + rep_->nodes[a]->in.Size() + rep_->nodes[a]->out.Size()) { + // Swap "a" and "b" to minimize copying. 
+ std::swap(a, b); + } + + Node* nb = rep_->nodes[b]; + OrderedNodeSet out = std::move(nb->out); + OrderedNodeSet in = std::move(nb->in); + for (int32_t y : out.GetSequence()) { + rep_->nodes[y]->in.Erase(b); + } + for (int32_t y : in.GetSequence()) { + rep_->nodes[y]->out.Erase(b); + } + rep_->free_nodes.push_back(b); + + rep_->nodes[a]->out.Reserve(rep_->nodes[a]->out.Size() + out.Size()); + for (int32_t y : out.GetSequence()) { + InsertEdge(a, y); + } + + rep_->nodes[a]->in.Reserve(rep_->nodes[a]->in.Size() + in.Size()); + for (int32_t y : in.GetSequence()) { + InsertEdge(y, a); + } + + // Note, if the swap happened it might be what originally was called "b". + return a; +} + +std::vector GraphCycles::SuccessorsCopy(int32_t node) const { + return rep_->nodes[node]->out.GetSequence(); +} + +namespace { +void SortInPostOrder(const Vec& nodes, + std::vector* to_sort) { + std::sort(to_sort->begin(), to_sort->end(), [&](int32_t a, int32_t b) { + return nodes[a]->rank > nodes[b]->rank; + }); +} +} // namespace + +std::vector GraphCycles::AllNodesInPostOrder() const { + llvm::DenseSet free_nodes_set; + for (int32_t n : rep_->free_nodes) free_nodes_set.insert(n); + + std::vector all_nodes; + all_nodes.reserve(rep_->nodes.size() - free_nodes_set.size()); + for (size_t i = 0, e = rep_->nodes.size(); i < e; i++) { + if (!free_nodes_set.count(i)) { + all_nodes.push_back(i); + } + } + + SortInPostOrder(rep_->nodes, &all_nodes); + return all_nodes; +} + +} // namespace mlir diff --git a/tensorflow/compiler/mlir/xla/transforms/cycle_detector.h b/tensorflow/compiler/mlir/xla/transforms/cycle_detector.h new file mode 100644 index 00000000000..7ae0b03c6f0 --- /dev/null +++ b/tensorflow/compiler/mlir/xla/transforms/cycle_detector.h @@ -0,0 +1,164 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_MLIR_XLA_TRANSFORMS_CYCLE_DETECTOR_H_ +#define TENSORFLOW_COMPILER_MLIR_XLA_TRANSFORMS_CYCLE_DETECTOR_H_ + +#include +#include "llvm/ADT/DenseMap.h" + +namespace mlir { + +// ------------------------------------------------------------------- + +// This file contains a light version of GraphCycles implemented in +// tensorflow/compiler/jit/graphcycles/graphcycles.h +// +// We re-implement it here because we do not want to rely +// on TensorFlow data structures, and hence we can move +// corresponding passes to llvm repo. easily in case necessnary. + +// -------------------------------------------------------------------- + +// This is a set data structure that provides a deterministic iteration order. +// The iteration order of elements only depends on the sequence of +// inserts/deletes, so as long as the inserts/deletes happen in the same +// sequence, the set will have the same iteration order. +// +// Assumes that T can be cheaply copied for simplicity. +template +class OrderedSet { + public: + // Inserts `value` into the ordered set. 
Returns true if the value was not + // present in the set before the insertion. + bool Insert(T value) { + bool new_insertion = + value_to_index_.insert({value, value_sequence_.size()}).second; + if (new_insertion) { + value_sequence_.push_back(value); + } + return new_insertion; + } + + // Removes `value` from the set. Assumes `value` is already present in the + // set. + void Erase(T value) { + auto it = value_to_index_.find(value); + + // Since we don't want to move values around in `value_sequence_` we swap + // the value in the last position and with value to be deleted and then + // pop_back. + value_to_index_[value_sequence_.back()] = it->second; + std::swap(value_sequence_[it->second], value_sequence_.back()); + value_sequence_.pop_back(); + value_to_index_.erase(it); + } + + void Reserve(size_t new_size) { + value_to_index_.reserve(new_size); + value_sequence_.reserve(new_size); + } + + void Clear() { + value_to_index_.clear(); + value_sequence_.clear(); + } + + bool Contains(T value) const { return value_to_index_.count(value); } + size_t Size() const { return value_sequence_.size(); } + + const std::vector& GetSequence() const { return value_sequence_; } + + private: + // The stable order that we maintain through insertions and deletions. + std::vector value_sequence_; + + // Maps values to their indices in `value_sequence_`. + llvm::DenseMap value_to_index_; +}; + +// --------------------------------------------------------------------- + +// GraphCycles detects the introduction of a cycle into a directed +// graph that is being built up incrementally. +// +// Nodes are identified by small integers. It is not possible to +// record multiple edges with the same (source, destination) pair; +// requests to add an edge where one already exists are silently +// ignored. +// +// It is also not possible to introduce a cycle; an attempt to insert +// an edge that would introduce a cycle fails and returns false. +// +// GraphCycles uses no internal locking; calls into it should be +// serialized externally. + +// Performance considerations: +// Works well on sparse graphs, poorly on dense graphs. +// Extra information is maintained incrementally to detect cycles quickly. +// InsertEdge() is very fast when the edge already exists, and reasonably fast +// otherwise. +// FindPath() is linear in the size of the graph. +// The current implementation uses O(|V|+|E|) space. + +class GraphCycles { + public: + GraphCycles(int32_t num_nodes); + ~GraphCycles(); + + // Attempt to insert an edge from x to y. If the + // edge would introduce a cycle, return false without making any + // changes. Otherwise add the edge and return true. + bool InsertEdge(int32_t x, int32_t y); + + // Remove any edge that exists from x to y. + void RemoveEdge(int32_t x, int32_t y); + + // Return whether there is an edge directly from x to y. + bool HasEdge(int32_t x, int32_t y) const; + + // Contracts the edge from 'a' to node 'b', merging nodes 'a' and 'b'. One of + // the nodes is removed from the graph, and edges to/from it are added to + // the remaining one, which is returned. If contracting the edge would create + // a cycle, does nothing and return no value. + llvm::Optional ContractEdge(int32_t a, int32_t b); + + // Return whether dest_node `y` is reachable from source_node `x` + // by following edges. This is non-thread-safe version. + bool IsReachable(int32_t x, int32_t y); + + // Return a copy of the successors set. This is needed for code using the + // collection while modifying the GraphCycles. 
+ std::vector SuccessorsCopy(int32_t node) const; + + // Returns all nodes in post order. + // + // If there is a path from X to Y then X appears after Y in the + // returned vector. + std::vector AllNodesInPostOrder() const; + + // ---------------------------------------------------- + struct Rep; + + private: + GraphCycles(const GraphCycles&) = delete; + GraphCycles& operator=(const GraphCycles&) = delete; + + Rep *rep_; // opaque representation +}; + +} // namespace mlir + +#endif // TENSORFLOW_COMPILER_MLIR_XLA_TRANSFORMS_CYCLE_DETECTOR_H_ diff --git a/tensorflow/compiler/mlir/xla/transforms/cycle_detector_test.cc b/tensorflow/compiler/mlir/xla/transforms/cycle_detector_test.cc new file mode 100644 index 00000000000..60d5c0b0f37 --- /dev/null +++ b/tensorflow/compiler/mlir/xla/transforms/cycle_detector_test.cc @@ -0,0 +1,91 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/mlir/xla/transforms/cycle_detector.h" + +#include "tensorflow/compiler/xla/test.h" + +class GraphCyclesTest : public ::testing::Test { + public: + GraphCyclesTest(): g_(100) { } + + bool AddEdge(int x, int y) { return g_.InsertEdge(x, y); } + + void AddMultiples() { + // For every node x > 0: add edge to 2*x, 3*x + for (int x = 1; x < 25; x++) { + EXPECT_TRUE(AddEdge(x, 2 * x)) << x; + EXPECT_TRUE(AddEdge(x, 3 * x)) << x; + } + } + + mlir::GraphCycles g_; +}; + +TEST_F(GraphCyclesTest, NoCycle) { + AddMultiples(); +} + +TEST_F(GraphCyclesTest, SimpleCycle) { + AddMultiples(); + EXPECT_FALSE(AddEdge(8, 4)); +} + +TEST_F(GraphCyclesTest, IndirectCycle) { + AddMultiples(); + EXPECT_TRUE(AddEdge(16, 9)); + EXPECT_FALSE(AddEdge(9, 2)); +} + +TEST_F(GraphCyclesTest, RemoveEdge) { + EXPECT_TRUE(AddEdge(1, 2)); + EXPECT_TRUE(AddEdge(2, 3)); + EXPECT_TRUE(AddEdge(3, 4)); + EXPECT_TRUE(AddEdge(4, 5)); + g_.RemoveEdge(2, 3); + EXPECT_FALSE(g_.HasEdge(2, 3)); +} + +TEST_F(GraphCyclesTest, IsReachable) { + EXPECT_TRUE(AddEdge(1, 2)); + EXPECT_TRUE(AddEdge(2, 3)); + EXPECT_TRUE(AddEdge(3, 4)); + EXPECT_TRUE(AddEdge(4, 5)); + + EXPECT_TRUE(g_.IsReachable(1, 5)); + EXPECT_FALSE(g_.IsReachable(5, 1)); +} + +TEST_F(GraphCyclesTest, ContractEdge) { + ASSERT_TRUE(AddEdge(1, 2)); + ASSERT_TRUE(AddEdge(1, 3)); + ASSERT_TRUE(AddEdge(2, 3)); + ASSERT_TRUE(AddEdge(2, 4)); + ASSERT_TRUE(AddEdge(3, 4)); + + // It will introduce a cycle if the edge is contracted + EXPECT_FALSE(g_.ContractEdge(1, 3).hasValue()); + EXPECT_TRUE(g_.HasEdge(1, 3)); + + // Node (2) has more edges. + EXPECT_EQ(*g_.ContractEdge(1, 2), 2); + EXPECT_TRUE(g_.HasEdge(2, 3)); + EXPECT_TRUE(g_.HasEdge(2, 4)); + EXPECT_TRUE(g_.HasEdge(3, 4)); + + // Node (2) has more edges. 
+ EXPECT_EQ(*g_.ContractEdge(2, 3), 2); + EXPECT_TRUE(g_.HasEdge(2, 4)); +} diff --git a/tensorflow/compiler/mlir/xla/transforms/passes.h b/tensorflow/compiler/mlir/xla/transforms/passes.h index e3dd5380d7c..9b9c799b2f0 100644 --- a/tensorflow/compiler/mlir/xla/transforms/passes.h +++ b/tensorflow/compiler/mlir/xla/transforms/passes.h @@ -73,6 +73,9 @@ std::unique_ptr> createTransformUnrankedHloPass(); // necessary to export to XLA. std::unique_ptr> createSinkConstantsToControlFlowPass(); +// fuse xla_hlo ops to kLoop/kInput fusion patterns +std::unique_ptr> createXlaHloFusionPass(); + } // namespace xla_hlo namespace xla_lhlo { diff --git a/tensorflow/compiler/mlir/xla/transforms/xla_hlo_fusion.cc b/tensorflow/compiler/mlir/xla/transforms/xla_hlo_fusion.cc new file mode 100644 index 00000000000..76fceb20ff0 --- /dev/null +++ b/tensorflow/compiler/mlir/xla/transforms/xla_hlo_fusion.cc @@ -0,0 +1,568 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mlir/Dialect/StandardOps/IR/Ops.h" // TF:llvm-project +#include "mlir/IR/MLIRContext.h" // TF:llvm-project +#include "mlir/IR/Matchers.h" +#include "mlir/Transforms/RegionUtils.h" // TF:llvm-project +#include "mlir/Pass/Pass.h" // TF:local_config_mlir +#include "llvm/ADT/EquivalenceClasses.h" + +#include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h" +#include "tensorflow/compiler/mlir/xla/transforms/cycle_detector.h" + +#include +#include +#include +#include + +// This pass has similar functionality of the fusion pass in XLA stack. +// However, unlike XLA, it targets the fully dynamic shape scenario. +// Currently, it implements the kLoop and kInput fusion templates. +// During conversion, it tries to greedily find kLoop/kInput fusion +// patterns. +// +// Similar to XLA, this pass supports fusion pattern having multiple outputs +// if all the shape of outputs are consistent. Following are some examples. +// +// kLoop kInput +// +----+ +----+ +----+ +----+ +----+ +----+ +// |elem| |elem| |elem| |elem<----+elem+---->elem+----+ +// +-+--+ +-+--+ +-+--+ +-+--+ +----+ +-+--+ | +// | | | | | | +// | | | | | +// +-v--+ | +-v--+ +--v---+ +--v---+ | +// |elem+<---+----<+elem| |reduce| |reduce| | +// +-+--+ +-+--+ +--+---+ +--+---+ | +// | | | | | +// | | | | | +// v v v v v +// +// To this end, we also add an simple shape constraint analysis phase. +// For kLoop fusion template, it requires all the outputs of the fused +// pattern have the same shape. However, we don't know the actual value +// of the shape at the compile time in the dynamic shape world. +// Fortunately, we could still infer the relationship among different ops +// according to their shape constrain traits. Currently, We only consider +// shape equality propagation for elementwise ops (assuming that implicit +// shape broadcast is forbidden). The above process could be built on the +// shape dialect once it is ready. 
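+//
+// As a purely illustrative sketch (the op names and textual IR below are
+// placeholders for whatever element-wise xla_hlo ops occur in practice, not
+// output of this pass), a kLoop candidate could look like:
+//
+//   %0 = xla_hlo.add %arg0, %arg1       // element-wise
+//   %1 = xla_hlo.multiply %0, %arg2     // element-wise, same shape as %0
+//   %2 = xla_hlo.subtract %1, %arg0     // element-wise, same shape as %1
+//
+// Even though the common shape is unknown at compile time, the shape
+// constraint analysis can prove that %0, %1 and %2 share it, so the three ops
+// can be grouped into one fusion pattern and later emitted as a single
+// parallel loop.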
+ +namespace mlir { + +// To support use EquivalenceClasses directly. +bool operator<(Value lhs, Value rhs) { + return lhs.getAsOpaquePointer() < rhs.getAsOpaquePointer(); +} + +namespace xla_hlo { +namespace { + +using llvm::EquivalenceClasses; +using FusionPattern = std::vector; +using FusionPlan = std::vector; + +bool IsFusible(Operation* op) { + if (matchPattern(op, m_Constant())) { + return true; + } + auto op_fusibility = dyn_cast(op); + return op_fusibility && (op_fusibility.isFusibleWithOperand() || + op_fusibility.isFusibleWithConsumer()); +} + +SmallVector GetInputsOfFusionPattern(const FusionPattern& pattern) { + SmallVector inputs; + DenseSet input_set; + DenseSet op_set; + for (Operation* op : pattern) { + bool inserted = op_set.insert(op).second; + assert(inserted && "FusionPattern contains duplicate operations"); + } + + for (Operation* op : pattern) { + for (Value operand : op->getOperands()) { + Operation* operand_op = operand.getDefiningOp(); + if (op_set.find(operand_op) != op_set.end()) { + // skip if defining op is in the pattern + continue; + } + if (input_set.insert(operand).second) { + inputs.push_back(operand); + } + } + } + return inputs; +} + +SmallVector GetOutputsOfFusionPattern(const FusionPattern& pattern) { + SmallVector outputs; + DenseSet op_set; + for (Operation* op : pattern) { + bool inserted = op_set.insert(op).second; + assert(inserted && "FusionPattern contains duplicate operations"); + } + + for (Operation* op : pattern) { + for (Value result : op->getResults()) { + bool has_external_user = + llvm::any_of(result.getUses(), [&] (OpOperand &use) { + return !op_set.count(use.getOwner()); + }); + if (has_external_user) { + outputs.push_back(result); + } + } + } + return outputs; +} + +FusionPattern MergeFusionPattern( + const FusionPattern& lhs, const FusionPattern& rhs) { + FusionPattern pattern(lhs); + pattern.insert(pattern.end(), rhs.begin(), rhs.end()); + return pattern; +} + +inline int EffectiveSize(const FusionPattern& pattern) { + return llvm::count_if(pattern, [](Operation* op) { + return !matchPattern(op, m_Constant()); + }); +} + +// This is an simple shape constraint analysis, which is used to +// guide fusion decision (e.g. we only fuse shape-compatible ops). +// +// Currently, We only consider shape equality propagation based +// on the shape constrain traits of elementwise ops (assuming that +// implicit shape broadcast is forbidden). +class ShapeConstraintAnalysis { + public: + explicit ShapeConstraintAnalysis(const SmallVectorImpl& op_list) { + PropagateEquality(op_list); + } + + // Returns true is `lhs` and `rhs` are supposed to have same shape. + bool HasSameShape(Value lhs, Value rhs) { + return impl_.isEquivalent(lhs, rhs); + } + + private: + // shape equality propagation based on the shape constrains of + // elementwise ops. + void PropagateEquality(const SmallVectorImpl& op_list) { + bool converged = true; + do { + converged = true; + auto update = [&](Value lhs, Value rhs) { + if (!impl_.isEquivalent(lhs, rhs)) { + converged = false; + impl_.unionSets(lhs, rhs); + } + }; + for (Operation* op : op_list) { + auto op_fusibility = dyn_cast(op); + if (!op_fusibility) continue; + int numInput = op->getNumOperands(); + int numOutput = op->getNumResults(); + // shape equality propagation between inputs. 
+        for (int input1 = 0; input1 < numInput; ++input1)
+          for (int input2 = input1+1; input2 < numInput; ++input2)
+            if (op_fusibility.inferInputsShapeEquality(input1, input2))
+              update(op->getOperand(input1), op->getOperand(input2));
+
+        // shape equality propagation between outputs.
+        for (int output1 = 0; output1 < numOutput; ++output1)
+          for (int output2 = output1+1; output2 < numOutput; ++output2)
+            if (op_fusibility.inferOutputsShapeEquality(output1, output2))
+              update(op->getResult(output1), op->getResult(output2));
+
+        // shape equality propagation between input and output.
+        for (int input = 0; input < numInput; ++input)
+          for (int output = 0; output < numOutput; ++output)
+            if (op_fusibility.inferInputOutputShapeEquality(input, output))
+              update(op->getOperand(input), op->getResult(output));
+      }
+    } while (!converged);
+  }
+
+  // A union-find set.
+  EquivalenceClasses impl_;
+};
+
+// A fusion planner that can propose a fusion plan for a block of ops.
+// The fusion plan consists of a group of fusion patterns.
+//
+// Currently all proposed patterns follow the XLA kLoop/kInput fusion
+// templates, adapted to the fully dynamic shape world.
+//
+// The kLoop fusion template requires that:
+//   - all ops in the fusion pattern are element-wise.
+//   - all outputs of the fusion pattern have the same shape, and thus can
+//     fit into the same parallel loop.
+//
+// The kInput fusion template requires that:
+//   - every op in the fusion pattern is either element-wise or a reduction.
+//   - if an op is a reduction, its output cannot be consumed by other
+//     ops in the same fusion pattern.
+//   - all outputs of the fusion pattern have the same effective shape.
+//     - For an element-wise op, its effective shape is its output shape.
+//     - For a reduction op, its effective shape is its operand shape.
+class FusionPlanner {
+ public:
+  explicit FusionPlanner(const SmallVectorImpl& op_list)
+      : op_list_(op_list),
+        shape_analysis_(op_list),
+        cycle_detector_(op_list.size()) {
+    BuildNodeMap();
+  }
+
+  // Returns a fusion plan on success, otherwise none.
+  llvm::Optional Run() {
+    // Greedily search for connected fusible patterns; ops belonging to
+    // the same fusion pattern are grouped into a cluster.
+    RunEdgeContractionLoop();
+
+    // After doing edge contraction, each unique cluster having size
+    // more than one represents a potential fusion pattern.
+    // We collect all these clusters and construct a fusion plan.
+    //
+    // Note that the ops in a fusion pattern are in topological ordering.
+    FusionPlan plan;
+    DenseMap pattern_ids;
+    for (Operation* op : op_list_) {
+      Cluster* cluster = GetClusterForNode(op);
+      int node_id = cluster->cycles_graph_node_id();
+      if (!IsFusible(op_list_[node_id]) ||
+          EffectiveSize(GetClusterForNode(op)->fused_pattern()) <= 1) {
+        continue;
+      }
+      if (!pattern_ids.count(node_id)) {
+        int pattern_id = pattern_ids.size();
+        pattern_ids[node_id] = pattern_id;
+        plan.emplace_back();
+      }
+      plan[pattern_ids[node_id]].push_back(op);
+    }
+    return plan;
+  }
+
+  // Returns the op_list this planner operates on.
+  const SmallVectorImpl& op_list() const {
+    return op_list_;
+  }
+
+ private:
+  // Represents a (partial) fused pattern.
+  class Cluster {
+   public:
+    Cluster(int node_id, FusionPlanner* planner)
+        : node_id_(node_id) {
+      const SmallVectorImpl& op_list = planner->op_list();
+      pattern_.push_back(op_list[node_id]);
+    }
+
+    // Merges `other` into this cluster, and clears `other`.
+ void Merge(Cluster* other) { + pattern_.insert(pattern_.end(), + other->pattern_.begin(), other->pattern_.end()); + other->pattern_.clear(); + } + + // The number of nodes in this cluster. + int cluster_size() const { return pattern_.size(); } + + // The ID of the cluster as represented in `cycle_detector_`. + int cycles_graph_node_id() const { return node_id_; } + + // Sets the ID of the cluster as represented in `cycle_detector_`. + void set_cycles_graph_node_id(int cycles_graph_node_id) { + node_id_ = cycles_graph_node_id; + } + + // Currently the fused pattern this cluster holds. + const FusionPattern& fused_pattern() { + return pattern_; + } + + private: + // ID of the representative node of this cluster. + int node_id_; + + // the fused pattern this cluster holds. + FusionPattern pattern_; + }; + + private: + Cluster* MakeCluster(int cycles_graph_node_id) { + cluster_storage_.emplace_back( + new Cluster(cycles_graph_node_id, this)); + return cluster_storage_.back().get(); + } + + void BuildNodeMap() { + int num_nodes = op_list_.size(); + for (int node_id = 0; node_id < num_nodes; ++node_id) { + Operation* op = op_list_[node_id]; + MakeCluster(node_id); + op_to_node_id_[op] = node_id; + leader_for_node_.insert(node_id); + for (Value operand : op->getOperands()) { + Operation* operand_op = operand.getDefiningOp(); + if (operand_op == nullptr) { + // skip block argument + continue; + } + auto iter = op_to_node_id_.find(operand_op); + assert(iter != op_to_node_id_.end()); + cycle_detector_.InsertEdge(iter->second, node_id); + } + } + } + + // Returns the cluster contains this op. + Cluster* GetClusterForNode(Operation* n) { + int id = op_to_node_id_.at(n); + id = leader_for_node_.getLeaderValue(id); + return cluster_storage_[id].get(); + } + + // Returns the cluster contains the op having `node_id`. + Cluster* GetClusterForCyclesGraphNode(int node_id) { + return cluster_storage_[leader_for_node_.getLeaderValue(node_id)].get(); + } + + // Merges the clusters `cluster_from` and `cluster_to`. + bool MergeClusters(Cluster* cluster_from, Cluster* cluster_to) { + int from = cluster_from->cycles_graph_node_id(); + int to = cluster_to->cycles_graph_node_id(); + + auto optional_merged_node = cycle_detector_.ContractEdge(from, to); + if (!optional_merged_node.hasValue()) { + llvm::dbgs() << "Could not contract " << from << " -> " << to + << " because contracting the edge would create a cycle."; + return false; + } + + // Merge the clusters. + cluster_from->Merge(cluster_to); + cluster_from->set_cycles_graph_node_id(*optional_merged_node); + + // Merge the UnionFind Set. + leader_for_node_.unionSets(from, to); + return true; + } + + template + bool ForEachEdgeInPostOrder(FnTy fn) { + bool changed = false; + for (int32_t node : cycle_detector_.AllNodesInPostOrder()) { + Cluster* cluster_from = GetClusterForCyclesGraphNode(node); + // Make a copy of the set of successors because we may modify the graph in + // TryToContractEdge. 
+      std::vector successors_copy =
+          cycle_detector_.SuccessorsCopy(cluster_from->cycles_graph_node_id());
+
+      for (int to : successors_copy) {
+        Cluster* cluster_to = GetClusterForCyclesGraphNode(to);
+        bool contracted_edge = fn(cluster_from, cluster_to);
+        changed |= contracted_edge;
+      }
+    }
+
+    return changed;
+  }
+
+  // Returns the outputs of the pattern that would result from merging the
+  // two clusters.
+  SmallVector GetResultsOfFusedPattern(Cluster* from, Cluster* to) {
+    FusionPattern fused_pattern = MergeFusionPattern(
+        from->fused_pattern(), to->fused_pattern());
+    return GetOutputsOfFusionPattern(fused_pattern);
+  }
+
+  // Checks whether fusing `from` with `to` is valid and, if so, performs
+  // the merge. The validity is based on the operations in the clusters and
+  // the compatibility of the shapes of the outputs of the would-be fused
+  // clusters.
+  // Returns true if the merge was performed.
+  bool TryToContractEdge(Cluster* from, Cluster* to) {
+    int node_to = to->cycles_graph_node_id();
+    int node_from = from->cycles_graph_node_id();
+
+    // Both node_to and node_from should be fusible.
+    if (!IsFusible(op_list_[node_to]) || !IsFusible(op_list_[node_from])) {
+      return false;
+    }
+
+    auto op_from_fusibility = dyn_cast(op_list_[node_from]);
+    if (op_from_fusibility && !op_from_fusibility.isFusibleWithConsumer()) {
+      // This op cannot be fused with its consumers.
+      return false;
+    }
+
+    auto op_to_fusibility = dyn_cast(op_list_[node_to]);
+    if (op_to_fusibility && !op_to_fusibility.isFusibleWithOperand()) {
+      // This op cannot be fused with its operands.
+      return false;
+    }
+
+    // Output shapes of a fusion pattern should be compatible as described in
+    // the documentation of this class.
+    SmallVector results = GetResultsOfFusedPattern(from, to);
+    auto get_workload_shape = [] (Value v) {
+      Operation* op = v.getDefiningOp();
+      // Block argument.
+      if (!op) return v;
+      auto op_fusibility = dyn_cast(op);
+      // Const value.
+      if (!op_fusibility) return v;
+      llvm::Optional workload = op_fusibility.inferEffectiveWorkloadShape();
+      return workload.hasValue() ? *workload : v;
+    };
+
+    Value ref = get_workload_shape(results[0]);
+    if (!llvm::all_of(results, [&] (Value result) {
+          Value val = get_workload_shape(result);
+          return shape_analysis_.HasSameShape(ref, val);
+        })) {
+      return false;
+    }
+
+    return MergeClusters(from, to);
+  }
+
+  // Greedily fuses connected nodes.
+  bool RunEdgeContractionLoop() {
+    using std::placeholders::_1;
+    using std::placeholders::_2;
+    return ForEachEdgeInPostOrder(
+        std::bind(&FusionPlanner::TryToContractEdge, this, _1, _2));
+  }
+
+  const SmallVectorImpl& op_list_;
+
+  // Shape equality checker.
+  ShapeConstraintAnalysis shape_analysis_;
+
+  // op -> node_id
+  std::unordered_map op_to_node_id_;
+
+  // Makes sure no cycle is introduced after fusion.
+  GraphCycles cycle_detector_;
+  std::vector> cluster_storage_;
+
+  // A union-find set. Each set represents a (partial) fused pattern
+  // and has a leader as its representative.
+  EquivalenceClasses leader_for_node_;
+};
+
+struct XlaHloFusion : public mlir::PassWrapper {
+  void runOnFunction() override {
+    FuncOp func = getFunction();
+    if (!IsTargetFunc(func)) {
+      return;
+    }
+
+    // Process each block and do fusion within a block.
+ for (Block& block : func.getBlocks()) { + SmallVector op_list; + for (Operation& op : block) { + op_list.push_back(&op); + } + + FusionPlanner planner(op_list); + llvm::Optional plan = planner.Run(); + if (!plan) { + emitError(func.getLoc(), "can't find a fusion plan"); + signalPassFailure(); + return; + } + if (!ApplyFusionPlan(*plan)) { + emitError(func.getLoc(), "apply fusion plan failed"); + signalPassFailure(); + return; + } + } + } + + bool IsTargetFunc(FuncOp func) { + int num_fusible_ops = 0; + bool is_target_func = false; + // We only process the function having enough candidates + func.walk([&](Operation* op) { + num_fusible_ops += static_cast( + dyn_cast(op) != nullptr); + is_target_func = (num_fusible_ops > 1); + // early stop + if (is_target_func) return WalkResult::interrupt(); + return WalkResult::advance(); + }); + return is_target_func; + } + + bool ApplyFusionPlan(const FusionPlan& plan) { + for (const FusionPattern& pattern: plan) { + OpBuilder b(pattern.back()); + + SmallVector locations; + locations.reserve(pattern.size()); + for (Operation* op : pattern) { + locations.push_back(op->getLoc()); + } + Location fused_loc = FusedLoc::get(locations, pattern.back()->getContext()); + + SmallVector inputs = GetInputsOfFusionPattern(pattern); + SmallVector outputs = GetOutputsOfFusionPattern(pattern); + SmallVector output_types; + output_types.reserve(outputs.size()); + for(Value v : outputs) { + output_types.push_back(v.getType()); + } + + FusionOp fusion = b.create(fused_loc, output_types, inputs); + Region& region = fusion.fused_computation(); + region.push_back(new Block); + Block& block = region.front(); + for (Operation* op : pattern) { + op->moveBefore(&block, block.end()); + } + b.setInsertionPoint(&block, block.end()); + b.create(fused_loc, outputs); + + for (auto output_and_result : llvm::zip(outputs, fusion.getResults())) { + Value output = std::get<0>(output_and_result); + Value fusion_result = std::get<1>(output_and_result); + for (OpOperand &use : llvm::make_early_inc_range(output.getUses())) { + if (use.getOwner()->getBlock() != &block) + use.set(fusion_result); + } + } + } + return true; + } +}; + +} // namespace + +std::unique_ptr> createXlaHloFusion() { + return std::make_unique(); +} + +static PassRegistration xla_hlo_fusion_pass( + "xla-hlo-fusion", + "fuse xla_hlo ops to kLoop/kInput fusion patterns."); + +} // namespace xla_hlo +} // namespace mlir From 8b19be016aaf878d30480b4efcceb16f6d810cce Mon Sep 17 00:00:00 2001 From: zilinzhu Date: Sun, 7 Jun 2020 01:30:55 +0800 Subject: [PATCH 026/178] update c++ side cardinality test --- .../data/experimental/map_and_batch_dataset_op_test.cc | 6 +++--- tensorflow/core/kernels/data/map_dataset_op_test.cc | 2 +- .../core/kernels/data/parallel_map_dataset_op_test.cc | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op_test.cc b/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op_test.cc index 3c43687301b..285e1ca3db0 100644 --- a/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op_test.cc +++ b/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op_test.cc @@ -316,17 +316,17 @@ DATASET_OUTPUT_SHAPES_TEST_P(MapAndBatchDatasetOpTest, MapAndBatchDatasetParams, std::vector> CardinalityTestCases() { return {{/*dataset_params=*/MapAndBatchDatasetParams1(), - /*expected_cardinality=*/2}, + /*expected_cardinality=*/-2}, {/*dataset_params=*/MapAndBatchDatasetParams2(), 
/*expected_cardinality=*/2}, {/*dataset_params=*/MapAndBatchDatasetParams3(), /*expected_cardinality=*/3}, {/*dataset_params=*/MapAndBatchDatasetParams4(), - /*expected_cardinality=*/2}, + /*expected_cardinality=*/-2}, {/*dataset_params=*/MapAndBatchDatasetParams5(), /*expected_cardinality=*/2}, {/*dataset_params=*/MapAndBatchDatasetParams6(), - /*expected_cardinality=*/3}}; + /*expected_cardinality=*/-2}}; } DATASET_CARDINALITY_TEST_P(MapAndBatchDatasetOpTest, MapAndBatchDatasetParams, diff --git a/tensorflow/core/kernels/data/map_dataset_op_test.cc b/tensorflow/core/kernels/data/map_dataset_op_test.cc index 0d89e8e4be0..b5fb7709bb1 100644 --- a/tensorflow/core/kernels/data/map_dataset_op_test.cc +++ b/tensorflow/core/kernels/data/map_dataset_op_test.cc @@ -134,7 +134,7 @@ std::vector> CardinalityTestCases() { return {{/*dataset_params=*/MapDatasetParams1(), /*expected_cardinality=*/4}, {/*dataset_params=*/MapDatasetParams2(), - /*expected_cardinality=*/2}, + /*expected_cardinality=*/-2}, {/*dataset_params=*/MapDatasetParams3(), /*expected_cardinality=*/4}}; } diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op_test.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op_test.cc index 3fb83d5208f..ea701c691c1 100644 --- a/tensorflow/core/kernels/data/parallel_map_dataset_op_test.cc +++ b/tensorflow/core/kernels/data/parallel_map_dataset_op_test.cc @@ -329,17 +329,17 @@ TEST_F(ParallelMapDatasetOpTest, DatasetOutputShapes) { std::vector> CardinalityTestCases() { return {{/*dataset_params=*/ParallelMapDatasetParams1(), - /*expected_cardinality=*/4}, + /*expected_cardinality=*/-2}, {/*dataset_params=*/ParallelMapDatasetParams2(), /*expected_cardinality=*/4}, {/*dataset_params=*/ParallelMapDatasetParams3(), - /*expected_cardinality=*/4}, + /*expected_cardinality=*/-2}, {/*dataset_params=*/ParallelMapDatasetParams4(), - /*expected_cardinality=*/4}, + /*expected_cardinality=*/-2}, {/*dataset_params=*/ParallelMapDatasetParams5(), /*expected_cardinality=*/4}, {/*dataset_params=*/ParallelMapDatasetParams6(), - /*expected_cardinality=*/4}}; + /*expected_cardinality=*/-2}}; } DATASET_CARDINALITY_TEST_P(ParallelMapDatasetOpTest, ParallelMapDatasetParams, From 477203e3257edd49434e782b51586105109cf2ad Mon Sep 17 00:00:00 2001 From: zilinzhu Date: Sun, 7 Jun 2020 08:46:15 +0800 Subject: [PATCH 027/178] use kUnknownCardinality in test --- .../data/experimental/map_and_batch_dataset_op_test.cc | 6 +++--- tensorflow/core/kernels/data/map_dataset_op_test.cc | 2 +- .../core/kernels/data/parallel_map_dataset_op_test.cc | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op_test.cc b/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op_test.cc index 285e1ca3db0..0b5ff48febc 100644 --- a/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op_test.cc +++ b/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op_test.cc @@ -316,17 +316,17 @@ DATASET_OUTPUT_SHAPES_TEST_P(MapAndBatchDatasetOpTest, MapAndBatchDatasetParams, std::vector> CardinalityTestCases() { return {{/*dataset_params=*/MapAndBatchDatasetParams1(), - /*expected_cardinality=*/-2}, + /*expected_cardinality=*/kUnknownCardinality}, {/*dataset_params=*/MapAndBatchDatasetParams2(), /*expected_cardinality=*/2}, {/*dataset_params=*/MapAndBatchDatasetParams3(), /*expected_cardinality=*/3}, {/*dataset_params=*/MapAndBatchDatasetParams4(), - /*expected_cardinality=*/-2}, + /*expected_cardinality=*/kUnknownCardinality}, 
{/*dataset_params=*/MapAndBatchDatasetParams5(), /*expected_cardinality=*/2}, {/*dataset_params=*/MapAndBatchDatasetParams6(), - /*expected_cardinality=*/-2}}; + /*expected_cardinality=*/kUnknownCardinality}}; } DATASET_CARDINALITY_TEST_P(MapAndBatchDatasetOpTest, MapAndBatchDatasetParams, diff --git a/tensorflow/core/kernels/data/map_dataset_op_test.cc b/tensorflow/core/kernels/data/map_dataset_op_test.cc index b5fb7709bb1..e773fc5509f 100644 --- a/tensorflow/core/kernels/data/map_dataset_op_test.cc +++ b/tensorflow/core/kernels/data/map_dataset_op_test.cc @@ -134,7 +134,7 @@ std::vector> CardinalityTestCases() { return {{/*dataset_params=*/MapDatasetParams1(), /*expected_cardinality=*/4}, {/*dataset_params=*/MapDatasetParams2(), - /*expected_cardinality=*/-2}, + /*expected_cardinality=*/kUnknownCardinality}, {/*dataset_params=*/MapDatasetParams3(), /*expected_cardinality=*/4}}; } diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op_test.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op_test.cc index ea701c691c1..2e56b1b2685 100644 --- a/tensorflow/core/kernels/data/parallel_map_dataset_op_test.cc +++ b/tensorflow/core/kernels/data/parallel_map_dataset_op_test.cc @@ -329,17 +329,17 @@ TEST_F(ParallelMapDatasetOpTest, DatasetOutputShapes) { std::vector> CardinalityTestCases() { return {{/*dataset_params=*/ParallelMapDatasetParams1(), - /*expected_cardinality=*/-2}, + /*expected_cardinality=*/kUnknownCardinality}, {/*dataset_params=*/ParallelMapDatasetParams2(), /*expected_cardinality=*/4}, {/*dataset_params=*/ParallelMapDatasetParams3(), - /*expected_cardinality=*/-2}, + /*expected_cardinality=*/kUnknownCardinality}, {/*dataset_params=*/ParallelMapDatasetParams4(), - /*expected_cardinality=*/-2}, + /*expected_cardinality=*/kUnknownCardinality}, {/*dataset_params=*/ParallelMapDatasetParams5(), /*expected_cardinality=*/4}, {/*dataset_params=*/ParallelMapDatasetParams6(), - /*expected_cardinality=*/-2}}; + /*expected_cardinality=*/kUnknownCardinality}}; } DATASET_CARDINALITY_TEST_P(ParallelMapDatasetOpTest, ParallelMapDatasetParams, From a5b24dc2d1d0b9afa63f1e105d3f4e9aca6add8b Mon Sep 17 00:00:00 2001 From: frreiss Date: Sat, 6 Jun 2020 21:47:18 -0700 Subject: [PATCH 028/178] Add static_cast to comparison with unsigned int --- tensorflow/lite/micro/micro_allocator.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/micro/micro_allocator.cc b/tensorflow/lite/micro/micro_allocator.cc index 72248f8729f..e5b24aeb636 100644 --- a/tensorflow/lite/micro/micro_allocator.cc +++ b/tensorflow/lite/micro/micro_allocator.cc @@ -287,7 +287,7 @@ TfLiteStatus FlatBufferIntArrayToTfLiteIntArray( return kTfLiteError; } ret->size = flat_array->Length(); - for (int i = 0; i < flat_array->Length(); i++) { + for (int64_t i = 0; i < static_cast(flat_array->Length()); i++) { ret->data[i] = flat_array->Get(i); } *result = ret; From 2ffed0891b26dba28b7281cf33a92ff574395203 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Sun, 7 Jun 2020 12:57:05 +0200 Subject: [PATCH 029/178] Undefined name: import sys for line 56 `sys` is neither defined nor imported which leads to an _undefined name_ which has the potential to raise NameError at runtime. 
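
A minimal, hypothetical reproduction (the actual expression on line 56 of
runlit.cfg.py is not shown here; `sys.executable` is used only as a stand-in
for whatever `sys` attribute that line references):

    # before the fix: NameError: name 'sys' is not defined
    print(sys.executable)

    # after the fix
    import sys
    print(sys.executable)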
--- tensorflow/compiler/mlir/runlit.cfg.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/mlir/runlit.cfg.py b/tensorflow/compiler/mlir/runlit.cfg.py index f1271d0da24..2d225342b56 100644 --- a/tensorflow/compiler/mlir/runlit.cfg.py +++ b/tensorflow/compiler/mlir/runlit.cfg.py @@ -19,6 +19,7 @@ from __future__ import print_function import os import platform +import sys import lit.formats from lit.llvm import llvm_config from lit.llvm.subst import ToolSubst From 2c6f2884fd8ab81e3fae10f51bc68823ac250de5 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Sun, 7 Jun 2020 16:06:37 +0200 Subject: [PATCH 030/178] xrange() was removed from Python on 1/1/2020 --- tensorflow/lite/testing/op_tests/lstm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/testing/op_tests/lstm.py b/tensorflow/lite/testing/op_tests/lstm.py index 549c839873e..142cbdb476f 100644 --- a/tensorflow/lite/testing/op_tests/lstm.py +++ b/tensorflow/lite/testing/op_tests/lstm.py @@ -48,7 +48,7 @@ def make_lstm_tests(options): input_vec_size = parameters["input_vec_size"] num_cells = parameters["num_cells"] inputs_after_split = [] - for i in xrange(time_step_size): + for i in range(time_step_size): one_timestamp_input = tf.compat.v1.placeholder( dtype=parameters["dtype"], name="split_{}".format(i), @@ -82,7 +82,7 @@ def make_lstm_tests(options): time_step_size = parameters["time_step_size"] input_vec_size = parameters["input_vec_size"] input_values = [] - for _ in xrange(time_step_size): + for _ in range(time_step_size): tensor_data = create_tensor_data(parameters["dtype"], [num_batchs, input_vec_size], 0, 1) input_values.append(tensor_data) From 974e63ae58e7af322e90636790862fa64c86574a Mon Sep 17 00:00:00 2001 From: Carlos Hernandez-Vaquero Date: Sun, 7 Jun 2020 17:56:10 +0200 Subject: [PATCH 031/178] Updated steps numbers from 13. 12 was duplicate --- tensorflow/lite/g3doc/microcontrollers/get_started.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/lite/g3doc/microcontrollers/get_started.md b/tensorflow/lite/g3doc/microcontrollers/get_started.md index 008d5c65422..999438311d8 100644 --- a/tensorflow/lite/g3doc/microcontrollers/get_started.md +++ b/tensorflow/lite/g3doc/microcontrollers/get_started.md @@ -301,7 +301,7 @@ successfully run. TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, invoke_status); ``` -### 12. Obtain the output +### 13. Obtain the output The model's output tensor can be obtained by calling `output(0)` on the `tflite::MicroInterpreter`, where `0` represents the first (and only) output @@ -328,7 +328,7 @@ float value = output->data.f[0]; TF_LITE_MICRO_EXPECT_NEAR(0., value, 0.05); ``` -### 13. Run inference again +### 14. Run inference again The remainder of the code runs inference several more times. In each instance, we assign a value to the input tensor, invoke the interpreter, and read the @@ -351,7 +351,7 @@ value = output->data.f[0]; TF_LITE_MICRO_EXPECT_NEAR(-0.959, value, 0.05); ``` -### 14. Read the application code +### 15. Read the application code Once you have walked through this unit test, you should be able to understand the example's application code, located in From bcd0f459a15ef4cc11ae84eb17abac048ac23570 Mon Sep 17 00:00:00 2001 From: Srinivasan Narayanamoorthy Date: Sun, 7 Jun 2020 15:48:07 -0700 Subject: [PATCH 032/178] matmul,qmatmul and fusedops support for threadpool api. 
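
The change applies one recurring pattern across these kernels: thread the
OpKernelContext down to where the MKL-DNN stream is created, so that under
ENABLE_MKLDNN_THREADPOOL the primitive runs on TensorFlow's Eigen thread pool
(e.g. via dnnl_sgemm_tp) instead of a default-constructed CPU stream.
Schematically (a sketch assembled from the hunks below, not a complete
kernel; the shared_ptr's template argument, elided in the hunks, is assumed
to be the MKL-DNN stream type):

    // Build a stream tied to the context's thread pool, then pass it to the
    // primitive instead of letting the primitive own a private CPU stream.
    std::shared_ptr<stream> cpu_stream;
    cpu_stream.reset(CreateStream(ctx, matmul_prim->GetEngine()));
    matmul_prim->Execute(a, b, c, cpu_stream);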
--- tensorflow/core/kernels/mkl_matmul_op.cc | 22 ++++-- .../core/kernels/mkl_matmul_op_fused.cc | 33 +++++---- .../core/kernels/mkl_matmul_ops_common.h | 71 ++++++++++++------- tensorflow/core/kernels/mkl_qmatmul_op.cc | 30 +++++--- 4 files changed, 97 insertions(+), 59 deletions(-) diff --git a/tensorflow/core/kernels/mkl_matmul_op.cc b/tensorflow/core/kernels/mkl_matmul_op.cc index 86193901c96..fb197618fb0 100644 --- a/tensorflow/core/kernels/mkl_matmul_op.cc +++ b/tensorflow/core/kernels/mkl_matmul_op.cc @@ -62,11 +62,11 @@ class MklMatMulOp : public OpKernel { dim_pair[0].first = transpose_a_ ? 0 : 1; dim_pair[0].second = transpose_b_ ? 1 : 0; - OP_REQUIRES( - ctx, a.dim_size(dim_pair[0].first) == b.dim_size(dim_pair[0].second), - errors::InvalidArgument( - "Matrix size-incompatible: In[0]: ", a.shape().DebugString(), - ", In[1]: ", b.shape().DebugString())); + OP_REQUIRES(ctx, + a.dim_size(dim_pair[0].first) == b.dim_size(dim_pair[0].second), + errors::InvalidArgument("Matrix size-incompatible: In[0]: ", + a.shape().DebugString(), ", In[1]: ", + b.shape().DebugString())); int a_dim_remaining = 1 - dim_pair[0].first; int b_dim_remaining = 1 - dim_pair[0].second; TensorShape out_shape( @@ -158,9 +158,17 @@ class MklMatMulOp : public OpKernel { #ifdef ENABLE_MKLDNN_V1 char char_transa = transa ? 'T' : 'N'; char char_transb = transb ? 'T' : 'N'; - VLOG(2) << "MKL DNN SGEMM CALLED"; + VLOG(2) << "MKL DNN SGEMM called"; +#ifdef ENABLE_MKLDNN_THREADPOOL + auto eigen_tp = + MklDnnThreadPoolWrapper::GetInstance().CreateThreadPoolPtr(ctx); + + dnnl_sgemm_tp(char_transa, char_transb, m, n, k, alpha, a, lda, b, ldb, + beta, c, ldc, eigen_tp); +#else dnnl_sgemm(char_transa, char_transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); +#endif // ENABLE_MKLDNN_THREADPOOL #else // TODO(intel-tf): Remove this after TF2.3 fork. cblas_sgemm(CblasRowMajor, transa ? 
CblasTrans : CblasNoTrans, @@ -182,7 +190,7 @@ class MklMatMulOp : public OpKernel { #ifdef ENABLE_MKLDNN_V1 const char ftrans[] = {'N', 'T', 'C'}; dnnl_gemm(ftrans[index_transa], ftrans[index_transb], m, n, k, - alpha, a, lda, b, ldb, beta, c, ldc); + alpha, a, lda, b, ldb, beta, c, ldc, ctx); #else Tensor c_float; OP_REQUIRES_OK(ctx, ctx->allocate_temp(DT_FLOAT, {m, n}, &c_float)); diff --git a/tensorflow/core/kernels/mkl_matmul_op_fused.cc b/tensorflow/core/kernels/mkl_matmul_op_fused.cc index 99a2cfc214b..f3608ef72a8 100644 --- a/tensorflow/core/kernels/mkl_matmul_op_fused.cc +++ b/tensorflow/core/kernels/mkl_matmul_op_fused.cc @@ -86,11 +86,10 @@ class MklFusedMatMulOp : public MklDnnMatMulOpBase { const int k = src_tf_shape.dim_size(dim_pair[0]); const int channel = weight_tf_shape.dim_size(1 - dim_pair[1]); - OP_REQUIRES( - ctx, k == weight_tf_shape.dim_size(dim_pair[1]), - errors::InvalidArgument( - "Matrix size-incompatible: In[0]: ", src_tf_shape.DebugString(), - ", In[1]: ", weight_tf_shape.DebugString())); + OP_REQUIRES(ctx, k == weight_tf_shape.dim_size(dim_pair[1]), + errors::InvalidArgument("Matrix size-incompatible: In[0]: ", + src_tf_shape.DebugString(), ", In[1]: ", + weight_tf_shape.DebugString())); OP_REQUIRES(ctx, bias_tensor.shape().dim_size(0) == channel, errors::InvalidArgument( "Must provide as many biases as the channel size: ", @@ -159,8 +158,10 @@ class MklFusedMatMulOp : public MklDnnMatMulOpBase { if (IS_SRC_REORDER_NEEDED(src_md, matmul_pd, matmul_prim)) { src_mkl.SetUsrMem(src_md, src_data); - src_mkl.CheckReorderToOpMem(MEMORY_PD_WITHOUT_DATA( - matmul_pd.get()->PRIMITIVE_DESC_SRC, this->cpu_engine_)); + src_mkl.CheckReorderToOpMem( + MEMORY_PD_WITHOUT_DATA(matmul_pd.get()->PRIMITIVE_DESC_SRC, + this->cpu_engine_), + ctx); src_data = reinterpret_cast(src_mkl.GetOpMem().get_data_handle()); } @@ -191,19 +192,23 @@ class MklFusedMatMulOp : public MklDnnMatMulOpBase { weight_data = cached_weight_data; } else { weight_mkl.SetUsrMem(weight_md, weight_data); - weight_mkl.CheckReorderToOpMem(MEMORY_PD_WITHOUT_DATA( - matmul_pd.get()->PRIMITIVE_DESC_WEIGHTS, this->cpu_engine_)); + weight_mkl.CheckReorderToOpMem( + MEMORY_PD_WITHOUT_DATA(matmul_pd.get()->PRIMITIVE_DESC_WEIGHTS, + this->cpu_engine_), + ctx); weight_data = reinterpret_cast(weight_mkl.GetOpMem().get_data_handle()); } } - + std::shared_ptr cpu_stream; + cpu_stream.reset(CreateStream(ctx, matmul_prim->GetEngine())); // Execute fused matmul op. 
- matmul_prim->Execute(src_data, weight_data, bias_data, dst_data); + matmul_prim->Execute(src_data, weight_data, bias_data, dst_data, + cpu_stream); } catch (mkldnn::error& e) { - string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + string(e.message) + ", in file " + - string(__FILE__) + ":" + std::to_string(__LINE__); + string error_msg = "Status: " + std::to_string(e.status) + ", message: " + + string(e.message) + ", in file " + string(__FILE__) + + ":" + std::to_string(__LINE__); OP_REQUIRES_OK( ctx, errors::Aborted("Operation received an exception:", error_msg)); } diff --git a/tensorflow/core/kernels/mkl_matmul_ops_common.h b/tensorflow/core/kernels/mkl_matmul_ops_common.h index d3a05a4a6d2..d7af614ad04 100644 --- a/tensorflow/core/kernels/mkl_matmul_ops_common.h +++ b/tensorflow/core/kernels/mkl_matmul_ops_common.h @@ -75,8 +75,7 @@ class MklDnnMatMulFwdPrimitive : public MklPrimitive { public: explicit MklDnnMatMulFwdPrimitive( const MklDnnMatMulFwdParams& matmulFwdParams) - : cpu_engine_(ENGINE_CPU, 0) { - context_.fwd_stream.reset(new CPU_STREAM(cpu_engine_)); + : MklPrimitive(engine(ENGINE_CPU, 0)) { // Create matmul primitive if (context_.matmul_fwd == nullptr) { Setup(matmulFwdParams); @@ -91,7 +90,18 @@ class MklDnnMatMulFwdPrimitive : public MklPrimitive { // - bias_data: input data buffer of bias // - dst_data: output data buffer of dst void Execute(const Tinput* src_data, const Tweight* weight_data, - const Tbias* bias_data, Toutput* dst_data) { + const Tbias* bias_data, Toutput* dst_data, + std::shared_ptr fwd_stream) { +#ifdef ENABLE_MKLDNN_THREADPOOL + context_.src_mem->set_data_handle( + static_cast(const_cast(src_data)), *fwd_stream); + context_.weight_mem->set_data_handle( + static_cast(const_cast(weight_data)), *fwd_stream); + context_.bias_mem->set_data_handle( + static_cast(const_cast(bias_data))); + context_.dst_mem->set_data_handle(static_cast(dst_data), + *fwd_stream); +#else context_.src_mem->set_data_handle( static_cast(const_cast(src_data))); context_.weight_mem->set_data_handle( @@ -99,12 +109,12 @@ class MklDnnMatMulFwdPrimitive : public MklPrimitive { context_.bias_mem->set_data_handle( static_cast(const_cast(bias_data))); context_.dst_mem->set_data_handle(static_cast(dst_data)); +#endif // ENABLE_MKLDNN_THREADPOOL #ifdef ENABLE_MKLDNN_V1 - execute_primitives(context_.fwd_primitives, context_.fwd_stream, - context_.net_args); + execute_primitives(context_.fwd_primitives, fwd_stream, context_.net_args); #else - context_.fwd_stream->submit(context_.fwd_primitives); + fwd_stream->submit(context_.fwd_primitives); #endif // ENABLE_MKLDNN_V1 // After execution, set data handle back @@ -153,7 +163,6 @@ class MklDnnMatMulFwdPrimitive : public MklPrimitive { // Inner-product primitive. 
std::shared_ptr matmul_fwd; - std::shared_ptr fwd_stream; std::vector fwd_primitives; #ifdef ENABLE_MKLDNN_V1 @@ -176,8 +185,7 @@ class MklDnnMatMulFwdPrimitive : public MklPrimitive { weight_md(nullptr), bias_md(nullptr), dst_md(nullptr), - matmul_fwd(nullptr), - fwd_stream(nullptr) { + matmul_fwd(nullptr) { } }; @@ -292,7 +300,6 @@ class MklDnnMatMulFwdPrimitive : public MklPrimitive { } struct MklDnnMatMulFwdContext context_; - engine cpu_engine_; }; template PRIMITIVE_DESC_WEIGHTS, cpu_engine_)); + weight.CheckReorderToOpMem( + MEMORY_PD_WITHOUT_DATA(matmul_fwd_pd.get()->PRIMITIVE_DESC_WEIGHTS, + cpu_engine_), + context); weight_data = static_cast(weight.GetOpMem().get_data_handle()); Tensor* weight_tensor_ptr = nullptr; @@ -544,21 +553,28 @@ template class MklMatMulPrimitive : public MklPrimitive { public: explicit MklMatMulPrimitive(const MklMatMulParams& params) - : cpu_engine_(ENGINE_CPU, 0) { - context_.stream.reset(new CPU_STREAM(cpu_engine_)); + : MklPrimitive(engine(ENGINE_CPU, 0)) { // Create matmul primitive Setup(params); } ~MklMatMulPrimitive() {} - void Execute(const T* a_data, const T* b_data, T* c_data) { + void Execute(const T* a_data, const T* b_data, T* c_data, + std::shared_ptr stream) { +#ifdef ENABLE_MKLDNN_THREADPOOL + context_.a_mem->set_data_handle(static_cast(const_cast(a_data)), + *stream); + context_.b_mem->set_data_handle(static_cast(const_cast(b_data)), + *stream); + context_.c_mem->set_data_handle(static_cast(const_cast(c_data)), + *stream); +#else context_.a_mem->set_data_handle(static_cast(const_cast(a_data))); context_.b_mem->set_data_handle(static_cast(const_cast(b_data))); context_.c_mem->set_data_handle(static_cast(const_cast(c_data))); - - execute_primitives(context_.matmul_primitives, context_.stream, - context_.net_args); +#endif // ENABLE_MKLDNN_THREADPOOL + execute_primitives(context_.matmul_primitives, stream, context_.net_args); // After execution, set data handle back context_.a_mem->set_data_handle(DummyData); @@ -584,7 +600,6 @@ class MklMatMulPrimitive : public MklPrimitive { std::shared_ptr c_md; // MatMul primitive. - std::shared_ptr stream; std::vector matmul_primitives; std::vector> net_args; @@ -596,8 +611,7 @@ class MklMatMulPrimitive : public MklPrimitive { prim_desc(nullptr), a_md(nullptr), b_md(nullptr), - c_md(nullptr), - stream(nullptr) {} + c_md(nullptr) {} }; void Setup(const MklMatMulParams& params) { @@ -639,7 +653,6 @@ class MklMatMulPrimitive : public MklPrimitive { } struct MklMatMulContext context_; - engine cpu_engine_; }; template @@ -707,8 +720,8 @@ void dnnl_gemm_batch(const std::vector& transa, const std::vector& n, const std::vector& k, const std::vector& alpha, const T* a, const T* b, const std::vector& beta, T* c, - const int group_count, - const std::vector& group_size) { + const int group_count, const std::vector& group_size, + OpKernelContext* ctx = nullptr) { // Current BatchMatMul support in Tensorflow is narrower than the one offered // by MKL and MKL-DNN. Current BatchMatMul support in Tensorflow uses only 1 // group of size equal to batch_size, and all MatMul parameters (m, n, k, @@ -757,13 +770,15 @@ void dnnl_gemm_batch(const std::vector& transa, MklMatMulPrimitiveFactory::Get(params, 0); // Execute matmul primitive. 
- matmul_prim->Execute(a, b, c); + std::shared_ptr cpu_stream; + cpu_stream.reset(CreateStream(ctx, matmul_prim->GetEngine())); + matmul_prim->Execute(a, b, c, cpu_stream); } template void dnnl_gemm(char transa, char transb, int64_t m, int64_t n, int64_t k, float alpha, const T* a, int64_t lda, const T* b, int64_t ldb, - float beta, T* c, int64_t ldc) { + float beta, T* c, int64_t ldc, OpKernelContext* ctx = nullptr) { using dims = mkldnn::memory::dims; // Prepare strides based on the transa and transb flags: transposed @@ -786,7 +801,9 @@ void dnnl_gemm(char transa, char transb, int64_t m, int64_t n, int64_t k, MklMatMulPrimitiveFactory::Get(params, 0); // Execute matmul primitive. - matmul_prim->Execute(a, b, c); + std::shared_ptr cpu_stream; + cpu_stream.reset(CreateStream(ctx, matmul_prim->GetEngine())); + matmul_prim->Execute(a, b, c, cpu_stream); } } // anonymous namespace diff --git a/tensorflow/core/kernels/mkl_qmatmul_op.cc b/tensorflow/core/kernels/mkl_qmatmul_op.cc index cc7127e0559..e73f30db4da 100644 --- a/tensorflow/core/kernels/mkl_qmatmul_op.cc +++ b/tensorflow/core/kernels/mkl_qmatmul_op.cc @@ -245,8 +245,10 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase { Tinput* src_data = nullptr; if (IS_SRC_REORDER_NEEDED(src_md, matmul_fwd_pd, matmul_fwd)) { src.SetUsrMem(src_md, &src_tensor); - src.CheckReorderToOpMem(MEMORY_PD_WITHOUT_DATA( - matmul_fwd_pd.get()->PRIMITIVE_DESC_SRC, this->cpu_engine_)); + src.CheckReorderToOpMem( + MEMORY_PD_WITHOUT_DATA(matmul_fwd_pd.get()->PRIMITIVE_DESC_SRC, + this->cpu_engine_), + context); src_data = static_cast(src.GetOpMem().get_data_handle()); } else { src_data = static_cast( @@ -279,8 +281,11 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase { if (!is_weight_cached) { weight.SetUsrMem(weight_md, &weight_tensor); - weight.CheckReorderToOpMem(MEMORY_PD_WITHOUT_DATA( - matmul_fwd_pd.get()->PRIMITIVE_DESC_WEIGHTS, this->cpu_engine_)); + weight.CheckReorderToOpMem( + MEMORY_PD_WITHOUT_DATA( + matmul_fwd_pd.get()->PRIMITIVE_DESC_WEIGHTS, + this->cpu_engine_), + context); weight_data = static_cast(weight.GetOpMem().get_data_handle()); } @@ -290,10 +295,13 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase { const_cast(weight_tensor.flat().data())); } + std::shared_ptr cpu_stream; + cpu_stream.reset(CreateStream(context, matmul_fwd->GetEngine())); // Execute inner-product - Tbias* bias_data = this->GetBiasHandle(context, matmul_fwd_pd, - bias_tensor, weight_tensor); - matmul_fwd->Execute(src_data, weight_data, bias_data, dst_data); + Tbias* bias_data = this->GetBiasHandle( + context, matmul_fwd_pd, bias_tensor, weight_tensor, cpu_stream); + matmul_fwd->Execute(src_data, weight_data, bias_data, dst_data, + cpu_stream); } catch (mkldnn::error& e) { string error_msg = tensorflow::strings::StrCat( "Status: ", e.status, ", message: ", string(e.message), ", in file ", @@ -393,7 +401,8 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase { OpKernelContext* context, std::shared_ptr& mkldnn_matmul_fwd_pd, - const Tensor& bias_tensor, const Tensor& weight_tensor) { + const Tensor& bias_tensor, const Tensor& weight_tensor, + std::shared_ptr reorder_stream) { // If the bias is qint32, it means the bias is already converted offline. // and it can be added to matmul output directly. 
if (std::is_same::value) { @@ -449,7 +458,6 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase { std::vector scales; scales.push_back(out_scale); mkldnn::primitive_attr bias_attr; - stream reorder_stream = CPU_STREAM(this->cpu_engine_); bias_attr.set_output_scales(0, scales); void* bias_buf = static_cast( @@ -468,14 +476,14 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase { {MKLDNN_ARG_FROM, *input_bias_}, { MKLDNN_ARG_TO, *scaled_bias_ }}; - net.at(0).execute(reorder_stream, reorder_net_args); + net.at(0).execute(*reorder_stream, reorder_net_args); #else auto reorder_desc = mkldnn::reorder::primitive_desc( input_bias_->get_primitive_desc(), scaled_bias_->get_primitive_desc(), bias_attr); net.push_back( mkldnn::reorder(reorder_desc, *input_bias_, *scaled_bias_)); - reorder_stream.submit(net).wait(); + reorder_stream->submit(net).wait(); #endif // ENABLE_MKLDNN_V1 return reinterpret_cast(scaled_bias_->get_data_handle()); From bb1c937e374caaceba479ee447d73a7f9b15be70 Mon Sep 17 00:00:00 2001 From: Srinivasan Narayanamoorthy Date: Sun, 7 Jun 2020 17:17:12 -0700 Subject: [PATCH 033/178] review comments. --- tensorflow/core/kernels/mkl_batch_matmul_op.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/kernels/mkl_batch_matmul_op.cc b/tensorflow/core/kernels/mkl_batch_matmul_op.cc index dd31c1a6123..c73bb6f9ea3 100644 --- a/tensorflow/core/kernels/mkl_batch_matmul_op.cc +++ b/tensorflow/core/kernels/mkl_batch_matmul_op.cc @@ -157,9 +157,7 @@ class BatchMatMulMkl : public OpKernel { std::vector ldb_array(batch_size, adj_y_ ? K : N); std::vector ldc_array(batch_size, N); std::vector group_size(1, batch_size); - const Scalar* a = nullptr; - const Scalar* b = nullptr; - Scalar* c = nullptr; + bool threadpool_enabled = false; #ifdef ENABLE_MKLDNN_THREADPOOL threadpool_enabled = true; @@ -167,6 +165,9 @@ class BatchMatMulMkl : public OpKernel { if (std::is_same::value || threadpool_enabled) { // DNNL bfloat16 API requires a, b, and c as pointers to tensors // represented as flat-byte array. + const Scalar* a = nullptr; + const Scalar* b = nullptr; + Scalar* c = nullptr; a = &lhs_reshaped(0, 0, 0); b = &rhs_reshaped(0, 0, 0); OP_REQUIRES(ctx, !bcast.IsBroadcastingRequired(), @@ -289,8 +290,8 @@ class BatchMatMulMkl : public OpKernel { BatchMatMulMkl) #ifdef ENABLE_MKL -TF_CALL_float(REGISTER_BATCH_MATMUL_MKL_V2); TF_CALL_float(REGISTER_BATCH_MATMUL_MKL); +TF_CALL_float(REGISTER_BATCH_MATMUL_MKL_V2); #if defined(ENABLE_MKLDNN_V1) && defined(ENABLE_INTEL_MKL_BFLOAT16) TF_CALL_bfloat16(REGISTER_BATCH_MATMUL_MKL); TF_CALL_bfloat16(REGISTER_BATCH_MATMUL_MKL_V2); From 94ce307aa0fe07fce7892f008233f2f26c606ade Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Mon, 8 Jun 2020 06:21:22 -0700 Subject: [PATCH 034/178] [tfdbg2] Fix num_outputs properties when execution or graph op has no output The previous assumption that `self._output_tensor_ids` is always a tuple is incorrect. It can be `None` for ops without output tensors. Also in this CL: Clarify when properties are `None` and when they are `tuple`s in doc strings of the data objects. 
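
For reference, a minimal standalone sketch of the guard this change adds
(mirroring the diff below, with made-up variable names):

    def num_outputs(output_tensor_ids):
        # `output_tensor_ids` may be None (no output tensors), an empty
        # sequence, or a tuple of ints.
        return len(output_tensor_ids) if output_tensor_ids else 0

    assert num_outputs(None) == 0   # the old bare `len(...)` raised TypeError here
    assert num_outputs([]) == 0
    assert num_outputs((13, 37)) == 2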
PiperOrigin-RevId: 315261796 Change-Id: I398192ba4451e9d8087a2b9e934dff3f561b651a --- .../python/debug/lib/debug_events_reader.py | 15 +++++--- .../debug/lib/debug_events_writer_test.py | 36 ++++++++++++++++++- 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/debug/lib/debug_events_reader.py b/tensorflow/python/debug/lib/debug_events_reader.py index af4d4d0974d..4adb97de25b 100644 --- a/tensorflow/python/debug/lib/debug_events_reader.py +++ b/tensorflow/python/debug/lib/debug_events_reader.py @@ -308,14 +308,17 @@ class Execution(ExecutionDigest): graph_id: ID of the executed FuncGraph (applicable only the execution of a tf.function). `None` for the eager execution of an individual op. input_tensor_ids: IDs of the input (eager) tensor(s) for this execution, if - any. + any. If the eager execution has no input tensor, this is `None`. Else, + this is a `tuple` of `int`s. output_tensor_ids: IDs of the output (eager) tensor(s) from this execution, - if any. + if any. If the eager execution produces no output tensor, this is `None`. + Else, this is a `tuple` of `int`s. debug_tensor_values: Values of the debug tensor(s), applicable only to non-FULL_TENSOR tensor debug mode. A tuple of list of numbers. Each element of the tuple corresponds to an output tensor of the execution. See documentation of the various TensorDebugModes for the semantics of the - numbers. + numbers. If the eager execution produces no output tensor, this is + `None`. Else, this is a `tuple` of `list`s. """ def __init__(self, @@ -362,7 +365,7 @@ class Execution(ExecutionDigest): @property def num_outputs(self): - return len(self._output_tensor_ids) + return len(self._output_tensor_ids) if self._output_tensor_ids else 0 @property def output_tensor_ids(self): @@ -542,6 +545,8 @@ class GraphOpCreationDigest(BaseDigest): op_type: Type name of the op (e.g., "MatMul"). op_name: Name of the op (e.g., "dense_1/MatMul"). output_tensor_ids: Debugger-generated IDs for the output(s) of the op. + If the op produces no output tensor, this is `None`. Else, this is a + `tuple` of `int`s. input_names: Names of the input tensors to the op. device_name: The name of the device that the op is placed on (if available). host_name: Name of the host on which the op is created. 
@@ -588,7 +593,7 @@ class GraphOpCreationDigest(BaseDigest): @property def num_outputs(self): - return len(self._output_tensor_ids) + return len(self._output_tensor_ids) if self.output_tensor_ids else 0 @property def input_names(self): diff --git a/tensorflow/python/debug/lib/debug_events_writer_test.py b/tensorflow/python/debug/lib/debug_events_writer_test.py index 8002671450b..57721c1450f 100644 --- a/tensorflow/python/debug/lib/debug_events_writer_test.py +++ b/tensorflow/python/debug/lib/debug_events_writer_test.py @@ -583,7 +583,7 @@ class DebugEventsWriterTest(dumping_callback_test_lib.DumpingCallbackTestBase, self.assertEqual(traces[-1].op_name, "Op_%d" % (expected_end - 1)) -class DataObjectsTest(test_util.TensorFlowTestCase): +class DataObjectsTest(test_util.TensorFlowTestCase, parameterized.TestCase): def jsonRoundTripCheck(self, obj): self.assertEqual( @@ -660,6 +660,22 @@ class DataObjectsTest(test_util.TensorFlowTestCase): self.assertIsNone(json["output_tensor_ids"]) self.assertIsNone(json["debug_tensor_values"]) + @parameterized.named_parameters( + ("EmptyList", []), + ("None", None), + ) + def testExecutionWithNoOutputTensorsReturnsZeroForNumOutputs( + self, output_tensor_ids): + execution = debug_events_reader.Execution( + debug_events_reader.ExecutionDigest(1234, 5678, "FooOp"), + "localhost", ("a1", "b2"), + debug_event_pb2.TensorDebugMode.FULL_HEALTH, + graph_id="abcd", + input_tensor_ids=[13, 37], + output_tensor_ids=output_tensor_ids, + debug_tensor_values=None) + self.assertEqual(execution.num_outputs, 0) + def testDebuggedDeviceToJons(self): debugged_device = debug_events_reader.DebuggedDevice("/TPU:3", 4) self.assertEqual(debugged_device.to_json(), { @@ -697,6 +713,24 @@ class DataObjectsTest(test_util.TensorFlowTestCase): "inner_graph_ids": ["c2d3", "c2d3e4"], }) + @parameterized.named_parameters( + ("EmptyList", []), + ("None", None), + ) + def testGraphOpDigestWithNoOutpusReturnsNumOutputsZero( + self, output_tensor_ids): + op_creation_digest = debug_events_reader.GraphOpCreationDigest( + 1234, + 5678, + "deadbeef", + "FooOp", + "Model_1/Foo_2", + output_tensor_ids, + "machine.cluster", ("a1", "a2"), + input_names=None, + device_name=None) + self.assertEqual(op_creation_digest.num_outputs, 0) + def testGraphOpCreationDigestNoInputNoDeviceNameToJson(self): op_creation_digest = debug_events_reader.GraphOpCreationDigest( 1234, From 9429a942256175515d240fab5a7ed2da0f3f3d64 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 8 Jun 2020 07:04:52 -0700 Subject: [PATCH 035/178] Internal, delete the old go_wrappers file. PiperOrigin-RevId: 315267621 Change-Id: Iff04569b1297e7b60ec6b1ec32733d4b6a27875f --- tensorflow/opensource_only/go/op_wrappers.go | 49728 ----------------- 1 file changed, 49728 deletions(-) delete mode 100644 tensorflow/opensource_only/go/op_wrappers.go diff --git a/tensorflow/opensource_only/go/op_wrappers.go b/tensorflow/opensource_only/go/op_wrappers.go deleted file mode 100644 index f118e2bd494..00000000000 --- a/tensorflow/opensource_only/go/op_wrappers.go +++ /dev/null @@ -1,49728 +0,0 @@ -// Copyright 2017 The TensorFlow Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// DO NOT EDIT -// This file was machine generated by github.com/tensorflow/tensorflow/tensorflow/go/genop/internal -// -// WARNING: This generation of wrapper function for TensorFlow ops is in an -// experimental state. The generated API can change without notice. - -package op - -import tf "github.com/tensorflow/tensorflow/tensorflow/go" - -// optionalAttr is an intentionally un-exported type to hide -// details of how optional attributes to operations are implemented. -type optionalAttr map[string]interface{} - -func makeOutputList(op *tf.Operation, start int, output string) ([]tf.Output, int, error) { - size, err := op.OutputListSize(output) - if err != nil { - return nil, start, err - } - list := make([]tf.Output, size) - for i := 0; i < size; i++ { - list[i] = op.Output(start + i) - } - return list, start + size, nil -} - -// FakeQuantWithMinMaxVarsGradientAttr is an optional argument to FakeQuantWithMinMaxVarsGradient. -type FakeQuantWithMinMaxVarsGradientAttr func(optionalAttr) - -// FakeQuantWithMinMaxVarsGradientNumBits sets the optional num_bits attribute to value. -// -// value: The bitwidth of the quantization; between 2 and 8, inclusive. -// If not specified, defaults to 8 -func FakeQuantWithMinMaxVarsGradientNumBits(value int64) FakeQuantWithMinMaxVarsGradientAttr { - return func(m optionalAttr) { - m["num_bits"] = value - } -} - -// FakeQuantWithMinMaxVarsGradientNarrowRange sets the optional narrow_range attribute to value. -// -// value: Whether to quantize into 2^num_bits - 1 distinct values. -// If not specified, defaults to false -func FakeQuantWithMinMaxVarsGradientNarrowRange(value bool) FakeQuantWithMinMaxVarsGradientAttr { - return func(m optionalAttr) { - m["narrow_range"] = value - } -} - -// Compute gradients for a FakeQuantWithMinMaxVars operation. -// -// Arguments: -// gradients: Backpropagated gradients above the FakeQuantWithMinMaxVars operation. -// inputs: Values passed as inputs to the FakeQuantWithMinMaxVars operation. -// min, max: Quantization interval, scalar floats. -// -// -// -// Returns: -// backprops_wrt_input: Backpropagated gradients w.r.t. inputs: -// `gradients * (inputs >= min && inputs <= max)`. -// backprop_wrt_min: Backpropagated gradients w.r.t. min parameter: -// `sum(gradients * (inputs < min))`. -// backprop_wrt_max: Backpropagated gradients w.r.t. max parameter: -// `sum(gradients * (inputs > max))`. -func FakeQuantWithMinMaxVarsGradient(scope *Scope, gradients tf.Output, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsGradientAttr) (backprops_wrt_input tf.Output, backprop_wrt_min tf.Output, backprop_wrt_max tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FakeQuantWithMinMaxVarsGradient", - Input: []tf.Input{ - gradients, inputs, min, max, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// FakeQuantWithMinMaxArgsGradientAttr is an optional argument to FakeQuantWithMinMaxArgsGradient. 
-type FakeQuantWithMinMaxArgsGradientAttr func(optionalAttr) - -// FakeQuantWithMinMaxArgsGradientMin sets the optional min attribute to value. -// If not specified, defaults to -6 -func FakeQuantWithMinMaxArgsGradientMin(value float32) FakeQuantWithMinMaxArgsGradientAttr { - return func(m optionalAttr) { - m["min"] = value - } -} - -// FakeQuantWithMinMaxArgsGradientMax sets the optional max attribute to value. -// If not specified, defaults to 6 -func FakeQuantWithMinMaxArgsGradientMax(value float32) FakeQuantWithMinMaxArgsGradientAttr { - return func(m optionalAttr) { - m["max"] = value - } -} - -// FakeQuantWithMinMaxArgsGradientNumBits sets the optional num_bits attribute to value. -// If not specified, defaults to 8 -func FakeQuantWithMinMaxArgsGradientNumBits(value int64) FakeQuantWithMinMaxArgsGradientAttr { - return func(m optionalAttr) { - m["num_bits"] = value - } -} - -// FakeQuantWithMinMaxArgsGradientNarrowRange sets the optional narrow_range attribute to value. -// If not specified, defaults to false -func FakeQuantWithMinMaxArgsGradientNarrowRange(value bool) FakeQuantWithMinMaxArgsGradientAttr { - return func(m optionalAttr) { - m["narrow_range"] = value - } -} - -// Compute gradients for a FakeQuantWithMinMaxArgs operation. -// -// Arguments: -// gradients: Backpropagated gradients above the FakeQuantWithMinMaxArgs operation. -// inputs: Values passed as inputs to the FakeQuantWithMinMaxArgs operation. -// -// Returns Backpropagated gradients below the FakeQuantWithMinMaxArgs operation: -// `gradients * (inputs >= min && inputs <= max)`. -func FakeQuantWithMinMaxArgsGradient(scope *Scope, gradients tf.Output, inputs tf.Output, optional ...FakeQuantWithMinMaxArgsGradientAttr) (backprops tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FakeQuantWithMinMaxArgsGradient", - Input: []tf.Input{ - gradients, inputs, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Applies sparse addition to `input` using individual values or slices -// -// from `updates` according to indices `indices`. The updates are non-aliasing: -// `input` is only modified in-place if no other operations will use it. -// Otherwise, a copy of `input` is made. This operation has a gradient with -// respect to both `input` and `updates`. -// -// `input` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. -// -// `indices` must be integer tensor, containing indices into `input`. -// It must be shape \\([d_0, ..., d_{Q-2}, K]\\) where `0 < K <= P`. -// -// The innermost dimension of `indices` (with length `K`) corresponds to -// indices into elements (if `K = P`) or `(P-K)`-dimensional slices -// (if `K < P`) along the `K`th dimension of `input`. -// -// `updates` is `Tensor` of rank `Q-1+P-K` with shape: -// -// $$[d_0, ..., d_{Q-2}, input.shape[K], ..., input.shape[P-1]].$$ -// -// For example, say we want to add 4 scattered elements to a rank-1 tensor to 8 -// elements. 
In Python, that addition would look like this: -// -// input = tf.constant([1, 2, 3, 4, 5, 6, 7, 8]) -// indices = tf.constant([[4], [3], [1], [7]]) -// updates = tf.constant([9, 10, 11, 12]) -// output = tf.scatter_nd_non_aliasing_add(input, indices, updates) -// with tf.Session() as sess: -// print(sess.run(output)) -// -// The resulting value `output` would look like this: -// -// [1, 13, 3, 14, 14, 6, 7, 20] -// -// See `tf.scatter_nd` for more details about how to make updates to slices. -// -// Arguments: -// input: A Tensor. -// indices: A Tensor. Must be one of the following types: `int32`, `int64`. -// A tensor of indices into `input`. -// updates: A Tensor. Must have the same type as ref. A tensor of updated values -// to add to `input`. -// -// Returns A `Tensor` with the same shape as `input`, containing values of `input` -// updated with `updates`. -func ScatterNdNonAliasingAdd(scope *Scope, input tf.Output, indices tf.Output, updates tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ScatterNdNonAliasingAdd", - Input: []tf.Input{ - input, indices, updates, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Subtracts sparse `updates` from an existing tensor according to `indices`. -// -// This operation creates a new tensor by subtracting sparse `updates` from the -// passed in `tensor`. -// This operation is very similar to `tf.scatter_nd_sub`, except that the updates -// are subtracted from an existing tensor (as opposed to a variable). If the memory -// for the existing tensor cannot be re-used, a copy is made and updated. -// -// `indices` is an integer tensor containing indices into a new tensor of shape -// `shape`. The last dimension of `indices` can be at most the rank of `shape`: -// -// indices.shape[-1] <= shape.rank -// -// The last dimension of `indices` corresponds to indices into elements -// (if `indices.shape[-1] = shape.rank`) or slices -// (if `indices.shape[-1] < shape.rank`) along dimension `indices.shape[-1]` of -// `shape`. `updates` is a tensor with shape -// -// indices.shape[:-1] + shape[indices.shape[-1]:] -// -// The simplest form of tensor_scatter_sub is to subtract individual elements -// from a tensor by index. For example, say we want to insert 4 scattered elements -// in a rank-1 tensor with 8 elements. -// -// In Python, this scatter subtract operation would look like this: -// -// ```python -// indices = tf.constant([[4], [3], [1], [7]]) -// updates = tf.constant([9, 10, 11, 12]) -// tensor = tf.ones([8], dtype=tf.int32) -// updated = tf.tensor_scatter_nd_sub(tensor, indices, updates) -// print(updated) -// ``` -// -// The resulting tensor would look like this: -// -// [1, -10, 1, -9, -8, 1, 1, -11] -// -// We can also, insert entire slices of a higher rank tensor all at once. For -// example, if we wanted to insert two slices in the first dimension of a -// rank-3 tensor with two matrices of new values. 
-// -// In Python, this scatter add operation would look like this: -// -// ```python -// indices = tf.constant([[0], [2]]) -// updates = tf.constant([[[5, 5, 5, 5], [6, 6, 6, 6], -// [7, 7, 7, 7], [8, 8, 8, 8]], -// [[5, 5, 5, 5], [6, 6, 6, 6], -// [7, 7, 7, 7], [8, 8, 8, 8]]]) -// tensor = tf.ones([4, 4, 4],dtype=tf.int32) -// updated = tf.tensor_scatter_nd_sub(tensor, indices, updates) -// print(updated) -// ``` -// -// The resulting tensor would look like this: -// -// [[[-4, -4, -4, -4], [-5, -5, -5, -5], [-6, -6, -6, -6], [-7, -7, -7, -7]], -// [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]], -// [[-4, -4, -4, -4], [-5, -5, -5, -5], [-6, -6, -6, -6], [-7, -7, -7, -7]], -// [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]] -// -// Note that on CPU, if an out of bound index is found, an error is returned. -// On GPU, if an out of bound index is found, the index is ignored. -// -// Arguments: -// tensor: Tensor to copy/update. -// indices: Index tensor. -// updates: Updates to scatter into output. -// -// Returns A new tensor copied from tensor and updates subtracted according to the indices. -func TensorScatterSub(scope *Scope, tensor tf.Output, indices tf.Output, updates tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorScatterSub", - Input: []tf.Input{ - tensor, indices, updates, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Adds sparse `updates` to an existing tensor according to `indices`. -// -// This operation creates a new tensor by adding sparse `updates` to the passed -// in `tensor`. -// This operation is very similar to `tf.scatter_nd_add`, except that the updates -// are added onto an existing tensor (as opposed to a variable). If the memory -// for the existing tensor cannot be re-used, a copy is made and updated. -// -// `indices` is an integer tensor containing indices into a new tensor of shape -// `tensor.shape`. The last dimension of `indices` can be at most the rank of -// `tensor.shape`: -// -// indices.shape[-1] <= tensor.shape.rank -// -// The last dimension of `indices` corresponds to indices into elements -// (if `indices.shape[-1] = tensor.shape.rank`) or slices -// (if `indices.shape[-1] < tensor.shape.rank`) along dimension -// `indices.shape[-1]` of `tensor.shape`. `updates` is a tensor with shape -// -// indices.shape[:-1] + tensor.shape[indices.shape[-1]:] -// -// The simplest form of tensor_scatter_add is to add individual elements to a -// tensor by index. For example, say we want to add 4 elements in a rank-1 -// tensor with 8 elements. -// -// In Python, this scatter add operation would look like this: -// -// ```python -// indices = tf.constant([[4], [3], [1], [7]]) -// updates = tf.constant([9, 10, 11, 12]) -// tensor = tf.ones([8], dtype=tf.int32) -// updated = tf.tensor_scatter_nd_add(tensor, indices, updates) -// print(updated) -// ``` -// -// The resulting tensor would look like this: -// -// [1, 12, 1, 11, 10, 1, 1, 13] -// -// We can also, insert entire slices of a higher rank tensor all at once. For -// example, if we wanted to insert two slices in the first dimension of a -// rank-3 tensor with two matrices of new values. 
-// -// In Python, this scatter add operation would look like this: -// -// ```python -// indices = tf.constant([[0], [2]]) -// updates = tf.constant([[[5, 5, 5, 5], [6, 6, 6, 6], -// [7, 7, 7, 7], [8, 8, 8, 8]], -// [[5, 5, 5, 5], [6, 6, 6, 6], -// [7, 7, 7, 7], [8, 8, 8, 8]]]) -// tensor = tf.ones([4, 4, 4],dtype=tf.int32) -// updated = tf.tensor_scatter_nd_add(tensor, indices, updates) -// print(updated) -// ``` -// -// The resulting tensor would look like this: -// -// [[[6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8], [9, 9, 9, 9]], -// [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]], -// [[6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8], [9, 9, 9, 9]], -// [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]] -// -// Note that on CPU, if an out of bound index is found, an error is returned. -// On GPU, if an out of bound index is found, the index is ignored. -// -// Arguments: -// tensor: Tensor to copy/update. -// indices: Index tensor. -// updates: Updates to scatter into output. -// -// Returns A new tensor copied from tensor and updates added according to the indices. -func TensorScatterAdd(scope *Scope, tensor tf.Output, indices tf.Output, updates tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorScatterAdd", - Input: []tf.Input{ - tensor, indices, updates, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Reshapes a quantized tensor as per the Reshape op. -// -// ``` -// -// Arguments: -// -// shape: Defines the shape of the output tensor. -// input_min: The minimum value of the input. -// input_max: The maximum value of the input. -// -// Returns: -// output -// output_min: This value is copied from input_min. -// output_max: This value is copied from input_max. -func QuantizedReshape(scope *Scope, tensor tf.Output, shape tf.Output, input_min tf.Output, input_max tf.Output) (output tf.Output, output_min tf.Output, output_max tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "QuantizedReshape", - Input: []tf.Input{ - tensor, shape, input_min, input_max, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// QuantizeAndDequantizeV2Attr is an optional argument to QuantizeAndDequantizeV2. -type QuantizeAndDequantizeV2Attr func(optionalAttr) - -// QuantizeAndDequantizeV2SignedInput sets the optional signed_input attribute to value. -// -// value: Whether the quantization is signed or unsigned. (actually this parameter should -// have been called `signed_output`) -// If not specified, defaults to true -func QuantizeAndDequantizeV2SignedInput(value bool) QuantizeAndDequantizeV2Attr { - return func(m optionalAttr) { - m["signed_input"] = value - } -} - -// QuantizeAndDequantizeV2NumBits sets the optional num_bits attribute to value. -// -// value: The bitwidth of the quantization. -// If not specified, defaults to 8 -func QuantizeAndDequantizeV2NumBits(value int64) QuantizeAndDequantizeV2Attr { - return func(m optionalAttr) { - m["num_bits"] = value - } -} - -// QuantizeAndDequantizeV2RangeGiven sets the optional range_given attribute to value. -// -// value: Whether the range is given or should be determined from the `input` tensor. -// If not specified, defaults to false -func QuantizeAndDequantizeV2RangeGiven(value bool) QuantizeAndDequantizeV2Attr { - return func(m optionalAttr) { - m["range_given"] = value - } -} - -// QuantizeAndDequantizeV2RoundMode sets the optional round_mode attribute to value. 
-// -// value: The 'round_mode' attribute controls which rounding tie-breaking algorithm is -// used when rounding float values to their quantized equivalents. The following -// rounding modes are currently supported: -// -// * HALF_TO_EVEN: this is the default round_mode. -// * HALF_UP: round towards positive. In this mode 7.5 rounds up to 8 and -7.5 -// rounds up to -7. -// -// If not specified, defaults to "HALF_TO_EVEN" -func QuantizeAndDequantizeV2RoundMode(value string) QuantizeAndDequantizeV2Attr { - return func(m optionalAttr) { - m["round_mode"] = value - } -} - -// QuantizeAndDequantizeV2NarrowRange sets the optional narrow_range attribute to value. -// -// value: If True, then the absolute value of the quantized minimum value is the same as -// the quantized maximum value, instead of 1 greater. -// i.e. for 8 bit quantization, the minimum value is -127 instead of -128. -// If not specified, defaults to false -func QuantizeAndDequantizeV2NarrowRange(value bool) QuantizeAndDequantizeV2Attr { - return func(m optionalAttr) { - m["narrow_range"] = value - } -} - -// QuantizeAndDequantizeV2Axis sets the optional axis attribute to value. -// -// value: If specified, this axis is treated as a channel or slice axis, and a separate -// quantization range is used for each channel or slice along this axis. -// If not specified, defaults to -1 -func QuantizeAndDequantizeV2Axis(value int64) QuantizeAndDequantizeV2Attr { - return func(m optionalAttr) { - m["axis"] = value - } -} - -// Quantizes then dequantizes a tensor. -// -// This op simulates the precision loss from the quantized forward pass by: -// -// 1. Quantizing the tensor to fixed point numbers, which should match the target -// quantization method when it is used in inference. -// 2. Dequantizing it back to floating point numbers for the following ops, most -// likely matmul. -// -// There are different ways to quantize. This version uses only scaling, so 0.0 -// maps to 0. -// -// From the specified 'num_bits' in the quantized output type, it determines -// minimum and maximum representable quantized values. -// -// e.g. -// -// * [-128, 127] for signed, num_bits = 8, or -// * [0, 255] for unsigned, num_bits = 8. -// -// If range_given == False, the initial input_min, input_max will be determined -// automatically as the minimum and maximum values in the input tensor, otherwise -// the specified values of input_min, input_max are used. -// -// Note: If the input_min, input_max are specified, they do not need to equal the -// actual minimum and maximum values in the tensor. e.g. in some cases it may be -// beneficial to specify these values such that the low probability extremes of the -// input distribution are clipped. -// -// This op determines the maximum scale_factor that would map the initial -// [input_min, input_max] range to a range that lies within the representable -// quantized range. -// -// It determines the scale from one of input_min and input_max, then updates the -// other one to maximize the representable range. -// -// e.g. 
-// -// * if the output is signed, num_bits = 8, [input_min, input_max] = [-10.0, -// 5.0]: it would use a scale_factor of -128 / -10.0 = 12.8 In this case, it -// would update input_max to be 127 / 12.8 = 9.921875 -// * if the output is signed, num_bits = 8, [input_min, input_max] = [-10.0, -// 10.0]: it would use a scale_factor of 127 / 10.0 = 12.7 In this case, it -// would update input_min to be 128.0 / 12.7 = -10.07874 -// * if the output is unsigned, input_min is forced to be 0, and only the -// specified input_max is used. -// -// After determining the scale_factor and updating the input range, it applies the -// following to each value in the 'input' tensor. -// -// output = round(clamp(value, input_min, input_max) * scale_factor) / scale_factor. -// -// The above round function rounds the value based on the given round_mode. -// -// -// Arguments: -// input: Tensor to quantize and then dequantize. -// input_min: If `range_given == True`, this specifies the minimum input value that needs to -// be represented, otherwise it is determined from the min value of the `input` -// tensor. -// input_max: If `range_given == True`, this specifies the maximum input value that needs to -// be represented, otherwise it is determined from the max value of the `input` -// tensor. -func QuantizeAndDequantizeV2(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, optional ...QuantizeAndDequantizeV2Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizeAndDequantizeV2", - Input: []tf.Input{ - input, input_min, input_max, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// QuantizeAndDequantizeAttr is an optional argument to QuantizeAndDequantize. -type QuantizeAndDequantizeAttr func(optionalAttr) - -// QuantizeAndDequantizeSignedInput sets the optional signed_input attribute to value. -// If not specified, defaults to true -func QuantizeAndDequantizeSignedInput(value bool) QuantizeAndDequantizeAttr { - return func(m optionalAttr) { - m["signed_input"] = value - } -} - -// QuantizeAndDequantizeNumBits sets the optional num_bits attribute to value. -// If not specified, defaults to 8 -func QuantizeAndDequantizeNumBits(value int64) QuantizeAndDequantizeAttr { - return func(m optionalAttr) { - m["num_bits"] = value - } -} - -// QuantizeAndDequantizeRangeGiven sets the optional range_given attribute to value. -// If not specified, defaults to false -func QuantizeAndDequantizeRangeGiven(value bool) QuantizeAndDequantizeAttr { - return func(m optionalAttr) { - m["range_given"] = value - } -} - -// QuantizeAndDequantizeInputMin sets the optional input_min attribute to value. -// If not specified, defaults to 0 -func QuantizeAndDequantizeInputMin(value float32) QuantizeAndDequantizeAttr { - return func(m optionalAttr) { - m["input_min"] = value - } -} - -// QuantizeAndDequantizeInputMax sets the optional input_max attribute to value. -// If not specified, defaults to 0 -func QuantizeAndDequantizeInputMax(value float32) QuantizeAndDequantizeAttr { - return func(m optionalAttr) { - m["input_max"] = value - } -} - -// Use QuantizeAndDequantizeV2 instead. 
-// -// DEPRECATED at GraphDef version 22: Replaced by QuantizeAndDequantizeV2 -func QuantizeAndDequantize(scope *Scope, input tf.Output, optional ...QuantizeAndDequantizeAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizeAndDequantize", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// OneHotAttr is an optional argument to OneHot. -type OneHotAttr func(optionalAttr) - -// OneHotAxis sets the optional axis attribute to value. -// -// value: The axis to fill (default: -1, a new inner-most axis). -// If not specified, defaults to -1 -func OneHotAxis(value int64) OneHotAttr { - return func(m optionalAttr) { - m["axis"] = value - } -} - -// Returns a one-hot tensor. -// -// The locations represented by indices in `indices` take value `on_value`, -// while all other locations take value `off_value`. -// -// If the input `indices` is rank `N`, the output will have rank `N+1`, -// The new axis is created at dimension `axis` (default: the new axis is -// appended at the end). -// -// If `indices` is a scalar the output shape will be a vector of length `depth`. -// -// If `indices` is a vector of length `features`, the output shape will be: -// ``` -// features x depth if axis == -1 -// depth x features if axis == 0 -// ``` -// -// If `indices` is a matrix (batch) with shape `[batch, features]`, -// the output shape will be: -// ``` -// batch x features x depth if axis == -1 -// batch x depth x features if axis == 1 -// depth x batch x features if axis == 0 -// ``` -// -// -// Examples -// ========= -// -// Suppose that -// ``` -// indices = [0, 2, -1, 1] -// depth = 3 -// on_value = 5.0 -// off_value = 0.0 -// axis = -1 -// ``` -// -// Then output is `[4 x 3]`: -// ``` -// output = -// [5.0 0.0 0.0] // one_hot(0) -// [0.0 0.0 5.0] // one_hot(2) -// [0.0 0.0 0.0] // one_hot(-1) -// [0.0 5.0 0.0] // one_hot(1) -// ``` -// -// Suppose that -// ``` -// indices = [0, 2, -1, 1] -// depth = 3 -// on_value = 0.0 -// off_value = 3.0 -// axis = 0 -// ``` -// -// Then output is `[3 x 4]`: -// ``` -// output = -// [0.0 3.0 3.0 3.0] -// [3.0 3.0 3.0 0.0] -// [3.0 3.0 3.0 3.0] -// [3.0 0.0 3.0 3.0] -// // ^ one_hot(0) -// // ^ one_hot(2) -// // ^ one_hot(-1) -// // ^ one_hot(1) -// ``` -// -// Suppose that -// ``` -// indices = [[0, 2], [1, -1]] -// depth = 3 -// on_value = 1.0 -// off_value = 0.0 -// axis = -1 -// ``` -// -// Then output is `[2 x 2 x 3]`: -// ``` -// output = -// [ -// [1.0, 0.0, 0.0] // one_hot(0) -// [0.0, 0.0, 1.0] // one_hot(2) -// ][ -// [0.0, 1.0, 0.0] // one_hot(1) -// [0.0, 0.0, 0.0] // one_hot(-1) -// ] -// ``` -// -// Arguments: -// indices: A tensor of indices. -// depth: A scalar defining the depth of the one hot dimension. -// on_value: A scalar defining the value to fill in output when `indices[j] = i`. -// off_value: A scalar defining the value to fill in output when `indices[j] != i`. -// -// Returns The one-hot tensor. 
-func OneHot(scope *Scope, indices tf.Output, depth tf.Output, on_value tf.Output, off_value tf.Output, optional ...OneHotAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "OneHot", - Input: []tf.Input{ - indices, depth, on_value, off_value, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Extract `patches` from `input` and put them in the "depth" output dimension. 3D extension of `extract_image_patches`. -// -// Arguments: -// input: 5-D Tensor with shape `[batch, in_planes, in_rows, in_cols, depth]`. -// ksizes: The size of the sliding window for each dimension of `input`. -// strides: 1-D of length 5. How far the centers of two consecutive patches are in -// `input`. Must be: `[1, stride_planes, stride_rows, stride_cols, 1]`. -// padding: The type of padding algorithm to use. -// -// We specify the size-related attributes as: -// -// ```python -// ksizes = [1, ksize_planes, ksize_rows, ksize_cols, 1] -// strides = [1, stride_planes, strides_rows, strides_cols, 1] -// ``` -// -// Returns 5-D Tensor with shape `[batch, out_planes, out_rows, out_cols, -// ksize_planes * ksize_rows * ksize_cols * depth]` containing patches -// with size `ksize_planes x ksize_rows x ksize_cols x depth` vectorized -// in the "depth" dimension. Note `out_planes`, `out_rows` and `out_cols` -// are the dimensions of the output patches. -func ExtractVolumePatches(scope *Scope, input tf.Output, ksizes []int64, strides []int64, padding string) (patches tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksizes": ksizes, "strides": strides, "padding": padding} - opspec := tf.OpSpec{ - Type: "ExtractVolumePatches", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// DepthToSpaceAttr is an optional argument to DepthToSpace. -type DepthToSpaceAttr func(optionalAttr) - -// DepthToSpaceDataFormat sets the optional data_format attribute to value. -// If not specified, defaults to "NHWC" -func DepthToSpaceDataFormat(value string) DepthToSpaceAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// DepthToSpace for tensors of type T. -// -// Rearranges data from depth into blocks of spatial data. -// This is the reverse transformation of SpaceToDepth. More specifically, -// this op outputs a copy of the input tensor where values from the `depth` -// dimension are moved in spatial blocks to the `height` and `width` dimensions. -// The attr `block_size` indicates the input block size and how the data is moved. -// -// * Chunks of data of size `block_size * block_size` from depth are rearranged -// into non-overlapping blocks of size `block_size x block_size` -// * The width the output tensor is `input_depth * block_size`, whereas the -// height is `input_height * block_size`. -// * The Y, X coordinates within each block of the output image are determined -// by the high order component of the input channel index. -// * The depth of the input tensor must be divisible by -// `block_size * block_size`. 
-// -// The `data_format` attr specifies the layout of the input and output tensors -// with the following options: -// "NHWC": `[ batch, height, width, channels ]` -// "NCHW": `[ batch, channels, height, width ]` -// "NCHW_VECT_C": -// `qint8 [ batch, channels / 4, height, width, 4 ]` -// -// It is useful to consider the operation as transforming a 6-D Tensor. -// e.g. for data_format = NHWC, -// Each element in the input tensor can be specified via 6 coordinates, -// ordered by decreasing memory layout significance as: -// n,iY,iX,bY,bX,oC (where n=batch index, iX, iY means X or Y coordinates -// within the input image, bX, bY means coordinates -// within the output block, oC means output channels). -// The output would be the input transposed to the following layout: -// n,iY,bY,iX,bX,oC -// -// This operation is useful for resizing the activations between convolutions -// (but keeping all data), e.g. instead of pooling. It is also useful for training -// purely convolutional models. -// -// For example, given an input of shape `[1, 1, 1, 4]`, data_format = "NHWC" and -// block_size = 2: -// -// ``` -// x = [[[[1, 2, 3, 4]]]] -// -// ``` -// -// This operation will output a tensor of shape `[1, 2, 2, 1]`: -// -// ``` -// [[[[1], [2]], -// [[3], [4]]]] -// ``` -// -// Here, the input has a batch of 1 and each batch element has shape `[1, 1, 4]`, -// the corresponding output will have 2x2 elements and will have a depth of -// 1 channel (1 = `4 / (block_size * block_size)`). -// The output element shape is `[2, 2, 1]`. -// -// For an input tensor with larger depth, here of shape `[1, 1, 1, 12]`, e.g. -// -// ``` -// x = [[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]] -// ``` -// -// This operation, for block size of 2, will return the following tensor of shape -// `[1, 2, 2, 3]` -// -// ``` -// [[[[1, 2, 3], [4, 5, 6]], -// [[7, 8, 9], [10, 11, 12]]]] -// -// ``` -// -// Similarly, for the following input of shape `[1 2 2 4]`, and a block size of 2: -// -// ``` -// x = [[[[1, 2, 3, 4], -// [5, 6, 7, 8]], -// [[9, 10, 11, 12], -// [13, 14, 15, 16]]]] -// ``` -// -// the operator will return the following tensor of shape `[1 4 4 1]`: -// -// ``` -// x = [[[ [1], [2], [5], [6]], -// [ [3], [4], [7], [8]], -// [ [9], [10], [13], [14]], -// [ [11], [12], [15], [16]]]] -// -// ``` -// -// Arguments: -// -// block_size: The size of the spatial block, same as in Space2Depth. -func DepthToSpace(scope *Scope, input tf.Output, block_size int64, optional ...DepthToSpaceAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"block_size": block_size} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DepthToSpace", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SpaceToDepthAttr is an optional argument to SpaceToDepth. -type SpaceToDepthAttr func(optionalAttr) - -// SpaceToDepthDataFormat sets the optional data_format attribute to value. -// If not specified, defaults to "NHWC" -func SpaceToDepthDataFormat(value string) SpaceToDepthAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// SpaceToDepth for tensors of type T. -// -// Rearranges blocks of spatial data, into depth. More specifically, -// this op outputs a copy of the input tensor where values from the `height` -// and `width` dimensions are moved to the `depth` dimension. -// The attr `block_size` indicates the input block size. 
-// -// * Non-overlapping blocks of size `block_size x block size` are rearranged -// into depth at each location. -// * The depth of the output tensor is `block_size * block_size * input_depth`. -// * The Y, X coordinates within each block of the input become the high order -// component of the output channel index. -// * The input tensor's height and width must be divisible by block_size. -// -// The `data_format` attr specifies the layout of the input and output tensors -// with the following options: -// "NHWC": `[ batch, height, width, channels ]` -// "NCHW": `[ batch, channels, height, width ]` -// "NCHW_VECT_C": -// `qint8 [ batch, channels / 4, height, width, 4 ]` -// -// It is useful to consider the operation as transforming a 6-D Tensor. -// e.g. for data_format = NHWC, -// Each element in the input tensor can be specified via 6 coordinates, -// ordered by decreasing memory layout significance as: -// n,oY,bY,oX,bX,iC (where n=batch index, oX, oY means X or Y coordinates -// within the output image, bX, bY means coordinates -// within the input block, iC means input channels). -// The output would be a transpose to the following layout: -// n,oY,oX,bY,bX,iC -// -// This operation is useful for resizing the activations between convolutions -// (but keeping all data), e.g. instead of pooling. It is also useful for training -// purely convolutional models. -// -// For example, given an input of shape `[1, 2, 2, 1]`, data_format = "NHWC" and -// block_size = 2: -// -// ``` -// x = [[[[1], [2]], -// [[3], [4]]]] -// ``` -// -// This operation will output a tensor of shape `[1, 1, 1, 4]`: -// -// ``` -// [[[[1, 2, 3, 4]]]] -// ``` -// -// Here, the input has a batch of 1 and each batch element has shape `[2, 2, 1]`, -// the corresponding output will have a single element (i.e. width and height are -// both 1) and will have a depth of 4 channels (1 * block_size * block_size). -// The output element shape is `[1, 1, 4]`. -// -// For an input tensor with larger depth, here of shape `[1, 2, 2, 3]`, e.g. -// -// ``` -// x = [[[[1, 2, 3], [4, 5, 6]], -// [[7, 8, 9], [10, 11, 12]]]] -// ``` -// -// This operation, for block_size of 2, will return the following tensor of shape -// `[1, 1, 1, 12]` -// -// ``` -// [[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]] -// ``` -// -// Similarly, for the following input of shape `[1 4 4 1]`, and a block size of 2: -// -// ``` -// x = [[[[1], [2], [5], [6]], -// [[3], [4], [7], [8]], -// [[9], [10], [13], [14]], -// [[11], [12], [15], [16]]]] -// ``` -// -// the operator will return the following tensor of shape `[1 2 2 4]`: -// -// ``` -// x = [[[[1, 2, 3, 4], -// [5, 6, 7, 8]], -// [[9, 10, 11, 12], -// [13, 14, 15, 16]]]] -// ``` -// -// Arguments: -// -// block_size: The size of the spatial block. -func SpaceToDepth(scope *Scope, input tf.Output, block_size int64, optional ...SpaceToDepthAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"block_size": block_size} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SpaceToDepth", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// BatchToSpace for 4-D tensors of type T. -// -// This is a legacy version of the more general BatchToSpaceND. -// -// Rearranges (permutes) data from batch into blocks of spatial data, followed by -// cropping. This is the reverse transformation of SpaceToBatch. 
More specifically, -// this op outputs a copy of the input tensor where values from the `batch` -// dimension are moved in spatial blocks to the `height` and `width` dimensions, -// followed by cropping along the `height` and `width` dimensions. -// -// Arguments: -// input: 4-D tensor with shape -// `[batch*block_size*block_size, height_pad/block_size, width_pad/block_size, -// depth]`. Note that the batch size of the input tensor must be divisible by -// `block_size * block_size`. -// crops: 2-D tensor of non-negative integers with shape `[2, 2]`. It specifies -// how many elements to crop from the intermediate result across the spatial -// dimensions as follows: -// -// crops = [[crop_top, crop_bottom], [crop_left, crop_right]] -// -// -// Returns 4-D with shape `[batch, height, width, depth]`, where: -// -// height = height_pad - crop_top - crop_bottom -// width = width_pad - crop_left - crop_right -// -// The attr `block_size` must be greater than one. It indicates the block size. -// -// Some examples: -// -// (1) For the following input of shape `[4, 1, 1, 1]` and block_size of 2: -// -// ``` -// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]] -// ``` -// -// The output tensor has shape `[1, 2, 2, 1]` and value: -// -// ``` -// x = [[[[1], [2]], [[3], [4]]]] -// ``` -// -// (2) For the following input of shape `[4, 1, 1, 3]` and block_size of 2: -// -// ``` -// [[[[1, 2, 3]]], [[[4, 5, 6]]], [[[7, 8, 9]]], [[[10, 11, 12]]]] -// ``` -// -// The output tensor has shape `[1, 2, 2, 3]` and value: -// -// ``` -// x = [[[[1, 2, 3], [4, 5, 6]], -// [[7, 8, 9], [10, 11, 12]]]] -// ``` -// -// (3) For the following input of shape `[4, 2, 2, 1]` and block_size of 2: -// -// ``` -// x = [[[[1], [3]], [[9], [11]]], -// [[[2], [4]], [[10], [12]]], -// [[[5], [7]], [[13], [15]]], -// [[[6], [8]], [[14], [16]]]] -// ``` -// -// The output tensor has shape `[1, 4, 4, 1]` and value: -// -// ``` -// x = [[[[1], [2], [3], [4]], -// [[5], [6], [7], [8]], -// [[9], [10], [11], [12]], -// [[13], [14], [15], [16]]]] -// ``` -// -// (4) For the following input of shape `[8, 1, 2, 1]` and block_size of 2: -// -// ``` -// x = [[[[1], [3]]], [[[9], [11]]], [[[2], [4]]], [[[10], [12]]], -// [[[5], [7]]], [[[13], [15]]], [[[6], [8]]], [[[14], [16]]]] -// ``` -// -// The output tensor has shape `[2, 2, 4, 1]` and value: -// -// ``` -// x = [[[[1], [3]], [[5], [7]]], -// [[[2], [4]], [[10], [12]]], -// [[[5], [7]], [[13], [15]]], -// [[[6], [8]], [[14], [16]]]] -// ``` -func BatchToSpace(scope *Scope, input tf.Output, crops tf.Output, block_size int64) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"block_size": block_size} - opspec := tf.OpSpec{ - Type: "BatchToSpace", - Input: []tf.Input{ - input, crops, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SpaceToBatch for 4-D tensors of type T. -// -// This is a legacy version of the more general SpaceToBatchND. -// -// Zero-pads and then rearranges (permutes) blocks of spatial data into batch. -// More specifically, this op outputs a copy of the input tensor where values from -// the `height` and `width` dimensions are moved to the `batch` dimension. After -// the zero-padding, both `height` and `width` of the input must be divisible by the -// block size. -// -// Arguments: -// input: 4-D with shape `[batch, height, width, depth]`. -// paddings: 2-D tensor of non-negative integers with shape `[2, 2]`. 
It specifies -// the padding of the input with zeros across the spatial dimensions as follows: -// -// paddings = [[pad_top, pad_bottom], [pad_left, pad_right]] -// -// The effective spatial dimensions of the zero-padded input tensor will be: -// -// height_pad = pad_top + height + pad_bottom -// width_pad = pad_left + width + pad_right -// -// The attr `block_size` must be greater than one. It indicates the block size. -// -// * Non-overlapping blocks of size `block_size x block size` in the height and -// width dimensions are rearranged into the batch dimension at each location. -// * The batch of the output tensor is `batch * block_size * block_size`. -// * Both height_pad and width_pad must be divisible by block_size. -// -// The shape of the output will be: -// -// [batch*block_size*block_size, height_pad/block_size, width_pad/block_size, -// depth] -// -// Some examples: -// -// (1) For the following input of shape `[1, 2, 2, 1]` and block_size of 2: -// -// ``` -// x = [[[[1], [2]], [[3], [4]]]] -// ``` -// -// The output tensor has shape `[4, 1, 1, 1]` and value: -// -// ``` -// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]] -// ``` -// -// (2) For the following input of shape `[1, 2, 2, 3]` and block_size of 2: -// -// ``` -// x = [[[[1, 2, 3], [4, 5, 6]], -// [[7, 8, 9], [10, 11, 12]]]] -// ``` -// -// The output tensor has shape `[4, 1, 1, 3]` and value: -// -// ``` -// [[[[1, 2, 3]]], [[[4, 5, 6]]], [[[7, 8, 9]]], [[[10, 11, 12]]]] -// ``` -// -// (3) For the following input of shape `[1, 4, 4, 1]` and block_size of 2: -// -// ``` -// x = [[[[1], [2], [3], [4]], -// [[5], [6], [7], [8]], -// [[9], [10], [11], [12]], -// [[13], [14], [15], [16]]]] -// ``` -// -// The output tensor has shape `[4, 2, 2, 1]` and value: -// -// ``` -// x = [[[[1], [3]], [[9], [11]]], -// [[[2], [4]], [[10], [12]]], -// [[[5], [7]], [[13], [15]]], -// [[[6], [8]], [[14], [16]]]] -// ``` -// -// (4) For the following input of shape `[2, 2, 4, 1]` and block_size of 2: -// -// ``` -// x = [[[[1], [2], [3], [4]], -// [[5], [6], [7], [8]]], -// [[[9], [10], [11], [12]], -// [[13], [14], [15], [16]]]] -// ``` -// -// The output tensor has shape `[8, 1, 2, 1]` and value: -// -// ``` -// x = [[[[1], [3]]], [[[9], [11]]], [[[2], [4]]], [[[10], [12]]], -// [[[5], [7]]], [[[13], [15]]], [[[6], [8]]], [[[14], [16]]]] -// ``` -// -// Among others, this operation is useful for reducing atrous convolution into -// regular convolution. -// -func SpaceToBatch(scope *Scope, input tf.Output, paddings tf.Output, block_size int64) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"block_size": block_size} - opspec := tf.OpSpec{ - Type: "SpaceToBatch", - Input: []tf.Input{ - input, paddings, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SqueezeAttr is an optional argument to Squeeze. -type SqueezeAttr func(optionalAttr) - -// SqueezeAxis sets the optional axis attribute to value. -// -// value: If specified, only squeezes the dimensions listed. The dimension -// index starts at 0. It is an error to squeeze a dimension that is not 1. Must -// be in the range `[-rank(input), rank(input))`. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func SqueezeAxis(value []int64) SqueezeAttr { - return func(m optionalAttr) { - m["squeeze_dims"] = value - } -} - -// Removes dimensions of size 1 from the shape of a tensor. 
-// -// Given a tensor `input`, this operation returns a tensor of the same type with -// all dimensions of size 1 removed. If you don't want to remove all size 1 -// dimensions, you can remove specific size 1 dimensions by specifying -// `axis`. -// -// For example: -// -// ``` -// # 't' is a tensor of shape [1, 2, 1, 3, 1, 1] -// shape(squeeze(t)) ==> [2, 3] -// ``` -// -// Or, to remove specific size 1 dimensions: -// -// ``` -// # 't' is a tensor of shape [1, 2, 1, 3, 1, 1] -// shape(squeeze(t, [2, 4])) ==> [1, 2, 3, 1] -// ``` -// -// Arguments: -// input: The `input` to squeeze. -// -// Returns Contains the same data as `input`, but has one or more dimensions of -// size 1 removed. -func Squeeze(scope *Scope, input tf.Output, optional ...SqueezeAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Squeeze", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// A placeholder op that passes through `input` when its output is not fed. -// -// Arguments: -// input: The default value to produce when `output` is not fed. -// shape: The (possibly partial) shape of the tensor. -// -// Returns A placeholder tensor that defaults to `input` if it is not fed. -func PlaceholderWithDefault(scope *Scope, input tf.Output, shape tf.Shape) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"shape": shape} - opspec := tf.OpSpec{ - Type: "PlaceholderWithDefault", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// PlaceholderAttr is an optional argument to Placeholder. -type PlaceholderAttr func(optionalAttr) - -// PlaceholderShape sets the optional shape attribute to value. -// -// value: (Optional) The shape of the tensor. If the shape has 0 dimensions, the -// shape is unconstrained. -// If not specified, defaults to -func PlaceholderShape(value tf.Shape) PlaceholderAttr { - return func(m optionalAttr) { - m["shape"] = value - } -} - -// A placeholder op for a value that will be fed into the computation. -// -// N.B. This operation will fail with an error if it is executed. It is -// intended as a way to represent a value that will always be fed, and to -// provide attrs that enable the fed value to be checked at runtime. -// -// Arguments: -// dtype: The type of elements in the tensor. -// -// Returns A placeholder tensor that must be replaced using the feed mechanism. -func Placeholder(scope *Scope, dtype tf.DataType, optional ...PlaceholderAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Placeholder", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Return the reduction indices for computing gradients of s0 op s1 with broadcast. -// -// This is typically used by gradient computations for a broadcasting operation. -func BroadcastGradientArgs(scope *Scope, s0 tf.Output, s1 tf.Output) (r0 tf.Output, r1 tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BroadcastGradientArgs", - Input: []tf.Input{ - s0, s1, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Return the shape of s0 op s1 with broadcast. 
-// -// Given `s0` and `s1`, tensors that represent shapes, compute `r0`, the -// broadcasted shape. `s0`, `s1` and `r0` are all integer vectors. -func BroadcastArgs(scope *Scope, s0 tf.Output, s1 tf.Output) (r0 tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BroadcastArgs", - Input: []tf.Input{ - s0, s1, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// TensorStridedSliceUpdateAttr is an optional argument to TensorStridedSliceUpdate. -type TensorStridedSliceUpdateAttr func(optionalAttr) - -// TensorStridedSliceUpdateBeginMask sets the optional begin_mask attribute to value. -// If not specified, defaults to 0 -func TensorStridedSliceUpdateBeginMask(value int64) TensorStridedSliceUpdateAttr { - return func(m optionalAttr) { - m["begin_mask"] = value - } -} - -// TensorStridedSliceUpdateEndMask sets the optional end_mask attribute to value. -// If not specified, defaults to 0 -func TensorStridedSliceUpdateEndMask(value int64) TensorStridedSliceUpdateAttr { - return func(m optionalAttr) { - m["end_mask"] = value - } -} - -// TensorStridedSliceUpdateEllipsisMask sets the optional ellipsis_mask attribute to value. -// If not specified, defaults to 0 -func TensorStridedSliceUpdateEllipsisMask(value int64) TensorStridedSliceUpdateAttr { - return func(m optionalAttr) { - m["ellipsis_mask"] = value - } -} - -// TensorStridedSliceUpdateNewAxisMask sets the optional new_axis_mask attribute to value. -// If not specified, defaults to 0 -func TensorStridedSliceUpdateNewAxisMask(value int64) TensorStridedSliceUpdateAttr { - return func(m optionalAttr) { - m["new_axis_mask"] = value - } -} - -// TensorStridedSliceUpdateShrinkAxisMask sets the optional shrink_axis_mask attribute to value. -// If not specified, defaults to 0 -func TensorStridedSliceUpdateShrinkAxisMask(value int64) TensorStridedSliceUpdateAttr { - return func(m optionalAttr) { - m["shrink_axis_mask"] = value - } -} - -// Assign `value` to the sliced l-value reference of `input`. -// -// The values of `value` are assigned to the positions in the tensor `input` that -// are selected by the slice parameters. The slice parameters `begin` `end` -// `strides` etc. work exactly as in `StridedSlice`. -// -// NOTE this op currently does not support broadcasting and so `value`'s shape -// must be exactly the shape produced by the slice of `input`. -func TensorStridedSliceUpdate(scope *Scope, input tf.Output, begin tf.Output, end tf.Output, strides tf.Output, value tf.Output, optional ...TensorStridedSliceUpdateAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TensorStridedSliceUpdate", - Input: []tf.Input{ - input, begin, end, strides, value, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Ensures that the tensor's shape matches the expected shape. -// -// Raises an error if the input tensor's shape does not match the specified shape. -// Returns the input tensor otherwise. -// -// Arguments: -// input: A tensor, whose shape is to be validated. -// shape: The expected (possibly partially specified) shape of the input tensor. -// -// Returns A tensor with the same shape and contents as the input tensor or value. 
-func EnsureShape(scope *Scope, input tf.Output, shape tf.Shape) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"shape": shape} - opspec := tf.OpSpec{ - Type: "EnsureShape", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ShapeAttr is an optional argument to Shape. -type ShapeAttr func(optionalAttr) - -// ShapeOutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_INT32 -func ShapeOutType(value tf.DataType) ShapeAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// Returns the shape of a tensor. -// -// This operation returns a 1-D integer tensor representing the shape of `input`. -// -// For example: -// -// ``` -// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] -// shape(t) ==> [2, 2, 3] -// ``` -func Shape(scope *Scope, input tf.Output, optional ...ShapeAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Shape", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// UniqueWithCountsV2Attr is an optional argument to UniqueWithCountsV2. -type UniqueWithCountsV2Attr func(optionalAttr) - -// UniqueWithCountsV2OutIdx sets the optional out_idx attribute to value. -// If not specified, defaults to DT_INT32 -func UniqueWithCountsV2OutIdx(value tf.DataType) UniqueWithCountsV2Attr { - return func(m optionalAttr) { - m["out_idx"] = value - } -} - -// Finds unique elements along an axis of a tensor. -// -// This operation either returns a tensor `y` containing unique elements -// along the `axis` of a tensor. The returned unique elements is sorted -// in the same order as they occur along `axis` in `x`. -// This operation also returns a tensor `idx` and a tensor `count` -// that are the same size as the number of the elements in `x` along the -// `axis` dimension. The `idx` contains the index in the unique output `y` -// and the `count` contains the count in the unique output `y`. -// In other words, for an `1-D` tensor `x` with `axis = None: -// -// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` -// -// For example: -// -// ``` -// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] -// y, idx, count = unique_with_counts(x) -// y ==> [1, 2, 4, 7, 8] -// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] -// count ==> [2, 1, 3, 1, 2] -// ``` -// -// For an `2-D` tensor `x` with `axis = 0`: -// -// ``` -// # tensor 'x' is [[1, 0, 0], -// # [1, 0, 0], -// # [2, 0, 0]] -// y, idx, count = unique_with_counts(x, axis=0) -// y ==> [[1, 0, 0], -// [2, 0, 0]] -// idx ==> [0, 0, 1] -// count ==> [2, 1] -// ``` -// -// For an `2-D` tensor `x` with `axis = 1`: -// -// ``` -// # tensor 'x' is [[1, 0, 0], -// # [1, 0, 0], -// # [2, 0, 0]] -// y, idx, count = unique_with_counts(x, axis=1) -// y ==> [[1, 0], -// [1, 0], -// [2, 0]] -// idx ==> [0, 1, 1] -// count ==> [1, 2] -// ``` -// -// Arguments: -// x: A `Tensor`. -// axis: A `Tensor` of type `int32` (default: None). The axis of the Tensor to -// find the unique elements. -// -// Returns: -// y: A `Tensor`. Unique elements along the `axis` of `Tensor` x. -// idx: A 1-D Tensor. Has the same type as x that contains the index of each -// value of x in the output y. -// count: A 1-D Tensor. The count of each value of x in the output y. 
-func UniqueWithCountsV2(scope *Scope, x tf.Output, axis tf.Output, optional ...UniqueWithCountsV2Attr) (y tf.Output, idx tf.Output, count tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "UniqueWithCountsV2", - Input: []tf.Input{ - x, axis, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Shuffle dimensions of x according to a permutation and conjugate the result. -// -// The output `y` has the same rank as `x`. The shapes of `x` and `y` satisfy: -// `y.shape[i] == x.shape[perm[i]] for i in [0, 1, ..., rank(x) - 1]` -// `y[i,j,k,...,s,t,u] == conj(x[perm[i], perm[j], perm[k],...,perm[s], perm[t], perm[u]])` -func ConjugateTranspose(scope *Scope, x tf.Output, perm tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ConjugateTranspose", - Input: []tf.Input{ - x, perm, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the inverse permutation of a tensor. -// -// This operation computes the inverse of an index permutation. It takes a 1-D -// integer tensor `x`, which represents the indices of a zero-based array, and -// swaps each value with its index position. In other words, for an output tensor -// `y` and an input tensor `x`, this operation computes the following: -// -// `y[x[i]] = i for i in [0, 1, ..., len(x) - 1]` -// -// The values must include 0. There can be no duplicate values or negative values. -// -// For example: -// -// ``` -// # tensor `x` is [3, 4, 0, 2, 1] -// invert_permutation(x) ==> [2, 4, 3, 0, 1] -// ``` -// -// Arguments: -// x: 1-D. -// -// Returns 1-D. -func InvertPermutation(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "InvertPermutation", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// PreventGradientAttr is an optional argument to PreventGradient. -type PreventGradientAttr func(optionalAttr) - -// PreventGradientMessage sets the optional message attribute to value. -// -// value: Will be printed in the error when anyone tries to differentiate -// this operation. -// If not specified, defaults to "" -func PreventGradientMessage(value string) PreventGradientAttr { - return func(m optionalAttr) { - m["message"] = value - } -} - -// An identity op that triggers an error if a gradient is requested. -// -// When executed in a graph, this op outputs its input tensor as-is. -// -// When building ops to compute gradients, the TensorFlow gradient system -// will return an error when trying to lookup the gradient of this op, -// because no gradient must ever be registered for this function. This -// op exists to prevent subtle bugs from silently returning unimplemented -// gradients in some corner cases. -// -// Arguments: -// input: any tensor. -// -// Returns the same input tensor. -func PreventGradient(scope *Scope, input tf.Output, optional ...PreventGradientAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "PreventGradient", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Stops gradient computation. -// -// When executed in a graph, this op outputs its input tensor as-is. 
-// -// When building ops to compute gradients, this op prevents the contribution of -// its inputs to be taken into account. Normally, the gradient generator adds ops -// to a graph to compute the derivatives of a specified 'loss' by recursively -// finding out inputs that contributed to its computation. If you insert this op -// in the graph it inputs are masked from the gradient generator. They are not -// taken into account for computing gradients. -// -// This is useful any time you want to compute a value with TensorFlow but need -// to pretend that the value was a constant. Some examples include: -// -// * The *EM* algorithm where the *M-step* should not involve backpropagation -// through the output of the *E-step*. -// * Contrastive divergence training of Boltzmann machines where, when -// differentiating the energy function, the training must not backpropagate -// through the graph that generated the samples from the model. -// * Adversarial training, where no backprop should happen through the adversarial -// example generation process. -func StopGradient(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "StopGradient", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Identity op for gradient debugging. -// -// This op is hidden from public in Python. It is used by TensorFlow Debugger to -// register gradient tensors for gradient debugging. -// This op operates on non-reference-type tensors. -func DebugGradientIdentity(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DebugGradientIdentity", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Gather slices from `params` into a Tensor with shape specified by `indices`. -// -// `indices` is a K-dimensional integer tensor, best thought of as a -// (K-1)-dimensional tensor of indices into `params`, where each element defines a -// slice of `params`: -// -// output[\\(i_0, ..., i_{K-2}\\)] = params[indices[\\(i_0, ..., i_{K-2}\\)]] -// -// Whereas in `tf.gather` `indices` defines slices into the `axis` -// dimension of `params`, in `tf.gather_nd`, `indices` defines slices into the -// first `N` dimensions of `params`, where `N = indices.shape[-1]`. -// -// The last dimension of `indices` can be at most the rank of -// `params`: -// -// indices.shape[-1] <= params.rank -// -// The last dimension of `indices` corresponds to elements -// (if `indices.shape[-1] == params.rank`) or slices -// (if `indices.shape[-1] < params.rank`) along dimension `indices.shape[-1]` -// of `params`. The output tensor has shape -// -// indices.shape[:-1] + params.shape[indices.shape[-1]:] -// -// Note that on CPU, if an out of bound index is found, an error is returned. -// On GPU, if an out of bound index is found, a 0 is stored in the -// corresponding output value. -// -// Some examples below. 
-// -// Simple indexing into a matrix: -// -// ```python -// indices = [[0, 0], [1, 1]] -// params = [['a', 'b'], ['c', 'd']] -// output = ['a', 'd'] -// ``` -// -// Slice indexing into a matrix: -// -// ```python -// indices = [[1], [0]] -// params = [['a', 'b'], ['c', 'd']] -// output = [['c', 'd'], ['a', 'b']] -// ``` -// -// Indexing into a 3-tensor: -// -// ```python -// indices = [[1]] -// params = [[['a0', 'b0'], ['c0', 'd0']], -// [['a1', 'b1'], ['c1', 'd1']]] -// output = [[['a1', 'b1'], ['c1', 'd1']]] -// -// -// indices = [[0, 1], [1, 0]] -// params = [[['a0', 'b0'], ['c0', 'd0']], -// [['a1', 'b1'], ['c1', 'd1']]] -// output = [['c0', 'd0'], ['a1', 'b1']] -// -// -// indices = [[0, 0, 1], [1, 0, 1]] -// params = [[['a0', 'b0'], ['c0', 'd0']], -// [['a1', 'b1'], ['c1', 'd1']]] -// output = ['b0', 'b1'] -// ``` -// -// Batched indexing into a matrix: -// -// ```python -// indices = [[[0, 0]], [[0, 1]]] -// params = [['a', 'b'], ['c', 'd']] -// output = [['a'], ['b']] -// ``` -// -// Batched slice indexing into a matrix: -// -// ```python -// indices = [[[1]], [[0]]] -// params = [['a', 'b'], ['c', 'd']] -// output = [[['c', 'd']], [['a', 'b']]] -// ``` -// -// Batched indexing into a 3-tensor: -// -// ```python -// indices = [[[1]], [[0]]] -// params = [[['a0', 'b0'], ['c0', 'd0']], -// [['a1', 'b1'], ['c1', 'd1']]] -// output = [[[['a1', 'b1'], ['c1', 'd1']]], -// [[['a0', 'b0'], ['c0', 'd0']]]] -// -// indices = [[[0, 1], [1, 0]], [[0, 0], [1, 1]]] -// params = [[['a0', 'b0'], ['c0', 'd0']], -// [['a1', 'b1'], ['c1', 'd1']]] -// output = [[['c0', 'd0'], ['a1', 'b1']], -// [['a0', 'b0'], ['c1', 'd1']]] -// -// -// indices = [[[0, 0, 1], [1, 0, 1]], [[0, 1, 1], [1, 1, 0]]] -// params = [[['a0', 'b0'], ['c0', 'd0']], -// [['a1', 'b1'], ['c1', 'd1']]] -// output = [['b0', 'b1'], ['d0', 'c1']] -// ``` -// -// See also `tf.gather` and `tf.batch_gather`. -// -// Arguments: -// params: The tensor from which to gather values. -// indices: Index tensor. -// -// Returns Values from `params` gathered from indices given by `indices`, with -// shape `indices.shape[:-1] + params.shape[indices.shape[-1]:]`. -func GatherNd(scope *Scope, params tf.Output, indices tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "GatherNd", - Input: []tf.Input{ - params, indices, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// GatherV2Attr is an optional argument to GatherV2. -type GatherV2Attr func(optionalAttr) - -// GatherV2BatchDims sets the optional batch_dims attribute to value. -// If not specified, defaults to 0 -func GatherV2BatchDims(value int64) GatherV2Attr { - return func(m optionalAttr) { - m["batch_dims"] = value - } -} - -// Gather slices from `params` axis `axis` according to `indices`. -// -// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D). -// Produces an output tensor with shape `params.shape[:axis] + -// indices.shape[batch_dims:] + params.shape[axis + 1:]` where: -// -// ```python -// # Scalar indices (output is rank(params) - 1). -// output[a_0, ..., a_n, b_0, ..., b_n] = -// params[a_0, ..., a_n, indices, b_0, ..., b_n] -// -// # Vector indices (output is rank(params)). -// output[a_0, ..., a_n, i, b_0, ..., b_n] = -// params[a_0, ..., a_n, indices[i], b_0, ..., b_n] -// -// # Higher rank indices (output is rank(params) + rank(indices) - 1). -// output[a_0, ..., a_n, i, ..., j, b_0, ... b_n] = -// params[a_0, ..., a_n, indices[i, ..., j], b_0, ..., b_n] -// ``` -// -//
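To make the gather semantics concrete, here is a minimal Go sketch using the `GatherV2` wrapper (its full signature appears below). It assumes the standard `tensorflow/go` client packages; the tensor values are illustrative only.

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	params := op.Const(s, [][]int32{{1, 2, 3}, {4, 5, 6}})
	// Gather rows 1 and 0 along axis 0.
	indices := op.Const(s, []int32{1, 0})
	axis := op.Const(s, int32(0))
	g := op.GatherV2(s, params, indices, axis)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	res, err := sess.Run(nil, []tf.Output{g}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(res[0].Value()) // expected: [[4 5 6] [1 2 3]]
}
```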
-// -// Note that on CPU, if an out of bound index is found, an error is returned. -// On GPU, if an out of bound index is found, a 0 is stored in the -// corresponding output value. -// -// See also `tf.batch_gather` and `tf.gather_nd`. -// -// Arguments: -// params: The tensor from which to gather values. Must be at least rank -// `axis + 1`. -// indices: Index tensor. Must be in range `[0, params.shape[axis])`. -// axis: The axis in `params` to gather `indices` from. Defaults to the first -// dimension. Supports negative indexes. -// -// Returns Values from `params` gathered from indices given by `indices`, with -// shape `params.shape[:axis] + indices.shape + params.shape[axis + 1:]`. -func GatherV2(scope *Scope, params tf.Output, indices tf.Output, axis tf.Output, optional ...GatherV2Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "GatherV2", - Input: []tf.Input{ - params, indices, axis, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Reverses specific dimensions of a tensor. -// -// NOTE `tf.reverse` has now changed behavior in preparation for 1.0. -// `tf.reverse_v2` is currently an alias that will be deprecated before TF 1.0. -// -// Given a `tensor`, and a `int32` tensor `axis` representing the set of -// dimensions of `tensor` to reverse. This operation reverses each dimension -// `i` for which there exists `j` s.t. `axis[j] == i`. -// -// `tensor` can have up to 8 dimensions. The number of dimensions specified -// in `axis` may be 0 or more entries. If an index is specified more than -// once, a InvalidArgument error is raised. -// -// For example: -// -// ``` -// # tensor 't' is [[[[ 0, 1, 2, 3], -// # [ 4, 5, 6, 7], -// # [ 8, 9, 10, 11]], -// # [[12, 13, 14, 15], -// # [16, 17, 18, 19], -// # [20, 21, 22, 23]]]] -// # tensor 't' shape is [1, 2, 3, 4] -// -// # 'dims' is [3] or 'dims' is [-1] -// reverse(t, dims) ==> [[[[ 3, 2, 1, 0], -// [ 7, 6, 5, 4], -// [ 11, 10, 9, 8]], -// [[15, 14, 13, 12], -// [19, 18, 17, 16], -// [23, 22, 21, 20]]]] -// -// # 'dims' is '[1]' (or 'dims' is '[-3]') -// reverse(t, dims) ==> [[[[12, 13, 14, 15], -// [16, 17, 18, 19], -// [20, 21, 22, 23] -// [[ 0, 1, 2, 3], -// [ 4, 5, 6, 7], -// [ 8, 9, 10, 11]]]] -// -// # 'dims' is '[2]' (or 'dims' is '[-2]') -// reverse(t, dims) ==> [[[[8, 9, 10, 11], -// [4, 5, 6, 7], -// [0, 1, 2, 3]] -// [[20, 21, 22, 23], -// [16, 17, 18, 19], -// [12, 13, 14, 15]]]] -// ``` -// -// Arguments: -// tensor: Up to 8-D. -// axis: 1-D. The indices of the dimensions to reverse. Must be in the range -// `[-rank(tensor), rank(tensor))`. -// -// Returns The same shape as `tensor`. -func ReverseV2(scope *Scope, tensor tf.Output, axis tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReverseV2", - Input: []tf.Input{ - tensor, axis, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the batched diagonal part of a batched tensor. -// -// This operation returns a tensor with the `diagonal` part -// of the batched `input`. The `diagonal` part is computed as follows: -// -// Assume `input` has `k` dimensions `[I, J, K, ..., M, N]`, then the output is a -// tensor of rank `k - 1` with dimensions `[I, J, K, ..., min(M, N)]` where: -// -// `diagonal[i, j, k, ..., n] = input[i, j, k, ..., n, n]`. -// -// The input must be at least a matrix. 
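The `ReverseV2` wrapper defined above can be exercised with a small graph like the following sketch (standard `tensorflow/go` client assumed; values are illustrative).

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	t := op.Const(s, [][]int32{{1, 2, 3}, {4, 5, 6}})
	// Reverse along dimension 1 (the columns).
	axis := op.Const(s, []int32{1})
	rev := op.ReverseV2(s, t, axis)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	res, err := sess.Run(nil, []tf.Output{rev}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(res[0].Value()) // expected: [[3 2 1] [6 5 4]]
}
```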
-// -// For example: -// -// ``` -// # 'input' is [[[1, 0, 0, 0] -// [0, 2, 0, 0] -// [0, 0, 3, 0] -// [0, 0, 0, 4]], -// [[5, 0, 0, 0] -// [0, 6, 0, 0] -// [0, 0, 7, 0] -// [0, 0, 0, 8]]] -// -// and input.shape = (2, 4, 4) -// -// tf.matrix_diag_part(input) ==> [[1, 2, 3, 4], [5, 6, 7, 8]] -// -// which has shape (2, 4) -// ``` -// -// Arguments: -// input: Rank `k` tensor where `k >= 2`. -// -// Returns The extracted diagonal(s) having shape -// `diagonal.shape = input.shape[:-2] + [min(input.shape[-2:])]`. -func MatrixDiagPart(scope *Scope, input tf.Output) (diagonal tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MatrixDiagPart", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MatrixSetDiagV3Attr is an optional argument to MatrixSetDiagV3. -type MatrixSetDiagV3Attr func(optionalAttr) - -// MatrixSetDiagV3Align sets the optional align attribute to value. -// -// value: Some diagonals are shorter than `max_diag_len` and need to be padded. `align` is -// a string specifying how superdiagonals and subdiagonals should be aligned, -// respectively. There are four possible alignments: "RIGHT_LEFT" (default), -// "LEFT_RIGHT", "LEFT_LEFT", and "RIGHT_RIGHT". "RIGHT_LEFT" aligns superdiagonals -// to the right (left-pads the row) and subdiagonals to the left (right-pads the -// row). It is the packing format LAPACK uses. cuSPARSE uses "LEFT_RIGHT", which is -// the opposite alignment. -// If not specified, defaults to "RIGHT_LEFT" -func MatrixSetDiagV3Align(value string) MatrixSetDiagV3Attr { - return func(m optionalAttr) { - m["align"] = value - } -} - -// Returns a batched matrix tensor with new batched diagonal values. -// -// Given `input` and `diagonal`, this operation returns a tensor with the -// same shape and values as `input`, except for the specified diagonals of the -// innermost matrices. These will be overwritten by the values in `diagonal`. -// -// `input` has `r+1` dimensions `[I, J, ..., L, M, N]`. When `k` is scalar or -// `k[0] == k[1]`, `diagonal` has `r` dimensions `[I, J, ..., L, max_diag_len]`. -// Otherwise, it has `r+1` dimensions `[I, J, ..., L, num_diags, max_diag_len]`. -// `num_diags` is the number of diagonals, `num_diags = k[1] - k[0] + 1`. -// `max_diag_len` is the longest diagonal in the range `[k[0], k[1]]`, -// `max_diag_len = min(M + min(k[1], 0), N + min(-k[0], 0))` -// -// The output is a tensor of rank `k+1` with dimensions `[I, J, ..., L, M, N]`. -// If `k` is scalar or `k[0] == k[1]`: -// -// ``` -// output[i, j, ..., l, m, n] -// = diagonal[i, j, ..., l, n-max(k[1], 0)] ; if n - m == k[1] -// input[i, j, ..., l, m, n] ; otherwise -// ``` -// -// Otherwise, -// -// ``` -// output[i, j, ..., l, m, n] -// = diagonal[i, j, ..., l, diag_index, index_in_diag] ; if k[0] <= d <= k[1] -// input[i, j, ..., l, m, n] ; otherwise -// ``` -// where `d = n - m`, `diag_index = k[1] - d`, and -// `index_in_diag = n - max(d, 0) + offset`. -// -// `offset` is zero except when the alignment of the diagonal is to the right. -// ``` -// offset = max_diag_len - diag_len(d) ; if (`align` in {RIGHT_LEFT, RIGHT_RIGHT} -// and `d >= 0`) or -// (`align` in {LEFT_RIGHT, RIGHT_RIGHT} -// and `d <= 0`) -// 0 ; otherwise -// ``` -// where `diag_len(d) = min(cols - max(d, 0), rows + min(d, 0))`. -// -// For example: -// -// ``` -// # The main diagonal. 
-// input = np.array([[[7, 7, 7, 7], # Input shape: (2, 3, 4) -// [7, 7, 7, 7], -// [7, 7, 7, 7]], -// [[7, 7, 7, 7], -// [7, 7, 7, 7], -// [7, 7, 7, 7]]]) -// diagonal = np.array([[1, 2, 3], # Diagonal shape: (2, 3) -// [4, 5, 6]]) -// tf.matrix_set_diag(input, diagonal) -// ==> [[[1, 7, 7, 7], # Output shape: (2, 3, 4) -// [7, 2, 7, 7], -// [7, 7, 3, 7]], -// [[4, 7, 7, 7], -// [7, 5, 7, 7], -// [7, 7, 6, 7]]] -// -// # A superdiagonal (per batch). -// tf.matrix_set_diag(input, diagonal, k = 1) -// ==> [[[7, 1, 7, 7], # Output shape: (2, 3, 4) -// [7, 7, 2, 7], -// [7, 7, 7, 3]], -// [[7, 4, 7, 7], -// [7, 7, 5, 7], -// [7, 7, 7, 6]]] -// -// # A band of diagonals. -// diagonals = np.array([[[0, 9, 1], # Diagonal shape: (2, 4, 3) -// [6, 5, 8], -// [1, 2, 3], -// [4, 5, 0]], -// [[0, 1, 2], -// [5, 6, 4], -// [6, 1, 2], -// [3, 4, 0]]]) -// tf.matrix_set_diag(input, diagonals, k = (-1, 2)) -// ==> [[[1, 6, 9, 7], # Output shape: (2, 3, 4) -// [4, 2, 5, 1], -// [7, 5, 3, 8]], -// [[6, 5, 1, 7], -// [3, 1, 6, 2], -// [7, 4, 2, 4]]] -// -// # LEFT_RIGHT alignment. -// diagonals = np.array([[[9, 1, 0], # Diagonal shape: (2, 4, 3) -// [6, 5, 8], -// [1, 2, 3], -// [0, 4, 5]], -// [[1, 2, 0], -// [5, 6, 4], -// [6, 1, 2], -// [0, 3, 4]]]) -// tf.matrix_set_diag(input, diagonals, k = (-1, 2), align="LEFT_RIGHT") -// ==> [[[1, 6, 9, 7], # Output shape: (2, 3, 4) -// [4, 2, 5, 1], -// [7, 5, 3, 8]], -// [[6, 5, 1, 7], -// [3, 1, 6, 2], -// [7, 4, 2, 4]]] -// -// ``` -// -// Arguments: -// input: Rank `r+1`, where `r >= 1`. -// diagonal: Rank `r` when `k` is an integer or `k[0] == k[1]`. Otherwise, it has rank `r+1`. -// `k >= 1`. -// k: Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main -// diagonal, and negative value means subdiagonals. `k` can be a single integer -// (for a single diagonal) or a pair of integers specifying the low and high ends -// of a matrix band. `k[0]` must not be larger than `k[1]`. -// -// Returns Rank `r+1`, with `output.shape = input.shape`. -func MatrixSetDiagV3(scope *Scope, input tf.Output, diagonal tf.Output, k tf.Output, optional ...MatrixSetDiagV3Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MatrixSetDiagV3", - Input: []tf.Input{ - input, diagonal, k, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns a batched matrix tensor with new batched diagonal values. -// -// Given `input` and `diagonal`, this operation returns a tensor with the -// same shape and values as `input`, except for the specified diagonals of the -// innermost matrices. These will be overwritten by the values in `diagonal`. -// -// `input` has `r+1` dimensions `[I, J, ..., L, M, N]`. When `k` is scalar or -// `k[0] == k[1]`, `diagonal` has `r` dimensions `[I, J, ..., L, max_diag_len]`. -// Otherwise, it has `r+1` dimensions `[I, J, ..., L, num_diags, max_diag_len]`. -// `num_diags` is the number of diagonals, `num_diags = k[1] - k[0] + 1`. -// `max_diag_len` is the longest diagonal in the range `[k[0], k[1]]`, -// `max_diag_len = min(M + min(k[1], 0), N + min(-k[0], 0))` -// -// The output is a tensor of rank `k+1` with dimensions `[I, J, ..., L, M, N]`. 
-// If `k` is scalar or `k[0] == k[1]`: -// -// ``` -// output[i, j, ..., l, m, n] -// = diagonal[i, j, ..., l, n-max(k[1], 0)] ; if n - m == k[1] -// input[i, j, ..., l, m, n] ; otherwise -// ``` -// -// Otherwise, -// -// ``` -// output[i, j, ..., l, m, n] -// = diagonal[i, j, ..., l, diag_index, index_in_diag] ; if k[0] <= d <= k[1] -// input[i, j, ..., l, m, n] ; otherwise -// ``` -// where `d = n - m`, `diag_index = k[1] - d`, and `index_in_diag = n - max(d, 0)`. -// -// For example: -// -// ``` -// # The main diagonal. -// input = np.array([[[7, 7, 7, 7], # Input shape: (2, 3, 4) -// [7, 7, 7, 7], -// [7, 7, 7, 7]], -// [[7, 7, 7, 7], -// [7, 7, 7, 7], -// [7, 7, 7, 7]]]) -// diagonal = np.array([[1, 2, 3], # Diagonal shape: (2, 3) -// [4, 5, 6]]) -// tf.matrix_set_diag(diagonal) ==> [[[1, 7, 7, 7], # Output shape: (2, 3, 4) -// [7, 2, 7, 7], -// [7, 7, 3, 7]], -// [[4, 7, 7, 7], -// [7, 5, 7, 7], -// [7, 7, 6, 7]]] -// -// # A superdiagonal (per batch). -// tf.matrix_set_diag(diagonal, k = 1) -// ==> [[[7, 1, 7, 7], # Output shape: (2, 3, 4) -// [7, 7, 2, 7], -// [7, 7, 7, 3]], -// [[7, 4, 7, 7], -// [7, 7, 5, 7], -// [7, 7, 7, 6]]] -// -// # A band of diagonals. -// diagonals = np.array([[[1, 2, 3], # Diagonal shape: (2, 2, 3) -// [4, 5, 0]], -// [[6, 1, 2], -// [3, 4, 0]]]) -// tf.matrix_set_diag(diagonals, k = (-1, 0)) -// ==> [[[1, 7, 7, 7], # Output shape: (2, 3, 4) -// [4, 2, 7, 7], -// [0, 5, 3, 7]], -// [[6, 7, 7, 7], -// [3, 1, 7, 7], -// [7, 4, 2, 7]]] -// -// ``` -// -// Arguments: -// input: Rank `r+1`, where `r >= 1`. -// diagonal: Rank `r` when `k` is an integer or `k[0] == k[1]`. Otherwise, it has rank `r+1`. -// `k >= 1`. -// k: Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main -// diagonal, and negative value means subdiagonals. `k` can be a single integer -// (for a single diagonal) or a pair of integers specifying the low and high ends -// of a matrix band. `k[0]` must not be larger than `k[1]`. -// -// Returns Rank `r+1`, with `output.shape = input.shape`. -func MatrixSetDiagV2(scope *Scope, input tf.Output, diagonal tf.Output, k tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MatrixSetDiagV2", - Input: []tf.Input{ - input, diagonal, k, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns a diagonal tensor with a given diagonal values. -// -// Given a `diagonal`, this operation returns a tensor with the `diagonal` and -// everything else padded with zeros. The diagonal is computed as follows: -// -// Assume `diagonal` has dimensions [D1,..., Dk], then the output is a tensor of -// rank 2k with dimensions [D1,..., Dk, D1,..., Dk] where: -// -// `output[i1,..., ik, i1,..., ik] = diagonal[i1, ..., ik]` and 0 everywhere else. -// -// For example: -// -// ``` -// # 'diagonal' is [1, 2, 3, 4] -// tf.diag(diagonal) ==> [[1, 0, 0, 0] -// [0, 2, 0, 0] -// [0, 0, 3, 0] -// [0, 0, 0, 4]] -// ``` -// -// Arguments: -// diagonal: Rank k tensor where k is at most 1. -func Diag(scope *Scope, diagonal tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Diag", - Input: []tf.Input{ - diagonal, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns a tensor of ones with the same shape and type as x. -// -// Arguments: -// x: a tensor of type T. -// -// Returns a tensor of the same shape and type as x but filled with ones. 
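A short sketch of the `OnesLike` wrapper documented above, assuming the standard `tensorflow/go` client packages; the input values are illustrative.

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	x := op.Const(s, [][]int32{{1, 2}, {3, 4}})
	ones := op.OnesLike(s, x)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	res, err := sess.Run(nil, []tf.Output{ones}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(res[0].Value()) // expected: [[1 1] [1 1]]
}
```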
-func OnesLike(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "OnesLike", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns a constant tensor on the host. Only for writing C++ tests. -// -// Arguments: -// value: Attr `value` is the tensor to return. -// -func HostConst(scope *Scope, value tf.Tensor, dtype tf.DataType) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"value": value, "dtype": dtype} - opspec := tf.OpSpec{ - Type: "HostConst", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Splits a tensor into `num_split` tensors along one dimension. -// -// Arguments: -// axis: 0-D. The dimension along which to split. Must be in the range -// `[-rank(value), rank(value))`. -// value: The tensor to split. -// num_split: The number of ways to split. Must evenly divide -// `value.shape[split_dim]`. -// -// Returns They are identically shaped tensors, whose shape matches that of `value` -// except along `axis`, where their sizes are -// `values.shape[split_dim] / num_split`. -func Split(scope *Scope, axis tf.Output, value tf.Output, num_split int64) (output []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_split": num_split} - opspec := tf.OpSpec{ - Type: "Split", - Input: []tf.Input{ - axis, value, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if output, idx, err = makeOutputList(op, idx, "output"); err != nil { - scope.UpdateErr("Split", err) - return - } - return output -} - -// Computes offsets of concat inputs within its output. -// -// For example: -// -// ``` -// # 'x' is [2, 2, 7] -// # 'y' is [2, 3, 7] -// # 'z' is [2, 5, 7] -// concat_offset(2, [x, y, z]) => [0, 0, 0], [0, 2, 0], [0, 5, 0] -// ``` -// -// This is typically used by gradient computations for a concat operation. -// -// Arguments: -// concat_dim: The dimension along which to concatenate. -// shape: The `N` int32 vectors representing shape of tensors being concatenated. -// -// Returns The `N` int32 vectors representing the starting offset -// of input tensors within the concatenated output. -func ConcatOffset(scope *Scope, concat_dim tf.Output, shape []tf.Output) (offset []tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ConcatOffset", - Input: []tf.Input{ - concat_dim, tf.OutputList(shape), - }, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if offset, idx, err = makeOutputList(op, idx, "offset"); err != nil { - scope.UpdateErr("ConcatOffset", err) - return - } - return offset -} - -// Checks a tensor for NaN and Inf values. -// -// When run, reports an `InvalidArgument` error if `tensor` has any values -// that are not a number (NaN) or infinity (Inf). Otherwise, passes `tensor` as-is. -// -// Arguments: -// -// message: Prefix of the error message. -func CheckNumerics(scope *Scope, tensor tf.Output, message string) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"message": message} - opspec := tf.OpSpec{ - Type: "CheckNumerics", - Input: []tf.Input{ - tensor, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Broadcast an array for a compatible shape. 
-// -// Broadcasting is the process of making arrays to have compatible shapes -// for arithmetic operations. Two shapes are compatible if for each -// dimension pair they are either equal or one of them is one. When trying -// to broadcast a Tensor to a shape, it starts with the trailing dimensions, -// and works its way forward. -// -// For example, -// -// >>> x = tf.constant([1, 2, 3]) -// >>> y = tf.broadcast_to(x, [3, 3]) -// >>> print(y) -// tf.Tensor( -// [[1 2 3] -// [1 2 3] -// [1 2 3]], shape=(3, 3), dtype=int32) -// -// In the above example, the input Tensor with the shape of `[1, 3]` -// is broadcasted to output Tensor with shape of `[3, 3]`. -// -// When doing broadcasted operations such as multiplying a tensor -// by a scalar, broadcasting (usually) confers some time or space -// benefit, as the broadcasted tensor is never materialized. -// -// However, `broadcast_to` does not carry with it any such benefits. -// The newly-created tensor takes the full memory of the broadcasted -// shape. (In a graph context, `broadcast_to` might be fused to -// subsequent operation and then be optimized away, however.) -// -// Arguments: -// input: A Tensor to broadcast. -// shape: An 1-D `int` Tensor. The shape of the desired output. -// -// Returns A Tensor. -func BroadcastTo(scope *Scope, input tf.Output, shape tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BroadcastTo", - Input: []tf.Input{ - input, shape, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Converts an array of flat indices into a tuple of coordinate arrays. -// -// -// Example: -// -// ``` -// y = tf.unravel_index(indices=[2, 5, 7], dims=[3, 3]) -// # 'dims' represent a hypothetical (3, 3) tensor of indices: -// # [[0, 1, *2*], -// # [3, 4, *5*], -// # [6, *7*, 8]] -// # For each entry from 'indices', this operation returns -// # its coordinates (marked with '*'), such as -// # 2 ==> (0, 2) -// # 5 ==> (1, 2) -// # 7 ==> (2, 1) -// y ==> [[0, 1, 2], [2, 2, 1]] -// ``` -// -// @compatibility(numpy) -// Equivalent to np.unravel_index -// @end_compatibility -// -// Arguments: -// indices: An 0-D or 1-D `int` Tensor whose elements are indices into the -// flattened version of an array of dimensions dims. -// dims: An 1-D `int` Tensor. The shape of the array to use for unraveling -// indices. -// -// Returns An 2-D (or 1-D if indices is 0-D) tensor where each row has the -// same shape as the indices array. -func UnravelIndex(scope *Scope, indices tf.Output, dims tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "UnravelIndex", - Input: []tf.Input{ - indices, dims, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// EmptyAttr is an optional argument to Empty. -type EmptyAttr func(optionalAttr) - -// EmptyInit sets the optional init attribute to value. -// -// value: If True, initialize the returned tensor with the default value of dtype. Otherwise, the implementation is free not to initializethe tensor's content. -// If not specified, defaults to false -func EmptyInit(value bool) EmptyAttr { - return func(m optionalAttr) { - m["init"] = value - } -} - -// Creates a tensor with the given shape. -// -// This operation creates a tensor of `shape` and `dtype`. -// -// Arguments: -// shape: 1-D. Represents the shape of the output tensor. -// -// -// Returns A `Tensor` of type `T`. 
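A small sketch showing the `Empty` wrapper and its `EmptyInit` attribute in use (standard `tensorflow/go` client assumed). With `init` set to true the contents are the dtype's default value, i.e. zeros for `tf.Float`.

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// A 2x3 float tensor; init=true requests default-valued (zeroed) contents.
	shape := op.Const(s, []int32{2, 3})
	e := op.Empty(s, shape, tf.Float, op.EmptyInit(true))

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	res, err := sess.Run(nil, []tf.Output{e}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(res[0].Shape(), res[0].Value()) // expected: [2 3] [[0 0 0] [0 0 0]]
}
```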
-func Empty(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...EmptyAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Empty", - Input: []tf.Input{ - shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Subtracts `v` into specified rows of `x`. -// -// Computes y = x; y[i, :] -= v; return y. -// -// Arguments: -// x: A `Tensor` of type T. -// i: A vector. Indices into the left-most dimension of `x`. -// v: A `Tensor` of type T. Same dimension sizes as x except the first dimension, which must be the same as i's size. -// -// Returns A `Tensor` of type T. An alias of `x`. The content of `y` is undefined if there are duplicates in `i`. -func InplaceSub(scope *Scope, x tf.Output, i tf.Output, v tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "InplaceSub", - Input: []tf.Input{ - x, i, v, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Adds v into specified rows of x. -// -// Computes y = x; y[i, :] += v; return y. -// -// Arguments: -// x: A `Tensor` of type T. -// i: A vector. Indices into the left-most dimension of `x`. -// v: A `Tensor` of type T. Same dimension sizes as x except the first dimension, which must be the same as i's size. -// -// Returns A `Tensor` of type T. An alias of `x`. The content of `y` is undefined if there are duplicates in `i`. -func InplaceAdd(scope *Scope, x tf.Output, i tf.Output, v tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "InplaceAdd", - Input: []tf.Input{ - x, i, v, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Makes a copy of `x`. -// -// Arguments: -// x: The source tensor of type `T`. -// -// Returns y: A `Tensor` of type `T`. A copy of `x`. Guaranteed that `y` -// is not an alias of `x`. -func DeepCopy(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DeepCopy", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// PackAttr is an optional argument to Pack. -type PackAttr func(optionalAttr) - -// PackAxis sets the optional axis attribute to value. -// -// value: Dimension along which to pack. Negative values wrap around, so the -// valid range is `[-(R+1), R+1)`. -// If not specified, defaults to 0 -func PackAxis(value int64) PackAttr { - return func(m optionalAttr) { - m["axis"] = value - } -} - -// Packs a list of `N` rank-`R` tensors into one rank-`(R+1)` tensor. -// -// Packs the `N` tensors in `values` into a tensor with rank one higher than each -// tensor in `values`, by packing them along the `axis` dimension. -// Given a list of tensors of shape `(A, B, C)`; -// -// if `axis == 0` then the `output` tensor will have the shape `(N, A, B, C)`. -// if `axis == 1` then the `output` tensor will have the shape `(A, N, B, C)`. -// Etc. -// -// For example: -// -// ``` -// # 'x' is [1, 4] -// # 'y' is [2, 5] -// # 'z' is [3, 6] -// pack([x, y, z]) => [[1, 4], [2, 5], [3, 6]] # Pack along first dim. -// pack([x, y, z], axis=1) => [[1, 2, 3], [4, 5, 6]] -// ``` -// -// This is the opposite of `unpack`. -// -// Arguments: -// values: Must be of same shape and type. -// -// Returns The packed tensor. 
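A short sketch of the `Pack` wrapper and its `PackAxis` attribute, reproducing the packing example from the doc comment above (standard `tensorflow/go` client assumed).

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	x := op.Const(s, []int32{1, 4})
	y := op.Const(s, []int32{2, 5})
	z := op.Const(s, []int32{3, 6})
	// Pack along the first dimension (default axis 0) and along axis 1.
	packed0 := op.Pack(s, []tf.Output{x, y, z})
	packed1 := op.Pack(s, []tf.Output{x, y, z}, op.PackAxis(1))

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	res, err := sess.Run(nil, []tf.Output{packed0, packed1}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(res[0].Value()) // expected: [[1 4] [2 5] [3 6]]
	fmt.Println(res[1].Value()) // expected: [[1 2 3] [4 5 6]]
}
```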
-func Pack(scope *Scope, values []tf.Output, optional ...PackAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Pack", - Input: []tf.Input{ - tf.OutputList(values), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MfccAttr is an optional argument to Mfcc. -type MfccAttr func(optionalAttr) - -// MfccUpperFrequencyLimit sets the optional upper_frequency_limit attribute to value. -// -// value: The highest frequency to use when calculating the -// ceptstrum. -// If not specified, defaults to 4000 -func MfccUpperFrequencyLimit(value float32) MfccAttr { - return func(m optionalAttr) { - m["upper_frequency_limit"] = value - } -} - -// MfccLowerFrequencyLimit sets the optional lower_frequency_limit attribute to value. -// -// value: The lowest frequency to use when calculating the -// ceptstrum. -// If not specified, defaults to 20 -func MfccLowerFrequencyLimit(value float32) MfccAttr { - return func(m optionalAttr) { - m["lower_frequency_limit"] = value - } -} - -// MfccFilterbankChannelCount sets the optional filterbank_channel_count attribute to value. -// -// value: Resolution of the Mel bank used internally. -// If not specified, defaults to 40 -func MfccFilterbankChannelCount(value int64) MfccAttr { - return func(m optionalAttr) { - m["filterbank_channel_count"] = value - } -} - -// MfccDctCoefficientCount sets the optional dct_coefficient_count attribute to value. -// -// value: How many output channels to produce per time slice. -// If not specified, defaults to 13 -func MfccDctCoefficientCount(value int64) MfccAttr { - return func(m optionalAttr) { - m["dct_coefficient_count"] = value - } -} - -// Transforms a spectrogram into a form that's useful for speech recognition. -// -// Mel Frequency Cepstral Coefficients are a way of representing audio data that's -// been effective as an input feature for machine learning. They are created by -// taking the spectrum of a spectrogram (a 'cepstrum'), and discarding some of the -// higher frequencies that are less significant to the human ear. They have a long -// history in the speech recognition world, and https://en.wikipedia.org/wiki/Mel-frequency_cepstrum -// is a good resource to learn more. -// -// Arguments: -// spectrogram: Typically produced by the Spectrogram op, with magnitude_squared -// set to true. -// sample_rate: How many samples per second the source audio used. -func Mfcc(scope *Scope, spectrogram tf.Output, sample_rate tf.Output, optional ...MfccAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Mfcc", - Input: []tf.Input{ - spectrogram, sample_rate, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// AudioSpectrogramAttr is an optional argument to AudioSpectrogram. -type AudioSpectrogramAttr func(optionalAttr) - -// AudioSpectrogramMagnitudeSquared sets the optional magnitude_squared attribute to value. -// -// value: Whether to return the squared magnitude or just the -// magnitude. Using squared magnitude can avoid extra calculations. -// If not specified, defaults to false -func AudioSpectrogramMagnitudeSquared(value bool) AudioSpectrogramAttr { - return func(m optionalAttr) { - m["magnitude_squared"] = value - } -} - -// Produces a visualization of audio data over time. 
-// -// Spectrograms are a standard way of representing audio information as a series of -// slices of frequency information, one slice for each window of time. By joining -// these together into a sequence, they form a distinctive fingerprint of the sound -// over time. -// -// This op expects to receive audio data as an input, stored as floats in the range -// -1 to 1, together with a window width in samples, and a stride specifying how -// far to move the window between slices. From this it generates a three -// dimensional output. The first dimension is for the channels in the input, so a -// stereo audio input would have two here for example. The second dimension is time, -// with successive frequency slices. The third dimension has an amplitude value for -// each frequency during that time slice. -// -// This means the layout when converted and saved as an image is rotated 90 degrees -// clockwise from a typical spectrogram. Time is descending down the Y axis, and -// the frequency decreases from left to right. -// -// Each value in the result represents the square root of the sum of the real and -// imaginary parts of an FFT on the current window of samples. In this way, the -// lowest dimension represents the power of each frequency in the current window, -// and adjacent windows are concatenated in the next dimension. -// -// To get a more intuitive and visual look at what this operation does, you can run -// tensorflow/examples/wav_to_spectrogram to read in an audio file and save out the -// resulting spectrogram as a PNG image. -// -// Arguments: -// input: Float representation of audio data. -// window_size: How wide the input window is in samples. For the highest efficiency -// this should be a power of two, but other values are accepted. -// stride: How widely apart the center of adjacent sample windows should be. -// -// Returns 3D representation of the audio frequencies as an image. -func AudioSpectrogram(scope *Scope, input tf.Output, window_size int64, stride int64, optional ...AudioSpectrogramAttr) (spectrogram tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"window_size": window_size, "stride": stride} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "AudioSpectrogram", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// DecodeWavAttr is an optional argument to DecodeWav. -type DecodeWavAttr func(optionalAttr) - -// DecodeWavDesiredChannels sets the optional desired_channels attribute to value. -// -// value: Number of sample channels wanted. -// If not specified, defaults to -1 -func DecodeWavDesiredChannels(value int64) DecodeWavAttr { - return func(m optionalAttr) { - m["desired_channels"] = value - } -} - -// DecodeWavDesiredSamples sets the optional desired_samples attribute to value. -// -// value: Length of audio requested. -// If not specified, defaults to -1 -func DecodeWavDesiredSamples(value int64) DecodeWavAttr { - return func(m optionalAttr) { - m["desired_samples"] = value - } -} - -// Decode a 16-bit PCM WAV file to a float tensor. -// -// The -32768 to 32767 signed 16-bit values will be scaled to -1.0 to 1.0 in float. -// -// When desired_channels is set, if the input contains fewer channels than this -// then the last channel will be duplicated to give the requested number, else if -// the input has more channels than requested then the additional channels will be -// ignored. 
-// -// If desired_samples is set, then the audio will be cropped or padded with zeroes -// to the requested length. -// -// The first output contains a Tensor with the content of the audio samples. The -// lowest dimension will be the number of channels, and the second will be the -// number of samples. For example, a ten-sample-long stereo WAV file should give an -// output shape of [10, 2]. -// -// Arguments: -// contents: The WAV-encoded audio, usually from a file. -// -// Returns: -// audio: 2-D with shape `[length, channels]`. -// sample_rate: Scalar holding the sample rate found in the WAV header. -func DecodeWav(scope *Scope, contents tf.Output, optional ...DecodeWavAttr) (audio tf.Output, sample_rate tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DecodeWav", - Input: []tf.Input{ - contents, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// UnbatchGradAttr is an optional argument to UnbatchGrad. -type UnbatchGradAttr func(optionalAttr) - -// UnbatchGradContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func UnbatchGradContainer(value string) UnbatchGradAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// UnbatchGradSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func UnbatchGradSharedName(value string) UnbatchGradAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Gradient of Unbatch. -// -// Acts like Batch but using the given batch_index index of batching things as they -// become available. This ensures that the gradients are propagated back in the -// same session which did the forward pass. -// -// original_input: The input to the Unbatch operation this is the gradient of. -// batch_index: The batch_index given to the Unbatch operation this is the gradient -// of. -// grad: The downstream gradient. -// id: The id scalar emitted by Batch. -// batched_grad: The return value, either an empty tensor or the batched gradient. -// container: Container to control resource sharing. -// shared_name: Instances of UnbatchGrad with the same container and shared_name -// are assumed to possibly belong to the same batch. If left empty, the op name -// will be used as the shared name. -func UnbatchGrad(scope *Scope, original_input tf.Output, batch_index tf.Output, grad tf.Output, id tf.Output, optional ...UnbatchGradAttr) (batched_grad tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "UnbatchGrad", - Input: []tf.Input{ - original_input, batch_index, grad, id, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes element-wise population count (a.k.a. popcount, bitsum, bitcount). -// -// For each entry in `x`, calculates the number of `1` (on) bits in the binary -// representation of that entry. -// -// **NOTE**: It is more efficient to first `tf.bitcast` your tensors into -// `int32` or `int64` and perform the bitcount on the result, than to feed in -// 8- or 16-bit inputs and then aggregate the resulting counts. 
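A minimal sketch exercising the `PopulationCount` wrapper documented above (standard `tensorflow/go` client assumed; input values are illustrative).

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	x := op.Const(s, []int32{0, 1, 2, 3, 255})
	bits := op.PopulationCount(s, x)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	res, err := sess.Run(nil, []tf.Output{bits}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(res[0].Value()) // expected: [0 1 1 2 8] (uint8 per-element bit counts)
}
```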
-func PopulationCount(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "PopulationCount", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Bucketize each feature based on bucket boundaries. -// -// An op that returns a list of float tensors, where each tensor represents the -// bucketized values for a single feature. -// -// Arguments: -// float_values: float; List of Rank 1 Tensor each containing float values for a single feature. -// bucket_boundaries: float; List of Rank 1 Tensors each containing the bucket boundaries for a single -// feature. -// -// Returns int; List of Rank 1 Tensors each containing the bucketized values for a single feature. -func BoostedTreesBucketize(scope *Scope, float_values []tf.Output, bucket_boundaries []tf.Output) (buckets []tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BoostedTreesBucketize", - Input: []tf.Input{ - tf.OutputList(float_values), tf.OutputList(bucket_boundaries), - }, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if buckets, idx, err = makeOutputList(op, idx, "buckets"); err != nil { - scope.UpdateErr("BoostedTreesBucketize", err) - return - } - return buckets -} - -// Returns immutable tensor from memory region. -// -// The current implementation memmaps the tensor from a file. -// -// Arguments: -// dtype: Type of the returned tensor. -// shape: Shape of the returned tensor. -// memory_region_name: Name of readonly memory region used by the tensor, see -// NewReadOnlyMemoryRegionFromFile in tensorflow::Env. -func ImmutableConst(scope *Scope, dtype tf.DataType, shape tf.Shape, memory_region_name string) (tensor tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype, "shape": shape, "memory_region_name": memory_region_name} - opspec := tf.OpSpec{ - Type: "ImmutableConst", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Add the quantile summaries to each quantile stream resource. -// -// An op that adds a list of quantile summaries to a quantile stream resource. Each -// summary Tensor is rank 2, containing summaries (value, weight, min_rank, max_rank) -// for a single feature. -// -// Arguments: -// quantile_stream_resource_handle: resource handle referring to a QuantileStreamResource. -// summaries: string; List of Rank 2 Tensor each containing the summaries for a single feature. -// -// Returns the created operation. -func BoostedTreesQuantileStreamResourceAddSummaries(scope *Scope, quantile_stream_resource_handle tf.Output, summaries []tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BoostedTreesQuantileStreamResourceAddSummaries", - Input: []tf.Input{ - quantile_stream_resource_handle, tf.OutputList(summaries), - }, - } - return scope.AddOperation(opspec) -} - -// BoostedTreesCreateQuantileStreamResourceAttr is an optional argument to BoostedTreesCreateQuantileStreamResource. -type BoostedTreesCreateQuantileStreamResourceAttr func(optionalAttr) - -// BoostedTreesCreateQuantileStreamResourceMaxElements sets the optional max_elements attribute to value. -// -// value: int; The maximum number of data points that can be fed to the stream. 
-// If not specified, defaults to 1099511627776 -func BoostedTreesCreateQuantileStreamResourceMaxElements(value int64) BoostedTreesCreateQuantileStreamResourceAttr { - return func(m optionalAttr) { - m["max_elements"] = value - } -} - -// Create the Resource for Quantile Streams. -// -// Arguments: -// quantile_stream_resource_handle: resource; Handle to quantile stream resource. -// epsilon: float; The required approximation error of the stream resource. -// num_streams: int; The number of streams managed by the resource that shares the same epsilon. -// -// Returns the created operation. -func BoostedTreesCreateQuantileStreamResource(scope *Scope, quantile_stream_resource_handle tf.Output, epsilon tf.Output, num_streams tf.Output, optional ...BoostedTreesCreateQuantileStreamResourceAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "BoostedTreesCreateQuantileStreamResource", - Input: []tf.Input{ - quantile_stream_resource_handle, epsilon, num_streams, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// BoostedTreesUpdateEnsembleV2Attr is an optional argument to BoostedTreesUpdateEnsembleV2. -type BoostedTreesUpdateEnsembleV2Attr func(optionalAttr) - -// BoostedTreesUpdateEnsembleV2LogitsDimension sets the optional logits_dimension attribute to value. -// -// value: scalar, dimension of the logits -// If not specified, defaults to 1 -func BoostedTreesUpdateEnsembleV2LogitsDimension(value int64) BoostedTreesUpdateEnsembleV2Attr { - return func(m optionalAttr) { - m["logits_dimension"] = value - } -} - -// Updates the tree ensemble by adding a layer to the last tree being grown -// -// or by starting a new tree. -// -// Arguments: -// tree_ensemble_handle: Handle to the ensemble variable. -// feature_ids: Rank 1 tensor with ids for each feature. This is the real id of -// the feature that will be used in the split. -// dimension_ids: List of rank 1 tensors representing the dimension in each feature. -// node_ids: List of rank 1 tensors representing the nodes for which this feature -// has a split. -// gains: List of rank 1 tensors representing the gains for each of the feature's -// split. -// thresholds: List of rank 1 tensors representing the thesholds for each of the -// feature's split. -// left_node_contribs: List of rank 2 tensors with left leaf contribs for each of -// the feature's splits. Will be added to the previous node values to constitute -// the values of the left nodes. -// right_node_contribs: List of rank 2 tensors with right leaf contribs for each -// of the feature's splits. Will be added to the previous node values to constitute -// the values of the right nodes. -// split_types: List of rank 1 tensors representing the split type for each feature. -// max_depth: Max depth of the tree to build. -// learning_rate: shrinkage const for each new tree. -// pruning_mode: 0-No pruning, 1-Pre-pruning, 2-Post-pruning. -// -// Returns the created operation. 
-func BoostedTreesUpdateEnsembleV2(scope *Scope, tree_ensemble_handle tf.Output, feature_ids []tf.Output, dimension_ids []tf.Output, node_ids []tf.Output, gains []tf.Output, thresholds []tf.Output, left_node_contribs []tf.Output, right_node_contribs []tf.Output, split_types []tf.Output, max_depth tf.Output, learning_rate tf.Output, pruning_mode tf.Output, optional ...BoostedTreesUpdateEnsembleV2Attr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "BoostedTreesUpdateEnsembleV2", - Input: []tf.Input{ - tree_ensemble_handle, tf.OutputList(feature_ids), tf.OutputList(dimension_ids), tf.OutputList(node_ids), tf.OutputList(gains), tf.OutputList(thresholds), tf.OutputList(left_node_contribs), tf.OutputList(right_node_contribs), tf.OutputList(split_types), max_depth, learning_rate, pruning_mode, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Updates the tree ensemble by either adding a layer to the last tree being grown -// -// or by starting a new tree. -// -// Arguments: -// tree_ensemble_handle: Handle to the ensemble variable. -// feature_ids: Rank 1 tensor with ids for each feature. This is the real id of -// the feature that will be used in the split. -// node_ids: List of rank 1 tensors representing the nodes for which this feature -// has a split. -// gains: List of rank 1 tensors representing the gains for each of the feature's -// split. -// thresholds: List of rank 1 tensors representing the thesholds for each of the -// feature's split. -// left_node_contribs: List of rank 2 tensors with left leaf contribs for each of -// the feature's splits. Will be added to the previous node values to constitute -// the values of the left nodes. -// right_node_contribs: List of rank 2 tensors with right leaf contribs for each -// of the feature's splits. Will be added to the previous node values to constitute -// the values of the right nodes. -// max_depth: Max depth of the tree to build. -// learning_rate: shrinkage const for each new tree. -// pruning_mode: 0-No pruning, 1-Pre-pruning, 2-Post-pruning. -// -// Returns the created operation. -func BoostedTreesUpdateEnsemble(scope *Scope, tree_ensemble_handle tf.Output, feature_ids tf.Output, node_ids []tf.Output, gains []tf.Output, thresholds []tf.Output, left_node_contribs []tf.Output, right_node_contribs []tf.Output, max_depth tf.Output, learning_rate tf.Output, pruning_mode int64) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"pruning_mode": pruning_mode} - opspec := tf.OpSpec{ - Type: "BoostedTreesUpdateEnsemble", - Input: []tf.Input{ - tree_ensemble_handle, feature_ids, tf.OutputList(node_ids), tf.OutputList(gains), tf.OutputList(thresholds), tf.OutputList(left_node_contribs), tf.OutputList(right_node_contribs), max_depth, learning_rate, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Runs multiple additive regression ensemble predictors on input instances and -// -// computes the update to cached logits. It is designed to be used during training. -// It traverses the trees starting from cached tree id and cached node id and -// calculates the updates to be pushed to the cache. -// -// Arguments: -// -// cached_tree_ids: Rank 1 Tensor containing cached tree ids which is the starting -// tree of prediction. -// cached_node_ids: Rank 1 Tensor containing cached node id which is the starting -// node of prediction. 
-// bucketized_features: A list of rank 1 Tensors containing bucket id for each -// feature. -// logits_dimension: scalar, dimension of the logits, to be used for partial logits -// shape. -// -// Returns: -// partial_logits: Rank 2 Tensor containing logits update (with respect to cached -// values stored) for each example. -// tree_ids: Rank 1 Tensor containing new tree ids for each example. -// node_ids: Rank 1 Tensor containing new node ids in the new tree_ids. -func BoostedTreesTrainingPredict(scope *Scope, tree_ensemble_handle tf.Output, cached_tree_ids tf.Output, cached_node_ids tf.Output, bucketized_features []tf.Output, logits_dimension int64) (partial_logits tf.Output, tree_ids tf.Output, node_ids tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"logits_dimension": logits_dimension} - opspec := tf.OpSpec{ - Type: "BoostedTreesTrainingPredict", - Input: []tf.Input{ - tree_ensemble_handle, cached_tree_ids, cached_node_ids, tf.OutputList(bucketized_features), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Aggregates the summary of accumulated stats for the batch. -// -// The summary stats contains gradients and hessians accumulated for each node, feature dimension id and bucket. -// -// Arguments: -// node_ids: int32; Rank 1 Tensor containing node ids for each example, shape [batch_size]. -// gradients: float32; Rank 2 Tensor (shape=[batch_size, logits_dimension]) with gradients for each example. -// hessians: float32; Rank 2 Tensor (shape=[batch_size, hessian_dimension]) with hessians for each example. -// feature: int32; Rank 2 feature Tensors (shape=[batch_size, feature_dimension]). -// max_splits: int; the maximum number of splits possible in the whole tree. -// num_buckets: int; equals to the maximum possible value of bucketized feature. -// -// Returns output Rank 4 Tensor (shape=[splits, feature_dimension, buckets, logits_dimension + hessian_dimension]) -// containing accumulated stats for each node, feature dimension and bucket. -func BoostedTreesAggregateStats(scope *Scope, node_ids tf.Output, gradients tf.Output, hessians tf.Output, feature tf.Output, max_splits int64, num_buckets int64) (stats_summary tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"max_splits": max_splits, "num_buckets": num_buckets} - opspec := tf.OpSpec{ - Type: "BoostedTreesAggregateStats", - Input: []tf.Input{ - node_ids, gradients, hessians, feature, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Makes the summary of accumulated stats for the batch. -// -// The summary stats contains gradients and hessians accumulated into the corresponding node and bucket for each example. -// -// Arguments: -// node_ids: int32 Rank 1 Tensor containing node ids, which each example falls into for the requested layer. -// gradients: float32; Rank 2 Tensor (shape=[#examples, 1]) for gradients. -// hessians: float32; Rank 2 Tensor (shape=[#examples, 1]) for hessians. -// bucketized_features_list: int32 list of Rank 1 Tensors, each containing the bucketized feature (for each feature column). -// max_splits: int; the maximum number of splits possible in the whole tree. -// num_buckets: int; equals to the maximum possible value of bucketized feature. -// -// Returns output Rank 4 Tensor (shape=[#features, #splits, #buckets, 2]) containing accumulated stats put into the corresponding node and bucket. 
The first index of 4th dimension refers to gradients, and the second to hessians. -func BoostedTreesMakeStatsSummary(scope *Scope, node_ids tf.Output, gradients tf.Output, hessians tf.Output, bucketized_features_list []tf.Output, max_splits int64, num_buckets int64) (stats_summary tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"max_splits": max_splits, "num_buckets": num_buckets} - opspec := tf.OpSpec{ - Type: "BoostedTreesMakeStatsSummary", - Input: []tf.Input{ - node_ids, gradients, hessians, tf.OutputList(bucketized_features_list), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Deserializes a serialized tree ensemble config and replaces current tree -// -// ensemble. -// -// Arguments: -// tree_ensemble_handle: Handle to the tree ensemble. -// stamp_token: Token to use as the new value of the resource stamp. -// tree_ensemble_serialized: Serialized proto of the ensemble. -// -// Returns the created operation. -func BoostedTreesDeserializeEnsemble(scope *Scope, tree_ensemble_handle tf.Output, stamp_token tf.Output, tree_ensemble_serialized tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BoostedTreesDeserializeEnsemble", - Input: []tf.Input{ - tree_ensemble_handle, stamp_token, tree_ensemble_serialized, - }, - } - return scope.AddOperation(opspec) -} - -// Flush the quantile summaries from each quantile stream resource. -// -// An op that outputs a list of quantile summaries of a quantile stream resource. -// Each summary Tensor is rank 2, containing summaries (value, weight, min_rank, -// max_rank) for a single feature. -// -// Arguments: -// quantile_stream_resource_handle: resource handle referring to a QuantileStreamResource. -// -func BoostedTreesFlushQuantileSummaries(scope *Scope, quantile_stream_resource_handle tf.Output, num_features int64) (summaries []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_features": num_features} - opspec := tf.OpSpec{ - Type: "BoostedTreesFlushQuantileSummaries", - Input: []tf.Input{ - quantile_stream_resource_handle, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if summaries, idx, err = makeOutputList(op, idx, "summaries"); err != nil { - scope.UpdateErr("BoostedTreesFlushQuantileSummaries", err) - return - } - return summaries -} - -// BoostedTreesSparseCalculateBestFeatureSplitAttr is an optional argument to BoostedTreesSparseCalculateBestFeatureSplit. -type BoostedTreesSparseCalculateBestFeatureSplitAttr func(optionalAttr) - -// BoostedTreesSparseCalculateBestFeatureSplitSplitType sets the optional split_type attribute to value. -// -// value: A string indicating if this Op should perform inequality split or equality split. -// If not specified, defaults to "inequality" -func BoostedTreesSparseCalculateBestFeatureSplitSplitType(value string) BoostedTreesSparseCalculateBestFeatureSplitAttr { - return func(m optionalAttr) { - m["split_type"] = value - } -} - -// Calculates gains for each feature and returns the best possible split information for the feature. -// -// The split information is the best threshold (bucket id), gains and left/right node contributions per node for each feature. -// -// It is possible that not all nodes can be split on each feature. Hence, the list of possible nodes can differ between the features. 
Therefore, we return `node_ids_list` for each feature, containing the list of nodes that this feature can be used to split. -// -// In this manner, the output is the best split per features and per node, so that it needs to be combined later to produce the best split for each node (among all possible features). -// -// The output shapes are compatible in a way that the first dimension of all tensors are the same and equal to the number of possible split nodes for each feature. -// -// Arguments: -// node_id_range: A Rank 1 tensor (shape=[2]) to specify the range [first, last) of node ids to process within `stats_summary_list`. The nodes are iterated between the two nodes specified by the tensor, as like `for node_id in range(node_id_range[0], node_id_range[1])` (Note that the last index node_id_range[1] is exclusive). -// stats_summary_indices: A Rank 2 int64 tensor of dense shape [N, 4] (N specifies the number of non-zero values) for accumulated stats summary (gradient/hessian) per node per bucket for each feature. The second dimension contains node id, feature dimension, bucket id, and stats dim. -// stats dim is the sum of logits dimension and hessian dimension, hessian dimension can either be logits dimension if diagonal hessian is used, or logits dimension^2 if full hessian is used. -// stats_summary_values: A Rank 1 float tensor of dense shape [N] (N specifies the number of non-zero values), which supplies the values for each element in summary_indices. -// stats_summary_shape: A Rank 1 float tensor of dense shape [4], which specifies the dense shape of the sparse tensor, which is [num tree nodes, feature dimensions, num buckets, stats dim]. -// l1: l1 regularization factor on leaf weights, per instance based. -// l2: l2 regularization factor on leaf weights, per instance based. -// tree_complexity: adjustment to the gain, per leaf based. -// min_node_weight: minimum avg of hessians in a node before required for the node to be considered for splitting. -// logits_dimension: The dimension of logit, i.e., number of classes. -// -// Returns: -// node_ids: A Rank 1 tensor indicating possible node ids that can be split. -// gains: A Rank 1 tensor indicating the best gains to split each node. -// feature_dimensions: A Rank 1 tensor indicating the best feature dimension for each feature to split for each node. -// thresholds: A Rank 1 tensor indicating the bucket id to compare with (as a threshold) for split in each node. -// left_node_contribs: A Rank 2 tensor indicating the contribution of the left nodes when branching from parent nodes to the left direction by the given threshold for each feature. -// This value will be used to make the left node value by adding to the parent node value. Second dimension size is logits dimension. -// right_node_contribs: A Rank 2 tensor, with the same shape/conditions as left_node_contribs_list, but just that the value is for the right node. -// split_with_default_directions: A Rank 1 tensor indicating which direction to go if data is missing. -// Inequality with default left returns 0, inequality with default right returns 1, equality with default right returns 2. 
-func BoostedTreesSparseCalculateBestFeatureSplit(scope *Scope, node_id_range tf.Output, stats_summary_indices tf.Output, stats_summary_values tf.Output, stats_summary_shape tf.Output, l1 tf.Output, l2 tf.Output, tree_complexity tf.Output, min_node_weight tf.Output, logits_dimension int64, optional ...BoostedTreesSparseCalculateBestFeatureSplitAttr) (node_ids tf.Output, gains tf.Output, feature_dimensions tf.Output, thresholds tf.Output, left_node_contribs tf.Output, right_node_contribs tf.Output, split_with_default_directions tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"logits_dimension": logits_dimension} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "BoostedTreesSparseCalculateBestFeatureSplit", - Input: []tf.Input{ - node_id_range, stats_summary_indices, stats_summary_values, stats_summary_shape, l1, l2, tree_complexity, min_node_weight, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6) -} - -// Calculates gains for each feature and returns the best possible split information for each node. However, if no split is found, then no split information is returned for that node. -// -// The split information is the best threshold (bucket id), gains and left/right node contributions per node for each feature. -// -// It is possible that not all nodes can be split on each feature. Hence, the list of possible nodes can differ between the features. Therefore, we return `node_ids_list` for each feature, containing the list of nodes that this feature can be used to split. -// -// In this manner, the output is the best split per features and per node, so that it needs to be combined later to produce the best split for each node (among all possible features). -// -// The output shapes are compatible in a way that the first dimension of all tensors are the same and equal to the number of possible split nodes for each feature. -// -// Arguments: -// node_id_range: A Rank 1 tensor (shape=[2]) to specify the range [first, last) of node ids to process within `stats_summary_list`. The nodes are iterated between the two nodes specified by the tensor, as like `for node_id in range(node_id_range[0], node_id_range[1])` (Note that the last index node_id_range[1] is exclusive). -// stats_summaries_list: A list of Rank 4 tensor (#shape=[max_splits, feature_dims, bucket, stats_dims]) for accumulated stats summary (gradient/hessian) per node, per dimension, per buckets for each feature. -// The first dimension of the tensor is the maximum number of splits, and thus not all elements of it will be used, but only the indexes specified by node_ids will be used. -// split_types: A Rank 1 tensor indicating if this Op should perform inequality split or equality split per feature. -// candidate_feature_ids: Rank 1 tensor with ids for each feature. This is the real id of the feature. -// l1: l1 regularization factor on leaf weights, per instance based. -// l2: l2 regularization factor on leaf weights, per instance based. -// tree_complexity: adjustment to the gain, per leaf based. -// min_node_weight: minimum avg of hessians in a node before required for the node to be considered for splitting. -// logits_dimension: The dimension of logit, i.e., number of classes. -// -// Returns: -// node_ids: A Rank 1 tensors indicating possible split node ids for each feature. 
The length of the list is num_features, but each tensor has different size as each feature provides different possible nodes. See above for details like shapes and sizes. -// gains: A Rank 1 tensor indicating the best gains for each feature to split for certain nodes. See above for details like shapes and sizes. -// feature_ids: A Rank 1 tensors indicating the best feature id for each node. See above for details like shapes and sizes. -// feature_dimensions: A Rank 1 tensors indicating the best feature dimension for each feature to split for certain nodes if the feature is multi-dimension. See above for details like shapes and sizes. -// thresholds: A Rank 1 tensors indicating the bucket id to compare with (as a threshold) for split in each node. See above for details like shapes and sizes. -// left_node_contribs: A Rank 2 tensors indicating the contribution of the left nodes when branching from parent nodes (given by the tensor element in the output node_ids_list) to the left direction by the given threshold for each feature. This value will be used to make the left node value by adding to the parent node value. Second dimension size is 1 for 1-dimensional logits, but would be larger for multi-class problems. See above for details like shapes and sizes. -// right_node_contribs: A Rank 2 tensors, with the same shape/conditions as left_node_contribs_list, but just that the value is for the right node. -// split_with_default_directions: A Rank 1 tensors indicating the which direction to go if data is missing. See above for details like shapes and sizes. -// Inequality with default left returns 0, inequality with default right returns 1, equality with default right returns 2. -func BoostedTreesCalculateBestFeatureSplitV2(scope *Scope, node_id_range tf.Output, stats_summaries_list []tf.Output, split_types tf.Output, candidate_feature_ids tf.Output, l1 tf.Output, l2 tf.Output, tree_complexity tf.Output, min_node_weight tf.Output, logits_dimension int64) (node_ids tf.Output, gains tf.Output, feature_ids tf.Output, feature_dimensions tf.Output, thresholds tf.Output, left_node_contribs tf.Output, right_node_contribs tf.Output, split_with_default_directions tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"logits_dimension": logits_dimension} - opspec := tf.OpSpec{ - Type: "BoostedTreesCalculateBestFeatureSplitV2", - Input: []tf.Input{ - node_id_range, tf.OutputList(stats_summaries_list), split_types, candidate_feature_ids, l1, l2, tree_complexity, min_node_weight, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6), op.Output(7) -} - -// Calculates gains for each feature and returns the best possible split information for the feature. -// -// The split information is the best threshold (bucket id), gains and left/right node contributions per node for each feature. -// -// It is possible that not all nodes can be split on each feature. Hence, the list of possible nodes can differ between the features. Therefore, we return `node_ids_list` for each feature, containing the list of nodes that this feature can be used to split. -// -// In this manner, the output is the best split per features and per node, so that it needs to be combined later to produce the best split for each node (among all possible features). -// -// The length of output lists are all of the same length, `num_features`. 
-// The output shapes are compatible in a way that the first dimension of all tensors of all lists are the same and equal to the number of possible split nodes for each feature. -// -// Arguments: -// node_id_range: A Rank 1 tensor (shape=[2]) to specify the range [first, last) of node ids to process within `stats_summary_list`. The nodes are iterated between the two nodes specified by the tensor, as like `for node_id in range(node_id_range[0], node_id_range[1])` (Note that the last index node_id_range[1] is exclusive). -// stats_summary_list: A list of Rank 3 tensor (#shape=[max_splits, bucket, 2]) for accumulated stats summary (gradient/hessian) per node per buckets for each feature. The first dimension of the tensor is the maximum number of splits, and thus not all elements of it will be used, but only the indexes specified by node_ids will be used. -// l1: l1 regularization factor on leaf weights, per instance based. -// l2: l2 regularization factor on leaf weights, per instance based. -// tree_complexity: adjustment to the gain, per leaf based. -// min_node_weight: minimum avg of hessians in a node before required for the node to be considered for splitting. -// max_splits: the number of nodes that can be split in the whole tree. Used as a dimension of output tensors. -// -// Returns: -// node_ids_list: An output list of Rank 1 tensors indicating possible split node ids for each feature. The length of the list is num_features, but each tensor has different size as each feature provides different possible nodes. See above for details like shapes and sizes. -// gains_list: An output list of Rank 1 tensors indicating the best gains for each feature to split for certain nodes. See above for details like shapes and sizes. -// thresholds_list: An output list of Rank 1 tensors indicating the bucket id to compare with (as a threshold) for split in each node. See above for details like shapes and sizes. -// left_node_contribs_list: A list of Rank 2 tensors indicating the contribution of the left nodes when branching from parent nodes (given by the tensor element in the output node_ids_list) to the left direction by the given threshold for each feature. This value will be used to make the left node value by adding to the parent node value. Second dimension size is 1 for 1-dimensional logits, but would be larger for multi-class problems. See above for details like shapes and sizes. -// right_node_contribs_list: A list of Rank 2 tensors, with the same shape/conditions as left_node_contribs_list, but just that the value is for the right node. 
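As a usage illustration for the per-feature gains op documented above, the sketch below wires up placeholder inputs and gathers the per-feature output lists into one fetch slice. The dtypes, shapes, and constant values are assumptions of this sketch rather than part of the generated file:

package main

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// node_id_range and the per-feature stats summaries follow the comment above.
	nodeIDRange := op.Const(s.SubScope("node_id_range"), []int32{0, 2})
	stats := []tf.Output{
		op.Placeholder(s.SubScope("stats_f0"), tf.Float),
		op.Placeholder(s.SubScope("stats_f1"), tf.Float),
	}
	l1 := op.Const(s.SubScope("l1"), float32(0))
	l2 := op.Const(s.SubScope("l2"), float32(0.1))
	complexity := op.Const(s.SubScope("tree_complexity"), float32(0))
	minNodeWeight := op.Const(s.SubScope("min_node_weight"), float32(1))

	nodeIDs, gains, thresholds, left, right := op.BoostedTreesCalculateBestGainsPerFeature(
		s, nodeIDRange, stats, l1, l2, complexity, minNodeWeight, 2 /* max_splits */)

	// Each per-feature list comes back as a []tf.Output; collect them into a
	// single slice so they could be fetched in one Session.Run call.
	var fetches []tf.Output
	for _, list := range [][]tf.Output{nodeIDs, gains, thresholds, left, right} {
		fetches = append(fetches, list...)
	}
	_ = fetches
}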
-func BoostedTreesCalculateBestGainsPerFeature(scope *Scope, node_id_range tf.Output, stats_summary_list []tf.Output, l1 tf.Output, l2 tf.Output, tree_complexity tf.Output, min_node_weight tf.Output, max_splits int64) (node_ids_list []tf.Output, gains_list []tf.Output, thresholds_list []tf.Output, left_node_contribs_list []tf.Output, right_node_contribs_list []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"max_splits": max_splits} - opspec := tf.OpSpec{ - Type: "BoostedTreesCalculateBestGainsPerFeature", - Input: []tf.Input{ - node_id_range, tf.OutputList(stats_summary_list), l1, l2, tree_complexity, min_node_weight, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if node_ids_list, idx, err = makeOutputList(op, idx, "node_ids_list"); err != nil { - scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err) - return - } - if gains_list, idx, err = makeOutputList(op, idx, "gains_list"); err != nil { - scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err) - return - } - if thresholds_list, idx, err = makeOutputList(op, idx, "thresholds_list"); err != nil { - scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err) - return - } - if left_node_contribs_list, idx, err = makeOutputList(op, idx, "left_node_contribs_list"); err != nil { - scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err) - return - } - if right_node_contribs_list, idx, err = makeOutputList(op, idx, "right_node_contribs_list"); err != nil { - scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err) - return - } - return node_ids_list, gains_list, thresholds_list, left_node_contribs_list, right_node_contribs_list -} - -// Checks whether a tree ensemble has been initialized. -// -// Arguments: -// tree_ensemble_handle: Handle to the tree ensemble resource. -// -// Returns output boolean on whether it is initialized or not. -func IsBoostedTreesEnsembleInitialized(scope *Scope, tree_ensemble_handle tf.Output) (is_initialized tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "IsBoostedTreesEnsembleInitialized", - Input: []tf.Input{ - tree_ensemble_handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// BoostedTreesEnsembleResourceHandleOpAttr is an optional argument to BoostedTreesEnsembleResourceHandleOp. -type BoostedTreesEnsembleResourceHandleOpAttr func(optionalAttr) - -// BoostedTreesEnsembleResourceHandleOpContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func BoostedTreesEnsembleResourceHandleOpContainer(value string) BoostedTreesEnsembleResourceHandleOpAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// BoostedTreesEnsembleResourceHandleOpSharedName sets the optional shared_name attribute to value. 
-// If not specified, defaults to "" -func BoostedTreesEnsembleResourceHandleOpSharedName(value string) BoostedTreesEnsembleResourceHandleOpAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Creates a handle to a BoostedTreesEnsembleResource -func BoostedTreesEnsembleResourceHandleOp(scope *Scope, optional ...BoostedTreesEnsembleResourceHandleOpAttr) (resource tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "BoostedTreesEnsembleResourceHandleOp", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Deserializes a proto into the tree handle -// -// Arguments: -// tree_handle: Handle to the tree resource to be restored. -// tree_config: Serialied proto string of the boosted_trees.Tree proto. -// -// Returns the created operation. -func TensorForestTreeDeserialize(scope *Scope, tree_handle tf.Output, tree_config tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorForestTreeDeserialize", - Input: []tf.Input{ - tree_handle, tree_config, - }, - } - return scope.AddOperation(opspec) -} - -// Serializes the tree handle to a proto -// -// Arguments: -// tree_handle: Handle to the tree resource to be serialized. -// -// Returns Serialied proto string of the tree resource. -func TensorForestTreeSerialize(scope *Scope, tree_handle tf.Output) (tree_config tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorForestTreeSerialize", - Input: []tf.Input{ - tree_handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a tree resource and returns a handle to it. -// -// Arguments: -// tree_handle: Handle to the tree resource to be created. -// tree_config: Serialized proto string of the boosted_trees.Tree. -// -// Returns the created operation. -func TensorForestCreateTreeVariable(scope *Scope, tree_handle tf.Output, tree_config tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorForestCreateTreeVariable", - Input: []tf.Input{ - tree_handle, tree_config, - }, - } - return scope.AddOperation(opspec) -} - -// Checks whether a tree has been initialized. -// -// Arguments: -// tree_handle: Handle to the tree. -// -// Returns Whether the tree is initialized. -func TensorForestTreeIsInitializedOp(scope *Scope, tree_handle tf.Output) (is_initialized tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorForestTreeIsInitializedOp", - Input: []tf.Input{ - tree_handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// TensorForestTreeResourceHandleOpAttr is an optional argument to TensorForestTreeResourceHandleOp. -type TensorForestTreeResourceHandleOpAttr func(optionalAttr) - -// TensorForestTreeResourceHandleOpContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func TensorForestTreeResourceHandleOpContainer(value string) TensorForestTreeResourceHandleOpAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// TensorForestTreeResourceHandleOpSharedName sets the optional shared_name attribute to value. 
-// If not specified, defaults to "" -func TensorForestTreeResourceHandleOpSharedName(value string) TensorForestTreeResourceHandleOpAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Creates a handle to a TensorForestTreeResource -func TensorForestTreeResourceHandleOp(scope *Scope, optional ...TensorForestTreeResourceHandleOpAttr) (resource tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TensorForestTreeResourceHandleOp", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// AllCandidateSamplerAttr is an optional argument to AllCandidateSampler. -type AllCandidateSamplerAttr func(optionalAttr) - -// AllCandidateSamplerSeed sets the optional seed attribute to value. -// -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func AllCandidateSamplerSeed(value int64) AllCandidateSamplerAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// AllCandidateSamplerSeed2 sets the optional seed2 attribute to value. -// -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func AllCandidateSamplerSeed2(value int64) AllCandidateSamplerAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Generates labels for candidate sampling with a learned unigram distribution. -// -// See explanations of candidate sampling and the data formats at -// go/candidate-sampling. -// -// For each batch, this op picks a single set of sampled candidate labels. -// -// The advantages of sampling candidates per-batch are simplicity and the -// possibility of efficient dense matrix multiplication. The disadvantage is that -// the sampled candidates must be chosen independently of the context and of the -// true labels. -// -// Arguments: -// true_classes: A batch_size * num_true matrix, in which each row contains the -// IDs of the num_true target_classes in the corresponding original label. -// num_true: Number of true labels per context. -// num_sampled: Number of candidates to produce. -// unique: If unique is true, we sample with rejection, so that all sampled -// candidates in a batch are unique. This requires some approximation to -// estimate the post-rejection sampling probabilities. -// -// Returns: -// sampled_candidates: A vector of length num_sampled, in which each element is -// the ID of a sampled candidate. -// true_expected_count: A batch_size * num_true matrix, representing -// the number of times each candidate is expected to occur in a batch -// of sampled candidates. If unique=true, then this is a probability. -// sampled_expected_count: A vector of length num_sampled, for each sampled -// candidate representing the number of times the candidate is expected -// to occur in a batch of sampled candidates. If unique=true, then this is a -// probability. 
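A small end-to-end sketch of driving one of these samplers through the Go API, here AllCandidateSampler as documented above; the label IDs and batch size are illustrative assumptions:

package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Two rows (batch_size=2), one true label per row; the IDs are illustrative.
	trueClasses := op.Const(s.SubScope("true_classes"), [][]int64{{3}, {1}})
	sampled, trueExpected, sampledExpected := op.AllCandidateSampler(s, trueClasses, 1, 4, true)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{sampled, trueExpected, sampledExpected}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println("sampled_candidates:", out[0].Value())
	fmt.Println("true_expected_count:", out[1].Value())
	fmt.Println("sampled_expected_count:", out[2].Value())
}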
-func AllCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, optional ...AllCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "AllCandidateSampler", - Input: []tf.Input{ - true_classes, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// FixedUnigramCandidateSamplerAttr is an optional argument to FixedUnigramCandidateSampler. -type FixedUnigramCandidateSamplerAttr func(optionalAttr) - -// FixedUnigramCandidateSamplerVocabFile sets the optional vocab_file attribute to value. -// -// value: Each valid line in this file (which should have a CSV-like format) -// corresponds to a valid word ID. IDs are in sequential order, starting from -// num_reserved_ids. The last entry in each line is expected to be a value -// corresponding to the count or relative probability. Exactly one of vocab_file -// and unigrams needs to be passed to this op. -// If not specified, defaults to "" -func FixedUnigramCandidateSamplerVocabFile(value string) FixedUnigramCandidateSamplerAttr { - return func(m optionalAttr) { - m["vocab_file"] = value - } -} - -// FixedUnigramCandidateSamplerDistortion sets the optional distortion attribute to value. -// -// value: The distortion is used to skew the unigram probability distribution. -// Each weight is first raised to the distortion's power before adding to the -// internal unigram distribution. As a result, distortion = 1.0 gives regular -// unigram sampling (as defined by the vocab file), and distortion = 0.0 gives -// a uniform distribution. -// If not specified, defaults to 1 -func FixedUnigramCandidateSamplerDistortion(value float32) FixedUnigramCandidateSamplerAttr { - return func(m optionalAttr) { - m["distortion"] = value - } -} - -// FixedUnigramCandidateSamplerNumReservedIds sets the optional num_reserved_ids attribute to value. -// -// value: Optionally some reserved IDs can be added in the range [0, -// ..., num_reserved_ids) by the users. One use case is that a special unknown -// word token is used as ID 0. These IDs will have a sampling probability of 0. -// If not specified, defaults to 0 -func FixedUnigramCandidateSamplerNumReservedIds(value int64) FixedUnigramCandidateSamplerAttr { - return func(m optionalAttr) { - m["num_reserved_ids"] = value - } -} - -// FixedUnigramCandidateSamplerNumShards sets the optional num_shards attribute to value. -// -// value: A sampler can be used to sample from a subset of the original range -// in order to speed up the whole computation through parallelism. This parameter -// (together with 'shard') indicates the number of partitions that are being -// used in the overall computation. -// If not specified, defaults to 1 -// -// REQUIRES: value >= 1 -func FixedUnigramCandidateSamplerNumShards(value int64) FixedUnigramCandidateSamplerAttr { - return func(m optionalAttr) { - m["num_shards"] = value - } -} - -// FixedUnigramCandidateSamplerShard sets the optional shard attribute to value. -// -// value: A sampler can be used to sample from a subset of the original range -// in order to speed up the whole computation through parallelism. 
This parameter -// (together with 'num_shards') indicates the particular partition number of a -// sampler op, when partitioning is being used. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func FixedUnigramCandidateSamplerShard(value int64) FixedUnigramCandidateSamplerAttr { - return func(m optionalAttr) { - m["shard"] = value - } -} - -// FixedUnigramCandidateSamplerUnigrams sets the optional unigrams attribute to value. -// -// value: A list of unigram counts or probabilities, one per ID in sequential -// order. Exactly one of vocab_file and unigrams should be passed to this op. -// If not specified, defaults to <> -func FixedUnigramCandidateSamplerUnigrams(value []float32) FixedUnigramCandidateSamplerAttr { - return func(m optionalAttr) { - m["unigrams"] = value - } -} - -// FixedUnigramCandidateSamplerSeed sets the optional seed attribute to value. -// -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func FixedUnigramCandidateSamplerSeed(value int64) FixedUnigramCandidateSamplerAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// FixedUnigramCandidateSamplerSeed2 sets the optional seed2 attribute to value. -// -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func FixedUnigramCandidateSamplerSeed2(value int64) FixedUnigramCandidateSamplerAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Generates labels for candidate sampling with a learned unigram distribution. -// -// A unigram sampler could use a fixed unigram distribution read from a -// file or passed in as an in-memory array instead of building up the distribution -// from data on the fly. There is also an option to skew the distribution by -// applying a distortion power to the weights. -// -// The vocabulary file should be in CSV-like format, with the last field -// being the weight associated with the word. -// -// For each batch, this op picks a single set of sampled candidate labels. -// -// The advantages of sampling candidates per-batch are simplicity and the -// possibility of efficient dense matrix multiplication. The disadvantage is that -// the sampled candidates must be chosen independently of the context and of the -// true labels. -// -// Arguments: -// true_classes: A batch_size * num_true matrix, in which each row contains the -// IDs of the num_true target_classes in the corresponding original label. -// num_true: Number of true labels per context. -// num_sampled: Number of candidates to randomly sample. -// unique: If unique is true, we sample with rejection, so that all sampled -// candidates in a batch are unique. This requires some approximation to -// estimate the post-rejection sampling probabilities. -// range_max: The sampler will sample integers from the interval [0, range_max). -// -// Returns: -// sampled_candidates: A vector of length num_sampled, in which each element is -// the ID of a sampled candidate. -// true_expected_count: A batch_size * num_true matrix, representing -// the number of times each candidate is expected to occur in a batch -// of sampled candidates. If unique=true, then this is a probability. -// sampled_expected_count: A vector of length num_sampled, for each sampled -// candidate representing the number of times the candidate is expected -// to occur in a batch of sampled candidates. 
If unique=true, then this is a -// probability. -func FixedUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...FixedUnigramCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FixedUnigramCandidateSampler", - Input: []tf.Input{ - true_classes, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// ThreadUnsafeUnigramCandidateSamplerAttr is an optional argument to ThreadUnsafeUnigramCandidateSampler. -type ThreadUnsafeUnigramCandidateSamplerAttr func(optionalAttr) - -// ThreadUnsafeUnigramCandidateSamplerSeed sets the optional seed attribute to value. -// -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func ThreadUnsafeUnigramCandidateSamplerSeed(value int64) ThreadUnsafeUnigramCandidateSamplerAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// ThreadUnsafeUnigramCandidateSamplerSeed2 sets the optional seed2 attribute to value. -// -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func ThreadUnsafeUnigramCandidateSamplerSeed2(value int64) ThreadUnsafeUnigramCandidateSamplerAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Generates labels for candidate sampling with a learned unigram distribution. -// -// See explanations of candidate sampling and the data formats at -// go/candidate-sampling. -// -// For each batch, this op picks a single set of sampled candidate labels. -// -// The advantages of sampling candidates per-batch are simplicity and the -// possibility of efficient dense matrix multiplication. The disadvantage is that -// the sampled candidates must be chosen independently of the context and of the -// true labels. -// -// Arguments: -// true_classes: A batch_size * num_true matrix, in which each row contains the -// IDs of the num_true target_classes in the corresponding original label. -// num_true: Number of true labels per context. -// num_sampled: Number of candidates to randomly sample. -// unique: If unique is true, we sample with rejection, so that all sampled -// candidates in a batch are unique. This requires some approximation to -// estimate the post-rejection sampling probabilities. -// range_max: The sampler will sample integers from the interval [0, range_max). -// -// Returns: -// sampled_candidates: A vector of length num_sampled, in which each element is -// the ID of a sampled candidate. -// true_expected_count: A batch_size * num_true matrix, representing -// the number of times each candidate is expected to occur in a batch -// of sampled candidates. If unique=true, then this is a probability. -// sampled_expected_count: A vector of length num_sampled, for each sampled -// candidate representing the number of times the candidate is expected -// to occur in a batch of sampled candidates. If unique=true, then this is a -// probability. 
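For the fixed-unigram sampler documented above, the optional attributes compose the same way; a construction-only sketch, with the unigram weights, distortion, and seed chosen purely for illustration:

package main

import (
	"fmt"

	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// batch_size=2, num_true=1; the label IDs must lie in [0, range_max).
	trueClasses := op.Const(s.SubScope("true_classes"), [][]int64{{0}, {2}})
	// Exactly one of vocab_file and unigrams may be supplied; here an in-memory
	// unigram table of length range_max is used.
	_, _, _ = op.FixedUnigramCandidateSampler(s, trueClasses,
		1,    // num_true
		2,    // num_sampled
		true, // unique
		5,    // range_max
		op.FixedUnigramCandidateSamplerUnigrams([]float32{10, 5, 4, 3, 1}),
		op.FixedUnigramCandidateSamplerDistortion(0.75),
		op.FixedUnigramCandidateSamplerSeed(42))
	if _, err := s.Finalize(); err != nil {
		fmt.Println("graph construction failed:", err)
	}
}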
-func ThreadUnsafeUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...ThreadUnsafeUnigramCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ThreadUnsafeUnigramCandidateSampler", - Input: []tf.Input{ - true_classes, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// MatrixDiagPartV3Attr is an optional argument to MatrixDiagPartV3. -type MatrixDiagPartV3Attr func(optionalAttr) - -// MatrixDiagPartV3Align sets the optional align attribute to value. -// -// value: Some diagonals are shorter than `max_diag_len` and need to be padded. `align` is -// a string specifying how superdiagonals and subdiagonals should be aligned, -// respectively. There are four possible alignments: "RIGHT_LEFT" (default), -// "LEFT_RIGHT", "LEFT_LEFT", and "RIGHT_RIGHT". "RIGHT_LEFT" aligns superdiagonals -// to the right (left-pads the row) and subdiagonals to the left (right-pads the -// row). It is the packing format LAPACK uses. cuSPARSE uses "LEFT_RIGHT", which is -// the opposite alignment. -// If not specified, defaults to "RIGHT_LEFT" -func MatrixDiagPartV3Align(value string) MatrixDiagPartV3Attr { - return func(m optionalAttr) { - m["align"] = value - } -} - -// Returns the batched diagonal part of a batched tensor. -// -// Returns a tensor with the `k[0]`-th to `k[1]`-th diagonals of the batched -// `input`. -// -// Assume `input` has `r` dimensions `[I, J, ..., L, M, N]`. -// Let `max_diag_len` be the maximum length among all diagonals to be extracted, -// `max_diag_len = min(M + min(k[1], 0), N + min(-k[0], 0))` -// Let `num_diags` be the number of diagonals to extract, -// `num_diags = k[1] - k[0] + 1`. -// -// If `num_diags == 1`, the output tensor is of rank `r - 1` with shape -// `[I, J, ..., L, max_diag_len]` and values: -// -// ``` -// diagonal[i, j, ..., l, n] -// = input[i, j, ..., l, n+y, n+x] ; if 0 <= n+y < M and 0 <= n+x < N, -// padding_value ; otherwise. -// ``` -// where `y = max(-k[1], 0)`, `x = max(k[1], 0)`. -// -// Otherwise, the output tensor has rank `r` with dimensions -// `[I, J, ..., L, num_diags, max_diag_len]` with values: -// -// ``` -// diagonal[i, j, ..., l, m, n] -// = input[i, j, ..., l, n+y, n+x] ; if 0 <= n+y < M and 0 <= n+x < N, -// padding_value ; otherwise. -// ``` -// where `d = k[1] - m`, `y = max(-d, 0) - offset`, and `x = max(d, 0) - offset`. -// -// `offset` is zero except when the alignment of the diagonal is to the right. -// ``` -// offset = max_diag_len - diag_len(d) ; if (`align` in {RIGHT_LEFT, RIGHT_RIGHT} -// and `d >= 0`) or -// (`align` in {LEFT_RIGHT, RIGHT_RIGHT} -// and `d <= 0`) -// 0 ; otherwise -// ``` -// where `diag_len(d) = min(cols - max(d, 0), rows + min(d, 0))`. -// -// The input must be at least a matrix. -// -// For example: -// -// ``` -// input = np.array([[[1, 2, 3, 4], # Input shape: (2, 3, 4) -// [5, 6, 7, 8], -// [9, 8, 7, 6]], -// [[5, 4, 3, 2], -// [1, 2, 3, 4], -// [5, 6, 7, 8]]]) -// -// # A main diagonal from each batch. -// tf.matrix_diag_part(input) ==> [[1, 6, 7], # Output shape: (2, 3) -// [5, 2, 7]] -// -// # A superdiagonal from each batch. 
-// tf.matrix_diag_part(input, k = 1) -// ==> [[2, 7, 6], # Output shape: (2, 3) -// [4, 3, 8]] -// -// # A band from each batch. -// tf.matrix_diag_part(input, k = (-1, 2)) -// ==> [[[0, 3, 8], # Output shape: (2, 4, 3) -// [2, 7, 6], -// [1, 6, 7], -// [5, 8, 0]], -// [[0, 3, 4], -// [4, 3, 8], -// [5, 2, 7], -// [1, 6, 0]]] -// -// # LEFT_RIGHT alignment. -// tf.matrix_diag_part(input, k = (-1, 2), align="LEFT_RIGHT") -// ==> [[[3, 8, 0], # Output shape: (2, 4, 3) -// [2, 7, 6], -// [1, 6, 7], -// [0, 5, 8]], -// [[3, 4, 0], -// [4, 3, 8], -// [5, 2, 7], -// [0, 1, 6]]] -// -// # max_diag_len can be shorter than the main diagonal. -// tf.matrix_diag_part(input, k = (-2, -1)) -// ==> [[[5, 8], -// [9, 0]], -// [[1, 6], -// [5, 0]]] -// -// # padding_value = 9 -// tf.matrix_diag_part(input, k = (1, 3), padding_value = 9) -// ==> [[[9, 9, 4], # Output shape: (2, 3, 3) -// [9, 3, 8], -// [2, 7, 6]], -// [[9, 9, 2], -// [9, 3, 4], -// [4, 3, 8]]] -// -// ``` -// -// Arguments: -// input: Rank `r` tensor where `r >= 2`. -// k: Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main -// diagonal, and negative value means subdiagonals. `k` can be a single integer -// (for a single diagonal) or a pair of integers specifying the low and high ends -// of a matrix band. `k[0]` must not be larger than `k[1]`. -// padding_value: The value to fill the area outside the specified diagonal band with. -// Default is 0. -// -// Returns The extracted diagonal(s). -func MatrixDiagPartV3(scope *Scope, input tf.Output, k tf.Output, padding_value tf.Output, optional ...MatrixDiagPartV3Attr) (diagonal tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MatrixDiagPartV3", - Input: []tf.Input{ - input, k, padding_value, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// LearnedUnigramCandidateSamplerAttr is an optional argument to LearnedUnigramCandidateSampler. -type LearnedUnigramCandidateSamplerAttr func(optionalAttr) - -// LearnedUnigramCandidateSamplerSeed sets the optional seed attribute to value. -// -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func LearnedUnigramCandidateSamplerSeed(value int64) LearnedUnigramCandidateSamplerAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// LearnedUnigramCandidateSamplerSeed2 sets the optional seed2 attribute to value. -// -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func LearnedUnigramCandidateSamplerSeed2(value int64) LearnedUnigramCandidateSamplerAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Generates labels for candidate sampling with a learned unigram distribution. -// -// See explanations of candidate sampling and the data formats at -// go/candidate-sampling. -// -// For each batch, this op picks a single set of sampled candidate labels. -// -// The advantages of sampling candidates per-batch are simplicity and the -// possibility of efficient dense matrix multiplication. The disadvantage is that -// the sampled candidates must be chosen independently of the context and of the -// true labels. 
-// -// Arguments: -// true_classes: A batch_size * num_true matrix, in which each row contains the -// IDs of the num_true target_classes in the corresponding original label. -// num_true: Number of true labels per context. -// num_sampled: Number of candidates to randomly sample. -// unique: If unique is true, we sample with rejection, so that all sampled -// candidates in a batch are unique. This requires some approximation to -// estimate the post-rejection sampling probabilities. -// range_max: The sampler will sample integers from the interval [0, range_max). -// -// Returns: -// sampled_candidates: A vector of length num_sampled, in which each element is -// the ID of a sampled candidate. -// true_expected_count: A batch_size * num_true matrix, representing -// the number of times each candidate is expected to occur in a batch -// of sampled candidates. If unique=true, then this is a probability. -// sampled_expected_count: A vector of length num_sampled, for each sampled -// candidate representing the number of times the candidate is expected -// to occur in a batch of sampled candidates. If unique=true, then this is a -// probability. -func LearnedUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...LearnedUnigramCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LearnedUnigramCandidateSampler", - Input: []tf.Input{ - true_classes, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// LogUniformCandidateSamplerAttr is an optional argument to LogUniformCandidateSampler. -type LogUniformCandidateSamplerAttr func(optionalAttr) - -// LogUniformCandidateSamplerSeed sets the optional seed attribute to value. -// -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func LogUniformCandidateSamplerSeed(value int64) LogUniformCandidateSamplerAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// LogUniformCandidateSamplerSeed2 sets the optional seed2 attribute to value. -// -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func LogUniformCandidateSamplerSeed2(value int64) LogUniformCandidateSamplerAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Generates labels for candidate sampling with a log-uniform distribution. -// -// See explanations of candidate sampling and the data formats at -// go/candidate-sampling. -// -// For each batch, this op picks a single set of sampled candidate labels. -// -// The advantages of sampling candidates per-batch are simplicity and the -// possibility of efficient dense matrix multiplication. The disadvantage is that -// the sampled candidates must be chosen independently of the context and of the -// true labels. -// -// Arguments: -// true_classes: A batch_size * num_true matrix, in which each row contains the -// IDs of the num_true target_classes in the corresponding original label. -// num_true: Number of true labels per context. 
-// num_sampled: Number of candidates to randomly sample. -// unique: If unique is true, we sample with rejection, so that all sampled -// candidates in a batch are unique. This requires some approximation to -// estimate the post-rejection sampling probabilities. -// range_max: The sampler will sample integers from the interval [0, range_max). -// -// Returns: -// sampled_candidates: A vector of length num_sampled, in which each element is -// the ID of a sampled candidate. -// true_expected_count: A batch_size * num_true matrix, representing -// the number of times each candidate is expected to occur in a batch -// of sampled candidates. If unique=true, then this is a probability. -// sampled_expected_count: A vector of length num_sampled, for each sampled -// candidate representing the number of times the candidate is expected -// to occur in a batch of sampled candidates. If unique=true, then this is a -// probability. -func LogUniformCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...LogUniformCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LogUniformCandidateSampler", - Input: []tf.Input{ - true_classes, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Selects the k nearest centers for each point. -// -// Rows of points are assumed to be input points. Rows of centers are assumed to be -// the list of candidate centers. For each point, the k centers that have least L2 -// distance to it are computed. -// -// Arguments: -// points: Matrix of shape (n, d). Rows are assumed to be input points. -// centers: Matrix of shape (m, d). Rows are assumed to be centers. -// k: Number of nearest centers to return for each point. If k is larger than m, then -// only m centers are returned. -// -// Returns: -// nearest_center_indices: Matrix of shape (n, min(m, k)). Each row contains the indices of the centers -// closest to the corresponding point, ordered by increasing distance. -// nearest_center_distances: Matrix of shape (n, min(m, k)). Each row contains the squared L2 distance to the -// corresponding center in nearest_center_indices. -func NearestNeighbors(scope *Scope, points tf.Output, centers tf.Output, k tf.Output) (nearest_center_indices tf.Output, nearest_center_distances tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "NearestNeighbors", - Input: []tf.Input{ - points, centers, k, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Returns the index of a data point that should be added to the seed set. -// -// Entries in distances are assumed to be squared distances of candidate points to -// the already sampled centers in the seed set. The op constructs one Markov chain -// of the k-MC^2 algorithm and returns the index of one candidate point to be added -// as an additional cluster center. -// -// Arguments: -// distances: Vector with squared distances to the closest previously sampled cluster center -// for each candidate point. -// seed: Scalar. Seed for initializing the random number generator. -// -// Returns Scalar with the index of the sampled point. 
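A runnable sketch of the NearestNeighbors wrapper defined just above, on a small hand-checked input; the point and center coordinates are illustrative:

package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	points := op.Const(s.SubScope("points"), [][]float32{{0, 0}, {5, 5}})
	centers := op.Const(s.SubScope("centers"), [][]float32{{0, 1}, {10, 10}, {4, 4}})
	k := op.Const(s.SubScope("k"), int64(2))
	indices, distances := op.NearestNeighbors(s, points, centers, k)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{indices, distances}, nil)
	if err != nil {
		panic(err)
	}
	// For this data the expected result is indices [[0 2] [2 0]] with squared
	// distances [[1 32] [2 41]] (computed by hand; treat as illustrative).
	fmt.Println(out[0].Value(), out[1].Value())
}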
-func KMC2ChainInitialization(scope *Scope, distances tf.Output, seed tf.Output) (index tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "KMC2ChainInitialization", - Input: []tf.Input{ - distances, seed, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Selects num_to_sample rows of input using the KMeans++ criterion. -// -// Rows of points are assumed to be input points. One row is selected at random. -// Subsequent rows are sampled with probability proportional to the squared L2 -// distance from the nearest row selected thus far till num_to_sample rows have -// been sampled. -// -// Arguments: -// points: Matrix of shape (n, d). Rows are assumed to be input points. -// num_to_sample: Scalar. The number of rows to sample. This value must not be larger than n. -// seed: Scalar. Seed for initializing the random number generator. -// num_retries_per_sample: Scalar. For each row that is sampled, this parameter -// specifies the number of additional points to draw from the current -// distribution before selecting the best. If a negative value is specified, a -// heuristic is used to sample O(log(num_to_sample)) additional points. -// -// Returns Matrix of shape (num_to_sample, d). The sampled rows. -func KmeansPlusPlusInitialization(scope *Scope, points tf.Output, num_to_sample tf.Output, seed tf.Output, num_retries_per_sample tf.Output) (samples tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "KmeansPlusPlusInitialization", - Input: []tf.Input{ - points, num_to_sample, seed, num_retries_per_sample, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// AbortAttr is an optional argument to Abort. -type AbortAttr func(optionalAttr) - -// AbortErrorMsg sets the optional error_msg attribute to value. -// -// value: A string which is the message associated with the exception. -// If not specified, defaults to "" -func AbortErrorMsg(value string) AbortAttr { - return func(m optionalAttr) { - m["error_msg"] = value - } -} - -// AbortExitWithoutError sets the optional exit_without_error attribute to value. -// If not specified, defaults to false -func AbortExitWithoutError(value bool) AbortAttr { - return func(m optionalAttr) { - m["exit_without_error"] = value - } -} - -// Raise a exception to abort the process when called. -// -// If exit_without_error is true, the process will exit normally, -// otherwise it will exit with a SIGABORT signal. -// -// Returns nothing but an exception. -// -// Returns the created operation. -func Abort(scope *Scope, optional ...AbortAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Abort", - - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Makes its input available to the next iteration. -// -// Arguments: -// data: The tensor to be made available to the next iteration. -// -// Returns The same tensor as `data`. -func NextIteration(scope *Scope, data tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "NextIteration", - Input: []tf.Input{ - data, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Exits the current frame to its parent frame. -// -// Exit makes its input `data` available to the parent frame. -// -// Arguments: -// data: The tensor to be made available to the parent frame. -// -// Returns The same tensor as `data`. 
-func Exit(scope *Scope, data tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Exit", - Input: []tf.Input{ - data, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// EnterAttr is an optional argument to Enter. -type EnterAttr func(optionalAttr) - -// EnterIsConstant sets the optional is_constant attribute to value. -// -// value: If true, the output is constant within the child frame. -// If not specified, defaults to false -func EnterIsConstant(value bool) EnterAttr { - return func(m optionalAttr) { - m["is_constant"] = value - } -} - -// EnterParallelIterations sets the optional parallel_iterations attribute to value. -// -// value: The number of iterations allowed to run in parallel. -// If not specified, defaults to 10 -func EnterParallelIterations(value int64) EnterAttr { - return func(m optionalAttr) { - m["parallel_iterations"] = value - } -} - -// Creates or finds a child frame, and makes `data` available to the child frame. -// -// This op is used together with `Exit` to create loops in the graph. -// The unique `frame_name` is used by the `Executor` to identify frames. If -// `is_constant` is true, `output` is a constant in the child frame; otherwise -// it may be changed in the child frame. At most `parallel_iterations` iterations -// are run in parallel in the child frame. -// -// Arguments: -// data: The tensor to be made available to the child frame. -// frame_name: The name of the child frame. -// -// Returns The same tensor as `data`. -func Enter(scope *Scope, data tf.Output, frame_name string, optional ...EnterAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"frame_name": frame_name} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Enter", - Input: []tf.Input{ - data, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// DenseCountSparseOutputAttr is an optional argument to DenseCountSparseOutput. -type DenseCountSparseOutputAttr func(optionalAttr) - -// DenseCountSparseOutputMinlength sets the optional minlength attribute to value. -// -// value: Minimum value to count. Can be set to -1 for no minimum. -// If not specified, defaults to -1 -// -// REQUIRES: value >= -1 -func DenseCountSparseOutputMinlength(value int64) DenseCountSparseOutputAttr { - return func(m optionalAttr) { - m["minlength"] = value - } -} - -// DenseCountSparseOutputMaxlength sets the optional maxlength attribute to value. -// -// value: Maximum value to count. Can be set to -1 for no maximum. -// If not specified, defaults to -1 -// -// REQUIRES: value >= -1 -func DenseCountSparseOutputMaxlength(value int64) DenseCountSparseOutputAttr { - return func(m optionalAttr) { - m["maxlength"] = value - } -} - -// Performs sparse-output bin counting for a tf.tensor input. -// -// Counts the number of times each value occurs in the input. -// -// Arguments: -// values: Tensor containing data to count. -// weights: A Tensor of the same shape as indices containing per-index weight values. May -// also be the empty tensor if no weights are used. -// binary_output: Whether to output the number of occurrences of each value or 1. -// -// Returns: -// output_indices: Indices tensor for the resulting sparse tensor object. -// output_values: Values tensor for the resulting sparse tensor object. -// output_dense_shape: Shape tensor for the resulting sparse tensor object. 
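A construction-only sketch of DenseCountSparseOutput as documented above, using unit weights so the sparse values reduce to plain occurrence counts; the input IDs and the maxlength value are illustrative assumptions:

package main

import (
	"fmt"

	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Count occurrences of small integer IDs; per-element weights of 1 keep raw counts.
	values := op.Const(s.SubScope("values"), []int32{1, 2, 2, 5})
	weights := op.Const(s.SubScope("weights"), []float32{1, 1, 1, 1})
	indices, counts, denseShape := op.DenseCountSparseOutput(s, values, weights, false,
		op.DenseCountSparseOutputMaxlength(6))
	// indices/counts/denseShape describe the resulting sparse count tensor.
	_, _, _ = indices, counts, denseShape

	if _, err := s.Finalize(); err != nil {
		fmt.Println("graph construction failed:", err)
	}
}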
-func DenseCountSparseOutput(scope *Scope, values tf.Output, weights tf.Output, binary_output bool, optional ...DenseCountSparseOutputAttr) (output_indices tf.Output, output_values tf.Output, output_dense_shape tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"binary_output": binary_output} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DenseCountSparseOutput", - Input: []tf.Input{ - values, weights, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// CTCBeamSearchDecoderAttr is an optional argument to CTCBeamSearchDecoder. -type CTCBeamSearchDecoderAttr func(optionalAttr) - -// CTCBeamSearchDecoderMergeRepeated sets the optional merge_repeated attribute to value. -// -// value: If true, merge repeated classes in output. -// If not specified, defaults to true -func CTCBeamSearchDecoderMergeRepeated(value bool) CTCBeamSearchDecoderAttr { - return func(m optionalAttr) { - m["merge_repeated"] = value - } -} - -// Performs beam search decoding on the logits given in input. -// -// A note about the attribute merge_repeated: For the beam search decoder, -// this means that if consecutive entries in a beam are the same, only -// the first of these is emitted. That is, when the top path is "A B B B B", -// "A B" is returned if merge_repeated = True but "A B B B B" is -// returned if merge_repeated = False. -// -// Arguments: -// inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits. -// sequence_length: A vector containing sequence lengths, size `(batch)`. -// beam_width: A scalar >= 0 (beam search beam width). -// top_paths: A scalar >= 0, <= beam_width (controls output size). -// -// Returns: -// decoded_indices: A list (length: top_paths) of indices matrices. Matrix j, -// size `(total_decoded_outputs[j] x 2)`, has indices of a -// `SparseTensor`. The rows store: [batch, time]. -// decoded_values: A list (length: top_paths) of values vectors. Vector j, -// size `(length total_decoded_outputs[j])`, has the values of a -// `SparseTensor`. The vector stores the decoded classes for beam j. -// decoded_shape: A list (length: top_paths) of shape vector. Vector j, -// size `(2)`, stores the shape of the decoded `SparseTensor[j]`. -// Its values are: `[batch_size, max_decoded_length[j]]`. -// log_probability: A matrix, shaped: `(batch_size x top_paths)`. The -// sequence log-probabilities. 
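The beam-search decoder documented above returns its decoded_* results as Go slices of tf.Output, one element per requested path. A construction-only sketch; the logits shape, sequence length, and attribute values are assumptions:

package main

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// logits: (max_time x batch_size x num_classes); fed at run time in a real program.
	logits := op.Placeholder(s.SubScope("logits"), tf.Float)
	// One sequence of length 5 in the batch (values are illustrative).
	seqLen := op.Const(s.SubScope("sequence_length"), []int32{5})
	decodedIndices, decodedValues, decodedShape, logProb := op.CTCBeamSearchDecoder(
		s, logits, seqLen,
		3, // beam_width
		2, // top_paths
		op.CTCBeamSearchDecoderMergeRepeated(false),
	)
	// With top_paths=2, each decoded_* slice holds two tf.Output values.
	_ = len(decodedIndices) + len(decodedValues) + len(decodedShape)
	_ = logProb
}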
-func CTCBeamSearchDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output, beam_width int64, top_paths int64, optional ...CTCBeamSearchDecoderAttr) (decoded_indices []tf.Output, decoded_values []tf.Output, decoded_shape []tf.Output, log_probability tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"beam_width": beam_width, "top_paths": top_paths} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CTCBeamSearchDecoder", - Input: []tf.Input{ - inputs, sequence_length, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if decoded_indices, idx, err = makeOutputList(op, idx, "decoded_indices"); err != nil { - scope.UpdateErr("CTCBeamSearchDecoder", err) - return - } - if decoded_values, idx, err = makeOutputList(op, idx, "decoded_values"); err != nil { - scope.UpdateErr("CTCBeamSearchDecoder", err) - return - } - if decoded_shape, idx, err = makeOutputList(op, idx, "decoded_shape"); err != nil { - scope.UpdateErr("CTCBeamSearchDecoder", err) - return - } - log_probability = op.Output(idx) - return decoded_indices, decoded_values, decoded_shape, log_probability -} - -// CTCGreedyDecoderAttr is an optional argument to CTCGreedyDecoder. -type CTCGreedyDecoderAttr func(optionalAttr) - -// CTCGreedyDecoderMergeRepeated sets the optional merge_repeated attribute to value. -// -// value: If True, merge repeated classes in output. -// If not specified, defaults to false -func CTCGreedyDecoderMergeRepeated(value bool) CTCGreedyDecoderAttr { - return func(m optionalAttr) { - m["merge_repeated"] = value - } -} - -// Performs greedy decoding on the logits given in inputs. -// -// A note about the attribute merge_repeated: if enabled, when -// consecutive logits' maximum indices are the same, only the first of -// these is emitted. Labeling the blank '*', the sequence "A B B * B B" -// becomes "A B B" if merge_repeated = True and "A B B B B" if -// merge_repeated = False. -// -// Regardless of the value of merge_repeated, if the maximum index of a given -// time and batch corresponds to the blank, index `(num_classes - 1)`, no new -// element is emitted. -// -// Arguments: -// inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits. -// sequence_length: A vector containing sequence lengths, size `(batch_size)`. -// -// Returns: -// decoded_indices: Indices matrix, size `(total_decoded_outputs x 2)`, -// of a `SparseTensor`. The rows store: [batch, time]. -// decoded_values: Values vector, size: `(total_decoded_outputs)`, -// of a `SparseTensor`. The vector stores the decoded classes. -// decoded_shape: Shape vector, size `(2)`, of the decoded SparseTensor. -// Values are: `[batch_size, max_decoded_length]`. -// log_probability: Matrix, size `(batch_size x 1)`, containing sequence -// log-probabilities. -func CTCGreedyDecoder(scope *Scope, inputs tf.Output, sequence_length tf.Output, optional ...CTCGreedyDecoderAttr) (decoded_indices tf.Output, decoded_values tf.Output, decoded_shape tf.Output, log_probability tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CTCGreedyDecoder", - Input: []tf.Input{ - inputs, sequence_length, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3) -} - -// CTCLossAttr is an optional argument to CTCLoss. 
-type CTCLossAttr func(optionalAttr) - -// CTCLossPreprocessCollapseRepeated sets the optional preprocess_collapse_repeated attribute to value. -// -// value: Scalar, if true then repeated labels are -// collapsed prior to the CTC calculation. -// If not specified, defaults to false -func CTCLossPreprocessCollapseRepeated(value bool) CTCLossAttr { - return func(m optionalAttr) { - m["preprocess_collapse_repeated"] = value - } -} - -// CTCLossCtcMergeRepeated sets the optional ctc_merge_repeated attribute to value. -// -// value: Scalar. If set to false, *during* CTC calculation -// repeated non-blank labels will not be merged and are interpreted as -// individual labels. This is a simplified version of CTC. -// If not specified, defaults to true -func CTCLossCtcMergeRepeated(value bool) CTCLossAttr { - return func(m optionalAttr) { - m["ctc_merge_repeated"] = value - } -} - -// CTCLossIgnoreLongerOutputsThanInputs sets the optional ignore_longer_outputs_than_inputs attribute to value. -// -// value: Scalar. If set to true, during CTC -// calculation, items that have longer output sequences than input sequences -// are skipped: they don't contribute to the loss term and have zero-gradient. -// If not specified, defaults to false -func CTCLossIgnoreLongerOutputsThanInputs(value bool) CTCLossAttr { - return func(m optionalAttr) { - m["ignore_longer_outputs_than_inputs"] = value - } -} - -// Calculates the CTC Loss (log probability) for each batch entry. Also calculates -// -// the gradient. This class performs the softmax operation for you, so inputs -// should be e.g. linear projections of outputs by an LSTM. -// -// Arguments: -// inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits. -// labels_indices: The indices of a `SparseTensor`. -// `labels_indices(i, :) == [b, t]` means `labels_values(i)` stores the id for -// `(batch b, time t)`. -// labels_values: The values (labels) associated with the given batch and time. -// sequence_length: A vector containing sequence lengths (batch). -// -// Returns: -// loss: A vector (batch) containing log-probabilities. -// gradient: The gradient of `loss`. 3-D, shape: -// `(max_time x batch_size x num_classes)`. -func CTCLoss(scope *Scope, inputs tf.Output, labels_indices tf.Output, labels_values tf.Output, sequence_length tf.Output, optional ...CTCLossAttr) (loss tf.Output, gradient tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CTCLoss", - Input: []tf.Input{ - inputs, labels_indices, labels_values, sequence_length, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// CudnnRNNCanonicalToParamsAttr is an optional argument to CudnnRNNCanonicalToParams. -type CudnnRNNCanonicalToParamsAttr func(optionalAttr) - -// CudnnRNNCanonicalToParamsRnnMode sets the optional rnn_mode attribute to value. -// If not specified, defaults to "lstm" -func CudnnRNNCanonicalToParamsRnnMode(value string) CudnnRNNCanonicalToParamsAttr { - return func(m optionalAttr) { - m["rnn_mode"] = value - } -} - -// CudnnRNNCanonicalToParamsInputMode sets the optional input_mode attribute to value. -// If not specified, defaults to "linear_input" -func CudnnRNNCanonicalToParamsInputMode(value string) CudnnRNNCanonicalToParamsAttr { - return func(m optionalAttr) { - m["input_mode"] = value - } -} - -// CudnnRNNCanonicalToParamsDirection sets the optional direction attribute to value. 
-// If not specified, defaults to "unidirectional" -func CudnnRNNCanonicalToParamsDirection(value string) CudnnRNNCanonicalToParamsAttr { - return func(m optionalAttr) { - m["direction"] = value - } -} - -// CudnnRNNCanonicalToParamsDropout sets the optional dropout attribute to value. -// If not specified, defaults to 0 -func CudnnRNNCanonicalToParamsDropout(value float32) CudnnRNNCanonicalToParamsAttr { - return func(m optionalAttr) { - m["dropout"] = value - } -} - -// CudnnRNNCanonicalToParamsSeed sets the optional seed attribute to value. -// If not specified, defaults to 0 -func CudnnRNNCanonicalToParamsSeed(value int64) CudnnRNNCanonicalToParamsAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// CudnnRNNCanonicalToParamsSeed2 sets the optional seed2 attribute to value. -// If not specified, defaults to 0 -func CudnnRNNCanonicalToParamsSeed2(value int64) CudnnRNNCanonicalToParamsAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Converts CudnnRNN params from canonical form to usable form. -// -// Writes a set of weights into the opaque params buffer so they can be used in -// upcoming training or inferences. -// -// Note that the params buffer may not be compatible across different GPUs. So any -// save and restoration should be converted to and from the canonical weights and -// biases. -// -// num_layers: Specifies the number of layers in the RNN model. -// num_units: Specifies the size of the hidden state. -// input_size: Specifies the size of the input state. -// weights: the canonical form of weights that can be used for saving -// and restoration. They are more likely to be compatible across different -// generations. -// biases: the canonical form of biases that can be used for saving -// and restoration. They are more likely to be compatible across different -// generations. -// num_params: number of parameter sets for all layers. -// Each layer may contain multiple parameter sets, with each set consisting of -// a weight matrix and a bias vector. -// rnn_mode: Indicates the type of the RNN model. -// input_mode: Indicate whether there is a linear projection between the input and -// The actual computation before the first layer. 'skip_input' is only allowed -// when input_size == num_units; 'auto_select' implies 'skip_input' when -// input_size == num_units; otherwise, it implies 'linear_input'. -// direction: Indicates whether a bidirectional model will be used. -// dir = (direction == bidirectional) ? 2 : 1 -// dropout: dropout probability. When set to 0., dropout is disabled. -// seed: the 1st part of a seed to initialize dropout. -// seed2: the 2nd part of a seed to initialize dropout. -func CudnnRNNCanonicalToParams(scope *Scope, num_layers tf.Output, num_units tf.Output, input_size tf.Output, weights []tf.Output, biases []tf.Output, optional ...CudnnRNNCanonicalToParamsAttr) (params tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CudnnRNNCanonicalToParams", - Input: []tf.Input{ - num_layers, num_units, input_size, tf.OutputList(weights), tf.OutputList(biases), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// CudnnRNNParamsToCanonicalV2Attr is an optional argument to CudnnRNNParamsToCanonicalV2. -type CudnnRNNParamsToCanonicalV2Attr func(optionalAttr) - -// CudnnRNNParamsToCanonicalV2RnnMode sets the optional rnn_mode attribute to value. 
-// If not specified, defaults to "lstm" -func CudnnRNNParamsToCanonicalV2RnnMode(value string) CudnnRNNParamsToCanonicalV2Attr { - return func(m optionalAttr) { - m["rnn_mode"] = value - } -} - -// CudnnRNNParamsToCanonicalV2InputMode sets the optional input_mode attribute to value. -// If not specified, defaults to "linear_input" -func CudnnRNNParamsToCanonicalV2InputMode(value string) CudnnRNNParamsToCanonicalV2Attr { - return func(m optionalAttr) { - m["input_mode"] = value - } -} - -// CudnnRNNParamsToCanonicalV2Direction sets the optional direction attribute to value. -// If not specified, defaults to "unidirectional" -func CudnnRNNParamsToCanonicalV2Direction(value string) CudnnRNNParamsToCanonicalV2Attr { - return func(m optionalAttr) { - m["direction"] = value - } -} - -// CudnnRNNParamsToCanonicalV2Dropout sets the optional dropout attribute to value. -// If not specified, defaults to 0 -func CudnnRNNParamsToCanonicalV2Dropout(value float32) CudnnRNNParamsToCanonicalV2Attr { - return func(m optionalAttr) { - m["dropout"] = value - } -} - -// CudnnRNNParamsToCanonicalV2Seed sets the optional seed attribute to value. -// If not specified, defaults to 0 -func CudnnRNNParamsToCanonicalV2Seed(value int64) CudnnRNNParamsToCanonicalV2Attr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// CudnnRNNParamsToCanonicalV2Seed2 sets the optional seed2 attribute to value. -// If not specified, defaults to 0 -func CudnnRNNParamsToCanonicalV2Seed2(value int64) CudnnRNNParamsToCanonicalV2Attr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// CudnnRNNParamsToCanonicalV2NumProj sets the optional num_proj attribute to value. -// If not specified, defaults to 0 -func CudnnRNNParamsToCanonicalV2NumProj(value int64) CudnnRNNParamsToCanonicalV2Attr { - return func(m optionalAttr) { - m["num_proj"] = value - } -} - -// Retrieves CudnnRNN params in canonical form. It supports the projection in LSTM. -// -// Retrieves a set of weights from the opaque params buffer that can be saved and -// restored in a way compatible with future runs. -// -// Note that the params buffer may not be compatible across different GPUs. So any -// save and restoration should be converted to and from the canonical weights and -// biases. -// -// num_layers: Specifies the number of layers in the RNN model. -// num_units: Specifies the size of the hidden state. -// input_size: Specifies the size of the input state. -// num_params_weights: number of weight parameter matrix for all layers. -// num_params_biases: number of bias parameter vector for all layers. -// weights: the canonical form of weights that can be used for saving -// and restoration. They are more likely to be compatible across different -// generations. -// biases: the canonical form of biases that can be used for saving -// and restoration. They are more likely to be compatible across different -// generations. -// rnn_mode: Indicates the type of the RNN model. -// input_mode: Indicate whether there is a linear projection between the input and -// The actual computation before the first layer. 'skip_input' is only allowed -// when input_size == num_units; 'auto_select' implies 'skip_input' when -// input_size == num_units; otherwise, it implies 'linear_input'. -// direction: Indicates whether a bidirectional model will be used. -// dir = (direction == bidirectional) ? 2 : 1 -// dropout: dropout probability. When set to 0., dropout is disabled. -// seed: the 1st part of a seed to initialize dropout. 
-// seed2: the 2nd part of a seed to initialize dropout. -// num_proj: The output dimensionality for the projection matrices. If None or 0, -// no projection is performed. -func CudnnRNNParamsToCanonicalV2(scope *Scope, num_layers tf.Output, num_units tf.Output, input_size tf.Output, params tf.Output, num_params_weights int64, num_params_biases int64, optional ...CudnnRNNParamsToCanonicalV2Attr) (weights []tf.Output, biases []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_params_weights": num_params_weights, "num_params_biases": num_params_biases} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CudnnRNNParamsToCanonicalV2", - Input: []tf.Input{ - num_layers, num_units, input_size, params, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if weights, idx, err = makeOutputList(op, idx, "weights"); err != nil { - scope.UpdateErr("CudnnRNNParamsToCanonicalV2", err) - return - } - if biases, idx, err = makeOutputList(op, idx, "biases"); err != nil { - scope.UpdateErr("CudnnRNNParamsToCanonicalV2", err) - return - } - return weights, biases -} - -// Returns the diagonal part of the tensor. -// -// This operation returns a tensor with the `diagonal` part -// of the `input`. The `diagonal` part is computed as follows: -// -// Assume `input` has dimensions `[D1,..., Dk, D1,..., Dk]`, then the output is a -// tensor of rank `k` with dimensions `[D1,..., Dk]` where: -// -// `diagonal[i1,..., ik] = input[i1, ..., ik, i1,..., ik]`. -// -// For example: -// -// ``` -// # 'input' is [[1, 0, 0, 0] -// [0, 2, 0, 0] -// [0, 0, 3, 0] -// [0, 0, 0, 4]] -// -// tf.diag_part(input) ==> [1, 2, 3, 4] -// ``` -// -// Arguments: -// input: Rank k tensor where k is even and not zero. -// -// Returns The extracted diagonal. -func DiagPart(scope *Scope, input tf.Output) (diagonal tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DiagPart", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// CudnnRNNParamsToCanonicalAttr is an optional argument to CudnnRNNParamsToCanonical. -type CudnnRNNParamsToCanonicalAttr func(optionalAttr) - -// CudnnRNNParamsToCanonicalRnnMode sets the optional rnn_mode attribute to value. -// If not specified, defaults to "lstm" -func CudnnRNNParamsToCanonicalRnnMode(value string) CudnnRNNParamsToCanonicalAttr { - return func(m optionalAttr) { - m["rnn_mode"] = value - } -} - -// CudnnRNNParamsToCanonicalInputMode sets the optional input_mode attribute to value. -// If not specified, defaults to "linear_input" -func CudnnRNNParamsToCanonicalInputMode(value string) CudnnRNNParamsToCanonicalAttr { - return func(m optionalAttr) { - m["input_mode"] = value - } -} - -// CudnnRNNParamsToCanonicalDirection sets the optional direction attribute to value. -// If not specified, defaults to "unidirectional" -func CudnnRNNParamsToCanonicalDirection(value string) CudnnRNNParamsToCanonicalAttr { - return func(m optionalAttr) { - m["direction"] = value - } -} - -// CudnnRNNParamsToCanonicalDropout sets the optional dropout attribute to value. -// If not specified, defaults to 0 -func CudnnRNNParamsToCanonicalDropout(value float32) CudnnRNNParamsToCanonicalAttr { - return func(m optionalAttr) { - m["dropout"] = value - } -} - -// CudnnRNNParamsToCanonicalSeed sets the optional seed attribute to value. 
-// If not specified, defaults to 0 -func CudnnRNNParamsToCanonicalSeed(value int64) CudnnRNNParamsToCanonicalAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// CudnnRNNParamsToCanonicalSeed2 sets the optional seed2 attribute to value. -// If not specified, defaults to 0 -func CudnnRNNParamsToCanonicalSeed2(value int64) CudnnRNNParamsToCanonicalAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Retrieves CudnnRNN params in canonical form. -// -// Retrieves a set of weights from the opaque params buffer that can be saved and -// restored in a way compatible with future runs. -// -// Note that the params buffer may not be compatible across different GPUs. So any -// save and restoration should be converted to and from the canonical weights and -// biases. -// -// num_layers: Specifies the number of layers in the RNN model. -// num_units: Specifies the size of the hidden state. -// input_size: Specifies the size of the input state. -// num_params: number of parameter sets for all layers. -// Each layer may contain multiple parameter sets, with each set consisting of -// a weight matrix and a bias vector. -// weights: the canonical form of weights that can be used for saving -// and restoration. They are more likely to be compatible across different -// generations. -// biases: the canonical form of biases that can be used for saving -// and restoration. They are more likely to be compatible across different -// generations. -// rnn_mode: Indicates the type of the RNN model. -// input_mode: Indicate whether there is a linear projection between the input and -// The actual computation before the first layer. 'skip_input' is only allowed -// when input_size == num_units; 'auto_select' implies 'skip_input' when -// input_size == num_units; otherwise, it implies 'linear_input'. -// direction: Indicates whether a bidirectional model will be used. -// dir = (direction == bidirectional) ? 2 : 1 -// dropout: dropout probability. When set to 0., dropout is disabled. -// seed: the 1st part of a seed to initialize dropout. -// seed2: the 2nd part of a seed to initialize dropout. -func CudnnRNNParamsToCanonical(scope *Scope, num_layers tf.Output, num_units tf.Output, input_size tf.Output, params tf.Output, num_params int64, optional ...CudnnRNNParamsToCanonicalAttr) (weights []tf.Output, biases []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_params": num_params} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CudnnRNNParamsToCanonical", - Input: []tf.Input{ - num_layers, num_units, input_size, params, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if weights, idx, err = makeOutputList(op, idx, "weights"); err != nil { - scope.UpdateErr("CudnnRNNParamsToCanonical", err) - return - } - if biases, idx, err = makeOutputList(op, idx, "biases"); err != nil { - scope.UpdateErr("CudnnRNNParamsToCanonical", err) - return - } - return weights, biases -} - -// CudnnRNNBackpropV3Attr is an optional argument to CudnnRNNBackpropV3. -type CudnnRNNBackpropV3Attr func(optionalAttr) - -// CudnnRNNBackpropV3RnnMode sets the optional rnn_mode attribute to value. -// If not specified, defaults to "lstm" -func CudnnRNNBackpropV3RnnMode(value string) CudnnRNNBackpropV3Attr { - return func(m optionalAttr) { - m["rnn_mode"] = value - } -} - -// CudnnRNNBackpropV3InputMode sets the optional input_mode attribute to value. 
-// If not specified, defaults to "linear_input" -func CudnnRNNBackpropV3InputMode(value string) CudnnRNNBackpropV3Attr { - return func(m optionalAttr) { - m["input_mode"] = value - } -} - -// CudnnRNNBackpropV3Direction sets the optional direction attribute to value. -// If not specified, defaults to "unidirectional" -func CudnnRNNBackpropV3Direction(value string) CudnnRNNBackpropV3Attr { - return func(m optionalAttr) { - m["direction"] = value - } -} - -// CudnnRNNBackpropV3Dropout sets the optional dropout attribute to value. -// If not specified, defaults to 0 -func CudnnRNNBackpropV3Dropout(value float32) CudnnRNNBackpropV3Attr { - return func(m optionalAttr) { - m["dropout"] = value - } -} - -// CudnnRNNBackpropV3Seed sets the optional seed attribute to value. -// If not specified, defaults to 0 -func CudnnRNNBackpropV3Seed(value int64) CudnnRNNBackpropV3Attr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// CudnnRNNBackpropV3Seed2 sets the optional seed2 attribute to value. -// If not specified, defaults to 0 -func CudnnRNNBackpropV3Seed2(value int64) CudnnRNNBackpropV3Attr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// CudnnRNNBackpropV3NumProj sets the optional num_proj attribute to value. -// If not specified, defaults to 0 -func CudnnRNNBackpropV3NumProj(value int64) CudnnRNNBackpropV3Attr { - return func(m optionalAttr) { - m["num_proj"] = value - } -} - -// CudnnRNNBackpropV3TimeMajor sets the optional time_major attribute to value. -// If not specified, defaults to true -func CudnnRNNBackpropV3TimeMajor(value bool) CudnnRNNBackpropV3Attr { - return func(m optionalAttr) { - m["time_major"] = value - } -} - -// Backprop step of CudnnRNNV3. -// -// Compute the backprop of both data and weights in a RNN. Takes an extra -// "sequence_lengths" input than CudnnRNNBackprop. -// -// rnn_mode: Indicates the type of the RNN model. -// input_mode: Indicates whether there is a linear projection between the input and -// the actual computation before the first layer. 'skip_input' is only allowed -// when input_size == num_units; 'auto_select' implies 'skip_input' when -// input_size == num_units; otherwise, it implies 'linear_input'. -// direction: Indicates whether a bidirectional model will be used. Should be -// "unidirectional" or "bidirectional". -// dropout: Dropout probability. When set to 0., dropout is disabled. -// seed: The 1st part of a seed to initialize dropout. -// seed2: The 2nd part of a seed to initialize dropout. -// input: If time_major is true, this is a 3-D tensor with the shape of -// [seq_length, batch_size, input_size]. If time_major is false, the shape is -// [batch_size, seq_length, input_size]. -// input_h: If time_major is true, this is a 3-D tensor with the shape of -// [num_layer * dir, batch_size, num_units]. If time_major is false, the shape -// is [batch_size, num_layer * dir, num_units]. -// input_c: For LSTM, a 3-D tensor with the shape of -// [num_layer * dir, batch, num_units]. For other models, it is ignored. -// params: A 1-D tensor that contains the weights and biases in an opaque layout. -// The size must be created through CudnnRNNParamsSize, and initialized -// separately. Note that they might not be compatible across different -// generations. So it is a good idea to save and restore -// sequence_lengths: a vector of lengths of each input sequence. -// output: If time_major is true, this is a 3-D tensor with the shape of -// [seq_length, batch_size, dir * num_units]. 
If time_major is false, the -// shape is [batch_size, seq_length, dir * num_units]. -// output_h: The same shape has input_h. -// output_c: The same shape as input_c for LSTM. An empty tensor for other models. -// output_backprop: A 3-D tensor with the same shape as output in the forward pass. -// output_h_backprop: A 3-D tensor with the same shape as output_h in the forward -// pass. -// output_c_backprop: A 3-D tensor with the same shape as output_c in the forward -// pass. -// time_major: Indicates whether the input/output format is time major or batch -// major. -// reserve_space: The same reserve_space produced in the forward operation. -// input_backprop: The backprop to input in the forward pass. Has the same shape -// as input. -// input_h_backprop: The backprop to input_h in the forward pass. Has the same -// shape as input_h. -// input_c_backprop: The backprop to input_c in the forward pass. Has the same -// shape as input_c. -// params_backprop: The backprop to the params buffer in the forward pass. Has the -// same shape as params. -func CudnnRNNBackpropV3(scope *Scope, input tf.Output, input_h tf.Output, input_c tf.Output, params tf.Output, sequence_lengths tf.Output, output tf.Output, output_h tf.Output, output_c tf.Output, output_backprop tf.Output, output_h_backprop tf.Output, output_c_backprop tf.Output, reserve_space tf.Output, host_reserved tf.Output, optional ...CudnnRNNBackpropV3Attr) (input_backprop tf.Output, input_h_backprop tf.Output, input_c_backprop tf.Output, params_backprop tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CudnnRNNBackpropV3", - Input: []tf.Input{ - input, input_h, input_c, params, sequence_lengths, output, output_h, output_c, output_backprop, output_h_backprop, output_c_backprop, reserve_space, host_reserved, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3) -} - -// CudnnRNNBackpropAttr is an optional argument to CudnnRNNBackprop. -type CudnnRNNBackpropAttr func(optionalAttr) - -// CudnnRNNBackpropRnnMode sets the optional rnn_mode attribute to value. -// If not specified, defaults to "lstm" -func CudnnRNNBackpropRnnMode(value string) CudnnRNNBackpropAttr { - return func(m optionalAttr) { - m["rnn_mode"] = value - } -} - -// CudnnRNNBackpropInputMode sets the optional input_mode attribute to value. -// If not specified, defaults to "linear_input" -func CudnnRNNBackpropInputMode(value string) CudnnRNNBackpropAttr { - return func(m optionalAttr) { - m["input_mode"] = value - } -} - -// CudnnRNNBackpropDirection sets the optional direction attribute to value. -// If not specified, defaults to "unidirectional" -func CudnnRNNBackpropDirection(value string) CudnnRNNBackpropAttr { - return func(m optionalAttr) { - m["direction"] = value - } -} - -// CudnnRNNBackpropDropout sets the optional dropout attribute to value. -// If not specified, defaults to 0 -func CudnnRNNBackpropDropout(value float32) CudnnRNNBackpropAttr { - return func(m optionalAttr) { - m["dropout"] = value - } -} - -// CudnnRNNBackpropSeed sets the optional seed attribute to value. -// If not specified, defaults to 0 -func CudnnRNNBackpropSeed(value int64) CudnnRNNBackpropAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// CudnnRNNBackpropSeed2 sets the optional seed2 attribute to value. 
-// If not specified, defaults to 0 -func CudnnRNNBackpropSeed2(value int64) CudnnRNNBackpropAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Backprop step of CudnnRNN. -// -// Compute the backprop of both data and weights in a RNN. -// -// rnn_mode: Indicates the type of the RNN model. -// input_mode: Indicate whether there is a linear projection between the input and -// the actual computation before the first layer. 'skip_input' is only allowed -// when input_size == num_units; 'auto_select' implies 'skip_input' when -// input_size == num_units; otherwise, it implies 'linear_input'. -// direction: Indicates whether a bidirectional model will be used. Should be -// "unidirectional" or "bidirectional". -// dropout: Dropout probability. When set to 0., dropout is disabled. -// seed: The 1st part of a seed to initialize dropout. -// seed2: The 2nd part of a seed to initialize dropout. -// input: A 3-D tensor with the shape of [seq_length, batch_size, input_size]. -// input_h: A 3-D tensor with the shape of [num_layer * dir, batch_size, -// num_units]. -// input_c: For LSTM, a 3-D tensor with the shape of -// [num_layer * dir, batch, num_units]. For other models, it is ignored. -// params: A 1-D tensor that contains the weights and biases in an opaque layout. -// The size must be created through CudnnRNNParamsSize, and initialized -// separately. Note that they might not be compatible across different -// generations. So it is a good idea to save and restore -// output: A 3-D tensor with the shape of [seq_length, batch_size, -// dir * num_units]. -// output_h: The same shape has input_h. -// output_c: The same shape as input_c for LSTM. An empty tensor for other models. -// output_backprop: A 3-D tensor with the same shape as output in the forward pass. -// output_h_backprop: A 3-D tensor with the same shape as output_h in the forward -// pass. -// output_c_backprop: A 3-D tensor with the same shape as output_c in the forward -// pass. -// reserve_space: The same reserve_space produced in for forward operation. -// input_backprop: The backprop to input in the forward pass. Has the same shape -// as input. -// input_h_backprop: The backprop to input_h in the forward pass. Has the same -// shape as input_h. -// input_c_backprop: The backprop to input_c in the forward pass. Has the same -// shape as input_c. -// params_backprop: The backprop to the params buffer in the forward pass. Has the -// same shape as params. -func CudnnRNNBackprop(scope *Scope, input tf.Output, input_h tf.Output, input_c tf.Output, params tf.Output, output tf.Output, output_h tf.Output, output_c tf.Output, output_backprop tf.Output, output_h_backprop tf.Output, output_c_backprop tf.Output, reserve_space tf.Output, optional ...CudnnRNNBackpropAttr) (input_backprop tf.Output, input_h_backprop tf.Output, input_c_backprop tf.Output, params_backprop tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CudnnRNNBackprop", - Input: []tf.Input{ - input, input_h, input_c, params, output, output_h, output_c, output_backprop, output_h_backprop, output_c_backprop, reserve_space, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3) -} - -// CudnnRNNV3Attr is an optional argument to CudnnRNNV3. -type CudnnRNNV3Attr func(optionalAttr) - -// CudnnRNNV3RnnMode sets the optional rnn_mode attribute to value. 
-// If not specified, defaults to "lstm" -func CudnnRNNV3RnnMode(value string) CudnnRNNV3Attr { - return func(m optionalAttr) { - m["rnn_mode"] = value - } -} - -// CudnnRNNV3InputMode sets the optional input_mode attribute to value. -// If not specified, defaults to "linear_input" -func CudnnRNNV3InputMode(value string) CudnnRNNV3Attr { - return func(m optionalAttr) { - m["input_mode"] = value - } -} - -// CudnnRNNV3Direction sets the optional direction attribute to value. -// If not specified, defaults to "unidirectional" -func CudnnRNNV3Direction(value string) CudnnRNNV3Attr { - return func(m optionalAttr) { - m["direction"] = value - } -} - -// CudnnRNNV3Dropout sets the optional dropout attribute to value. -// If not specified, defaults to 0 -func CudnnRNNV3Dropout(value float32) CudnnRNNV3Attr { - return func(m optionalAttr) { - m["dropout"] = value - } -} - -// CudnnRNNV3Seed sets the optional seed attribute to value. -// If not specified, defaults to 0 -func CudnnRNNV3Seed(value int64) CudnnRNNV3Attr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// CudnnRNNV3Seed2 sets the optional seed2 attribute to value. -// If not specified, defaults to 0 -func CudnnRNNV3Seed2(value int64) CudnnRNNV3Attr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// CudnnRNNV3NumProj sets the optional num_proj attribute to value. -// If not specified, defaults to 0 -func CudnnRNNV3NumProj(value int64) CudnnRNNV3Attr { - return func(m optionalAttr) { - m["num_proj"] = value - } -} - -// CudnnRNNV3IsTraining sets the optional is_training attribute to value. -// If not specified, defaults to true -func CudnnRNNV3IsTraining(value bool) CudnnRNNV3Attr { - return func(m optionalAttr) { - m["is_training"] = value - } -} - -// CudnnRNNV3TimeMajor sets the optional time_major attribute to value. -// If not specified, defaults to true -func CudnnRNNV3TimeMajor(value bool) CudnnRNNV3Attr { - return func(m optionalAttr) { - m["time_major"] = value - } -} - -// A RNN backed by cuDNN. -// -// Computes the RNN from the input and initial states, with respect to the params -// buffer. Accepts one extra input "sequence_lengths" than CudnnRNN. -// -// rnn_mode: Indicates the type of the RNN model. -// input_mode: Indicates whether there is a linear projection between the input and -// the actual computation before the first layer. 'skip_input' is only allowed -// when input_size == num_units; 'auto_select' implies 'skip_input' when -// input_size == num_units; otherwise, it implies 'linear_input'. -// direction: Indicates whether a bidirectional model will be used. Should be -// "unidirectional" or "bidirectional". -// dropout: Dropout probability. When set to 0., dropout is disabled. -// seed: The 1st part of a seed to initialize dropout. -// seed2: The 2nd part of a seed to initialize dropout. -// input: If time_major is true, this is a 3-D tensor with the shape of -// [seq_length, batch_size, input_size]. If time_major is false, the shape is -// [batch_size, seq_length, input_size]. -// input_h: If time_major is true, this is a 3-D tensor with the shape of -// [num_layer * dir, batch_size, num_units]. If time_major is false, the shape -// is [batch_size, num_layer * dir, num_units]. -// input_c: For LSTM, a 3-D tensor with the shape of -// [num_layer * dir, batch, num_units]. For other models, it is ignored. -// params: A 1-D tensor that contains the weights and biases in an opaque layout. -// The size must be created through CudnnRNNParamsSize, and initialized -// separately. 
Note that they might not be compatible across different -// generations. So it is a good idea to save and restore -// sequence_lengths: a vector of lengths of each input sequence. -// output: If time_major is true, this is a 3-D tensor with the shape of -// [seq_length, batch_size, dir * num_units]. If time_major is false, the -// shape is [batch_size, seq_length, dir * num_units]. -// output_h: The same shape has input_h. -// output_c: The same shape as input_c for LSTM. An empty tensor for other models. -// is_training: Indicates whether this operation is used for inference or -// training. -// time_major: Indicates whether the input/output format is time major or batch -// major. -// reserve_space: An opaque tensor that can be used in backprop calculation. It -// is only produced if is_training is true. -func CudnnRNNV3(scope *Scope, input tf.Output, input_h tf.Output, input_c tf.Output, params tf.Output, sequence_lengths tf.Output, optional ...CudnnRNNV3Attr) (output tf.Output, output_h tf.Output, output_c tf.Output, reserve_space tf.Output, host_reserved tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CudnnRNNV3", - Input: []tf.Input{ - input, input_h, input_c, params, sequence_lengths, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) -} - -// Pads a tensor with zeros. -// -// This operation pads a `input` with zeros according to the `paddings` you -// specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is the -// rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates -// how many zeros to add before the contents of `input` in that dimension, and -// `paddings[D, 1]` indicates how many zeros to add after the contents of `input` -// in that dimension. -// -// The padded size of each dimension D of the output is: -// -// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)` -// -// For example: -// -// ``` -// # 't' is [[1, 1], [2, 2]] -// # 'paddings' is [[1, 1], [2, 2]] -// # rank of 't' is 2 -// pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0] -// [0, 0, 1, 1, 0, 0] -// [0, 0, 2, 2, 0, 0] -// [0, 0, 0, 0, 0, 0]] -// ``` -// -func Pad(scope *Scope, input tf.Output, paddings tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Pad", - Input: []tf.Input{ - input, paddings, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// CudnnRNNV2Attr is an optional argument to CudnnRNNV2. -type CudnnRNNV2Attr func(optionalAttr) - -// CudnnRNNV2RnnMode sets the optional rnn_mode attribute to value. -// If not specified, defaults to "lstm" -func CudnnRNNV2RnnMode(value string) CudnnRNNV2Attr { - return func(m optionalAttr) { - m["rnn_mode"] = value - } -} - -// CudnnRNNV2InputMode sets the optional input_mode attribute to value. -// If not specified, defaults to "linear_input" -func CudnnRNNV2InputMode(value string) CudnnRNNV2Attr { - return func(m optionalAttr) { - m["input_mode"] = value - } -} - -// CudnnRNNV2Direction sets the optional direction attribute to value. -// If not specified, defaults to "unidirectional" -func CudnnRNNV2Direction(value string) CudnnRNNV2Attr { - return func(m optionalAttr) { - m["direction"] = value - } -} - -// CudnnRNNV2Dropout sets the optional dropout attribute to value. 
-// If not specified, defaults to 0 -func CudnnRNNV2Dropout(value float32) CudnnRNNV2Attr { - return func(m optionalAttr) { - m["dropout"] = value - } -} - -// CudnnRNNV2Seed sets the optional seed attribute to value. -// If not specified, defaults to 0 -func CudnnRNNV2Seed(value int64) CudnnRNNV2Attr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// CudnnRNNV2Seed2 sets the optional seed2 attribute to value. -// If not specified, defaults to 0 -func CudnnRNNV2Seed2(value int64) CudnnRNNV2Attr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// CudnnRNNV2IsTraining sets the optional is_training attribute to value. -// If not specified, defaults to true -func CudnnRNNV2IsTraining(value bool) CudnnRNNV2Attr { - return func(m optionalAttr) { - m["is_training"] = value - } -} - -// A RNN backed by cuDNN. -// -// Computes the RNN from the input and initial states, with respect to the params -// buffer. Produces one extra output "host_reserved" than CudnnRNN. -// -// rnn_mode: Indicates the type of the RNN model. -// input_mode: Indicates whether there is a linear projection between the input and -// the actual computation before the first layer. 'skip_input' is only allowed -// when input_size == num_units; 'auto_select' implies 'skip_input' when -// input_size == num_units; otherwise, it implies 'linear_input'. -// direction: Indicates whether a bidirectional model will be used. Should be -// "unidirectional" or "bidirectional". -// dropout: Dropout probability. When set to 0., dropout is disabled. -// seed: The 1st part of a seed to initialize dropout. -// seed2: The 2nd part of a seed to initialize dropout. -// input: A 3-D tensor with the shape of [seq_length, batch_size, input_size]. -// input_h: A 3-D tensor with the shape of [num_layer * dir, batch_size, -// num_units]. -// input_c: For LSTM, a 3-D tensor with the shape of -// [num_layer * dir, batch, num_units]. For other models, it is ignored. -// params: A 1-D tensor that contains the weights and biases in an opaque layout. -// The size must be created through CudnnRNNParamsSize, and initialized -// separately. Note that they might not be compatible across different -// generations. So it is a good idea to save and restore -// output: A 3-D tensor with the shape of [seq_length, batch_size, -// dir * num_units]. -// output_h: The same shape has input_h. -// output_c: The same shape as input_c for LSTM. An empty tensor for other models. -// is_training: Indicates whether this operation is used for inference or -// training. -// reserve_space: An opaque tensor that can be used in backprop calculation. It -// is only produced if is_training is true. -// host_reserved: An opaque tensor that can be used in backprop calculation. It is -// only produced if is_training is true. It is output on host memory rather than -// device memory. -func CudnnRNNV2(scope *Scope, input tf.Output, input_h tf.Output, input_c tf.Output, params tf.Output, optional ...CudnnRNNV2Attr) (output tf.Output, output_h tf.Output, output_c tf.Output, reserve_space tf.Output, host_reserved tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CudnnRNNV2", - Input: []tf.Input{ - input, input_h, input_c, params, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) -} - -// CudnnRNNParamsSizeAttr is an optional argument to CudnnRNNParamsSize. 
-type CudnnRNNParamsSizeAttr func(optionalAttr) - -// CudnnRNNParamsSizeRnnMode sets the optional rnn_mode attribute to value. -// If not specified, defaults to "lstm" -func CudnnRNNParamsSizeRnnMode(value string) CudnnRNNParamsSizeAttr { - return func(m optionalAttr) { - m["rnn_mode"] = value - } -} - -// CudnnRNNParamsSizeInputMode sets the optional input_mode attribute to value. -// If not specified, defaults to "linear_input" -func CudnnRNNParamsSizeInputMode(value string) CudnnRNNParamsSizeAttr { - return func(m optionalAttr) { - m["input_mode"] = value - } -} - -// CudnnRNNParamsSizeDirection sets the optional direction attribute to value. -// If not specified, defaults to "unidirectional" -func CudnnRNNParamsSizeDirection(value string) CudnnRNNParamsSizeAttr { - return func(m optionalAttr) { - m["direction"] = value - } -} - -// CudnnRNNParamsSizeDropout sets the optional dropout attribute to value. -// If not specified, defaults to 0 -func CudnnRNNParamsSizeDropout(value float32) CudnnRNNParamsSizeAttr { - return func(m optionalAttr) { - m["dropout"] = value - } -} - -// CudnnRNNParamsSizeSeed sets the optional seed attribute to value. -// If not specified, defaults to 0 -func CudnnRNNParamsSizeSeed(value int64) CudnnRNNParamsSizeAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// CudnnRNNParamsSizeSeed2 sets the optional seed2 attribute to value. -// If not specified, defaults to 0 -func CudnnRNNParamsSizeSeed2(value int64) CudnnRNNParamsSizeAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// CudnnRNNParamsSizeNumProj sets the optional num_proj attribute to value. -// If not specified, defaults to 0 -func CudnnRNNParamsSizeNumProj(value int64) CudnnRNNParamsSizeAttr { - return func(m optionalAttr) { - m["num_proj"] = value - } -} - -// Computes size of weights that can be used by a Cudnn RNN model. -// -// Return the params size that can be used by the Cudnn RNN model. Subsequent -// weight allocation and initialization should use this size. -// -// num_layers: Specifies the number of layers in the RNN model. -// num_units: Specifies the size of the hidden state. -// input_size: Specifies the size of the input state. -// rnn_mode: Indicates the type of the RNN model. -// input_mode: Indicate whether there is a linear projection between the input and -// The actual computation before the first layer. 'skip_input' is only allowed -// when input_size == num_units; 'auto_select' implies 'skip_input' when -// input_size == num_units; otherwise, it implies 'linear_input'. -// direction: Indicates whether a bidirectional model will be used. -// dir = (direction == bidirectional) ? 2 : 1 -// dropout: dropout probability. When set to 0., dropout is disabled. -// seed: the 1st part of a seed to initialize dropout. -// seed2: the 2nd part of a seed to initialize dropout. -// params_size: The size of the params buffer that should be allocated and -// initialized for this RNN model. Note that this params buffer may not be -// compatible across GPUs. Please use CudnnRNNParamsWeights and -// CudnnRNNParamsBiases to save and restore them in a way that is compatible -// across different runs. 
-func CudnnRNNParamsSize(scope *Scope, num_layers tf.Output, num_units tf.Output, input_size tf.Output, T tf.DataType, S tf.DataType, optional ...CudnnRNNParamsSizeAttr) (params_size tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"T": T, "S": S} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CudnnRNNParamsSize", - Input: []tf.Input{ - num_layers, num_units, input_size, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RecordInputAttr is an optional argument to RecordInput. -type RecordInputAttr func(optionalAttr) - -// RecordInputFileRandomSeed sets the optional file_random_seed attribute to value. -// -// value: Random seeds used to produce randomized records. -// If not specified, defaults to 301 -func RecordInputFileRandomSeed(value int64) RecordInputAttr { - return func(m optionalAttr) { - m["file_random_seed"] = value - } -} - -// RecordInputFileShuffleShiftRatio sets the optional file_shuffle_shift_ratio attribute to value. -// -// value: Shifts the list of files after the list is randomly -// shuffled. -// If not specified, defaults to 0 -func RecordInputFileShuffleShiftRatio(value float32) RecordInputAttr { - return func(m optionalAttr) { - m["file_shuffle_shift_ratio"] = value - } -} - -// RecordInputFileBufferSize sets the optional file_buffer_size attribute to value. -// -// value: The randomization shuffling buffer. -// If not specified, defaults to 10000 -func RecordInputFileBufferSize(value int64) RecordInputAttr { - return func(m optionalAttr) { - m["file_buffer_size"] = value - } -} - -// RecordInputFileParallelism sets the optional file_parallelism attribute to value. -// -// value: How many sstables are opened and concurrently iterated over. -// If not specified, defaults to 16 -func RecordInputFileParallelism(value int64) RecordInputAttr { - return func(m optionalAttr) { - m["file_parallelism"] = value - } -} - -// RecordInputBatchSize sets the optional batch_size attribute to value. -// -// value: The batch size. -// If not specified, defaults to 32 -func RecordInputBatchSize(value int64) RecordInputAttr { - return func(m optionalAttr) { - m["batch_size"] = value - } -} - -// RecordInputCompressionType sets the optional compression_type attribute to value. -// -// value: The type of compression for the file. Currently ZLIB and -// GZIP are supported. Defaults to none. -// If not specified, defaults to "" -func RecordInputCompressionType(value string) RecordInputAttr { - return func(m optionalAttr) { - m["compression_type"] = value - } -} - -// Emits randomized records. -// -// Arguments: -// file_pattern: Glob pattern for the data files. -// -// Returns A tensor of shape [batch_size]. -func RecordInput(scope *Scope, file_pattern string, optional ...RecordInputAttr) (records tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"file_pattern": file_pattern} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RecordInput", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// OrderedMapIncompleteSizeAttr is an optional argument to OrderedMapIncompleteSize. -type OrderedMapIncompleteSizeAttr func(optionalAttr) - -// OrderedMapIncompleteSizeCapacity sets the optional capacity attribute to value. 
-// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapIncompleteSizeCapacity(value int64) OrderedMapIncompleteSizeAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// OrderedMapIncompleteSizeMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapIncompleteSizeMemoryLimit(value int64) OrderedMapIncompleteSizeAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// OrderedMapIncompleteSizeContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func OrderedMapIncompleteSizeContainer(value string) OrderedMapIncompleteSizeAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// OrderedMapIncompleteSizeSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func OrderedMapIncompleteSizeSharedName(value string) OrderedMapIncompleteSizeAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op returns the number of incomplete elements in the underlying container. -func OrderedMapIncompleteSize(scope *Scope, dtypes []tf.DataType, optional ...OrderedMapIncompleteSizeAttr) (size tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "OrderedMapIncompleteSize", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// OrderedMapSizeAttr is an optional argument to OrderedMapSize. -type OrderedMapSizeAttr func(optionalAttr) - -// OrderedMapSizeCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapSizeCapacity(value int64) OrderedMapSizeAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// OrderedMapSizeMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapSizeMemoryLimit(value int64) OrderedMapSizeAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// OrderedMapSizeContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func OrderedMapSizeContainer(value string) OrderedMapSizeAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// OrderedMapSizeSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func OrderedMapSizeSharedName(value string) OrderedMapSizeAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op returns the number of elements in the underlying container. -func OrderedMapSize(scope *Scope, dtypes []tf.DataType, optional ...OrderedMapSizeAttr) (size tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "OrderedMapSize", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// OrderedMapUnstageNoKeyAttr is an optional argument to OrderedMapUnstageNoKey. -type OrderedMapUnstageNoKeyAttr func(optionalAttr) - -// OrderedMapUnstageNoKeyCapacity sets the optional capacity attribute to value. 
-// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapUnstageNoKeyCapacity(value int64) OrderedMapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// OrderedMapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapUnstageNoKeyMemoryLimit(value int64) OrderedMapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// OrderedMapUnstageNoKeyContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func OrderedMapUnstageNoKeyContainer(value string) OrderedMapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// OrderedMapUnstageNoKeySharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func OrderedMapUnstageNoKeySharedName(value string) OrderedMapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op removes and returns the (key, value) element with the smallest -// -// key from the underlying container. If the underlying container -// does not contain elements, the op will block until it does. -func OrderedMapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...OrderedMapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "OrderedMapUnstageNoKey", - Input: []tf.Input{ - indices, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - key = op.Output(idx) - if values, idx, err = makeOutputList(op, idx, "values"); err != nil { - scope.UpdateErr("OrderedMapUnstageNoKey", err) - return - } - return key, values -} - -// OrderedMapPeekAttr is an optional argument to OrderedMapPeek. -type OrderedMapPeekAttr func(optionalAttr) - -// OrderedMapPeekCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapPeekCapacity(value int64) OrderedMapPeekAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// OrderedMapPeekMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapPeekMemoryLimit(value int64) OrderedMapPeekAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// OrderedMapPeekContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func OrderedMapPeekContainer(value string) OrderedMapPeekAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// OrderedMapPeekSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func OrderedMapPeekSharedName(value string) OrderedMapPeekAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op peeks at the values at the specified key. If the -// -// underlying container does not contain this key -// this op will block until it does. This Op is optimized for -// performance. 
-func OrderedMapPeek(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...OrderedMapPeekAttr) (values []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "OrderedMapPeek", - Input: []tf.Input{ - key, indices, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if values, idx, err = makeOutputList(op, idx, "values"); err != nil { - scope.UpdateErr("OrderedMapPeek", err) - return - } - return values -} - -// MapIncompleteSizeAttr is an optional argument to MapIncompleteSize. -type MapIncompleteSizeAttr func(optionalAttr) - -// MapIncompleteSizeCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapIncompleteSizeCapacity(value int64) MapIncompleteSizeAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// MapIncompleteSizeMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapIncompleteSizeMemoryLimit(value int64) MapIncompleteSizeAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// MapIncompleteSizeContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func MapIncompleteSizeContainer(value string) MapIncompleteSizeAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// MapIncompleteSizeSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func MapIncompleteSizeSharedName(value string) MapIncompleteSizeAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op returns the number of incomplete elements in the underlying container. -func MapIncompleteSize(scope *Scope, dtypes []tf.DataType, optional ...MapIncompleteSizeAttr) (size tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MapIncompleteSize", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MapSizeAttr is an optional argument to MapSize. -type MapSizeAttr func(optionalAttr) - -// MapSizeCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapSizeCapacity(value int64) MapSizeAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// MapSizeMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapSizeMemoryLimit(value int64) MapSizeAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// MapSizeContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func MapSizeContainer(value string) MapSizeAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// MapSizeSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func MapSizeSharedName(value string) MapSizeAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op returns the number of elements in the underlying container. 
-func MapSize(scope *Scope, dtypes []tf.DataType, optional ...MapSizeAttr) (size tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MapSize", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MapUnstageNoKeyAttr is an optional argument to MapUnstageNoKey. -type MapUnstageNoKeyAttr func(optionalAttr) - -// MapUnstageNoKeyCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapUnstageNoKeyCapacity(value int64) MapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// MapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapUnstageNoKeyMemoryLimit(value int64) MapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// MapUnstageNoKeyContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func MapUnstageNoKeyContainer(value string) MapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// MapUnstageNoKeySharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func MapUnstageNoKeySharedName(value string) MapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op removes and returns a random (key, value) -// -// from the underlying container. If the underlying container -// does not contain elements, the op will block until it does. -func MapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...MapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MapUnstageNoKey", - Input: []tf.Input{ - indices, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - key = op.Output(idx) - if values, idx, err = makeOutputList(op, idx, "values"); err != nil { - scope.UpdateErr("MapUnstageNoKey", err) - return - } - return key, values -} - -// UnbatchAttr is an optional argument to Unbatch. -type UnbatchAttr func(optionalAttr) - -// UnbatchContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func UnbatchContainer(value string) UnbatchAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// UnbatchSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func UnbatchSharedName(value string) UnbatchAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Reverses the operation of Batch for a single output Tensor. -// -// An instance of Unbatch either receives an empty batched_tensor, in which case it -// asynchronously waits until the values become available from a concurrently -// running instance of Unbatch with the same container and shared_name, or receives -// a non-empty batched_tensor in which case it finalizes all other concurrently -// running instances and outputs its own element from the batch. -// -// batched_tensor: The possibly transformed output of Batch. 
The size of the first -// dimension should remain unchanged by the transformations for the operation to -// work. -// batch_index: The matching batch_index obtained from Batch. -// id: The id scalar emitted by Batch. -// unbatched_tensor: The Tensor corresponding to this execution. -// timeout_micros: Maximum amount of time (in microseconds) to wait to receive the -// batched input tensor associated with a given invocation of the op. -// container: Container to control resource sharing. -// shared_name: Instances of Unbatch with the same container and shared_name are -// assumed to possibly belong to the same batch. If left empty, the op name will -// be used as the shared name. -func Unbatch(scope *Scope, batched_tensor tf.Output, batch_index tf.Output, id tf.Output, timeout_micros int64, optional ...UnbatchAttr) (unbatched_tensor tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"timeout_micros": timeout_micros} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Unbatch", - Input: []tf.Input{ - batched_tensor, batch_index, id, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MapUnstageAttr is an optional argument to MapUnstage. -type MapUnstageAttr func(optionalAttr) - -// MapUnstageCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapUnstageCapacity(value int64) MapUnstageAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// MapUnstageMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapUnstageMemoryLimit(value int64) MapUnstageAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// MapUnstageContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func MapUnstageContainer(value string) MapUnstageAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// MapUnstageSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func MapUnstageSharedName(value string) MapUnstageAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op removes and returns the values associated with the key -// -// from the underlying container. If the underlying container -// does not contain this key, the op will block until it does. -func MapUnstage(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...MapUnstageAttr) (values []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MapUnstage", - Input: []tf.Input{ - key, indices, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if values, idx, err = makeOutputList(op, idx, "values"); err != nil { - scope.UpdateErr("MapUnstage", err) - return - } - return values -} - -// StageSizeAttr is an optional argument to StageSize. -type StageSizeAttr func(optionalAttr) - -// StageSizeCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func StageSizeCapacity(value int64) StageSizeAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// StageSizeMemoryLimit sets the optional memory_limit attribute to value. 
-// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func StageSizeMemoryLimit(value int64) StageSizeAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// StageSizeContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func StageSizeContainer(value string) StageSizeAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// StageSizeSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func StageSizeSharedName(value string) StageSizeAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op returns the number of elements in the underlying container. -func StageSize(scope *Scope, dtypes []tf.DataType, optional ...StageSizeAttr) (size tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StageSize", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// StagePeekAttr is an optional argument to StagePeek. -type StagePeekAttr func(optionalAttr) - -// StagePeekCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func StagePeekCapacity(value int64) StagePeekAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// StagePeekMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func StagePeekMemoryLimit(value int64) StagePeekAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// StagePeekContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func StagePeekContainer(value string) StagePeekAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// StagePeekSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func StagePeekSharedName(value string) StagePeekAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op peeks at the values at the specified index. If the -// -// underlying container does not contain sufficient elements -// this op will block until it does. This Op is optimized for -// performance. -func StagePeek(scope *Scope, index tf.Output, dtypes []tf.DataType, optional ...StagePeekAttr) (values []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StagePeek", - Input: []tf.Input{ - index, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if values, idx, err = makeOutputList(op, idx, "values"); err != nil { - scope.UpdateErr("StagePeek", err) - return - } - return values -} - -// UnstageAttr is an optional argument to Unstage. -type UnstageAttr func(optionalAttr) - -// UnstageCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func UnstageCapacity(value int64) UnstageAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// UnstageMemoryLimit sets the optional memory_limit attribute to value. 
-// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func UnstageMemoryLimit(value int64) UnstageAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// UnstageContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func UnstageContainer(value string) UnstageAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// UnstageSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func UnstageSharedName(value string) UnstageAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op is similar to a lightweight Dequeue. -// -// The basic functionality is similar to dequeue with many fewer -// capabilities and options. This Op is optimized for performance. -func Unstage(scope *Scope, dtypes []tf.DataType, optional ...UnstageAttr) (values []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Unstage", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if values, idx, err = makeOutputList(op, idx, "values"); err != nil { - scope.UpdateErr("Unstage", err) - return - } - return values -} - -// StageAttr is an optional argument to Stage. -type StageAttr func(optionalAttr) - -// StageCapacity sets the optional capacity attribute to value. -// -// value: Maximum number of elements in the Staging Area. If > 0, inserts -// on the container will block when the capacity is reached. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func StageCapacity(value int64) StageAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// StageMemoryLimit sets the optional memory_limit attribute to value. -// -// value: The maximum number of bytes allowed for Tensors in the Staging Area. -// If > 0, inserts will block until sufficient space is available. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func StageMemoryLimit(value int64) StageAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// StageContainer sets the optional container attribute to value. -// -// value: If non-empty, this queue is placed in the given container. Otherwise, -// a default container is used. -// If not specified, defaults to "" -func StageContainer(value string) StageAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// StageSharedName sets the optional shared_name attribute to value. -// -// value: It is necessary to match this name to the matching Unstage Op. -// If not specified, defaults to "" -func StageSharedName(value string) StageAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Stage values similar to a lightweight Enqueue. -// -// The basic functionality of this Op is similar to a queue with many -// fewer capabilities and options. This Op is optimized for performance. -// -// Arguments: -// values: a list of tensors -// dtypes A list of data types that inserted values should adhere to. -// -// Returns the created operation. 
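-//
-// A minimal Go usage sketch (illustrative only; it assumes the usual imports of
-// the `tf` package, github.com/tensorflow/tensorflow/tensorflow/go, and of this
-// `op` package, and the variable names are invented). A Stage op and its
-// matching Unstage op are tied together by an explicit shared_name:
-//
-// ```go
-// s := op.NewScope()
-// v := op.Const(s, []float32{1, 2, 3})
-// put := op.Stage(s, []tf.Output{v}, op.StageSharedName("staging_area"))
-// get := op.Unstage(s, []tf.DataType{tf.Float}, op.UnstageSharedName("staging_area"))
-// graph, _ := s.Finalize()
-// sess, _ := tf.NewSession(graph, nil)
-// sess.Run(nil, nil, []*tf.Operation{put})          // enqueue
-// out, _ := sess.Run(nil, []tf.Output{get[0]}, nil) // dequeue; out[0] holds [1 2 3]
-// ```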
-func Stage(scope *Scope, values []tf.Output, optional ...StageAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Stage", - Input: []tf.Input{ - tf.OutputList(values), - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Delete the tensor specified by its handle in the session. -// -// Arguments: -// handle: The handle for a tensor stored in the session state. -// -// Returns the created operation. -func DeleteSessionTensor(scope *Scope, handle tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DeleteSessionTensor", - Input: []tf.Input{ - handle, - }, - } - return scope.AddOperation(opspec) -} - -// Store the input tensor in the state of the current session. -// -// Arguments: -// value: The tensor to be stored. -// -// Returns The handle for the tensor stored in the session state, represented -// as a ResourceHandle object. -func GetSessionHandleV2(scope *Scope, value tf.Output) (handle tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "GetSessionHandleV2", - Input: []tf.Input{ - value, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Store the input tensor in the state of the current session. -// -// Arguments: -// value: The tensor to be stored. -// -// Returns The handle for the tensor stored in the session state, represented -// as a string. -func GetSessionHandle(scope *Scope, value tf.Output) (handle tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "GetSessionHandle", - Input: []tf.Input{ - value, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Copy a tensor setting everything outside a central band in each innermost matrix to zero. -// -// The `band` part is computed as follows: -// Assume `input` has `k` dimensions `[I, J, K, ..., M, N]`, then the output is a -// tensor with the same shape where -// -// `band[i, j, k, ..., m, n] = in_band(m, n) * input[i, j, k, ..., m, n]`. -// -// The indicator function -// -// `in_band(m, n) = (num_lower < 0 || (m-n) <= num_lower)) && -// (num_upper < 0 || (n-m) <= num_upper)`. -// -// For example: -// -// ``` -// # if 'input' is [[ 0, 1, 2, 3] -// [-1, 0, 1, 2] -// [-2, -1, 0, 1] -// [-3, -2, -1, 0]], -// -// tf.matrix_band_part(input, 1, -1) ==> [[ 0, 1, 2, 3] -// [-1, 0, 1, 2] -// [ 0, -1, 0, 1] -// [ 0, 0, -1, 0]], -// -// tf.matrix_band_part(input, 2, 1) ==> [[ 0, 1, 0, 0] -// [-1, 0, 1, 0] -// [-2, -1, 0, 1] -// [ 0, -2, -1, 0]] -// ``` -// -// Useful special cases: -// -// ``` -// tf.matrix_band_part(input, 0, -1) ==> Upper triangular part. -// tf.matrix_band_part(input, -1, 0) ==> Lower triangular part. -// tf.matrix_band_part(input, 0, 0) ==> Diagonal. -// ``` -// -// Arguments: -// input: Rank `k` tensor. -// num_lower: 0-D tensor. Number of subdiagonals to keep. If negative, keep entire -// lower triangle. -// num_upper: 0-D tensor. Number of superdiagonals to keep. If negative, keep -// entire upper triangle. -// -// Returns Rank `k` tensor of the same shape as input. The extracted banded tensor. 
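-//
-// The first example above could be reproduced from Go roughly as follows (an
-// illustrative sketch; it assumes the usual `tf` and `op` package imports, and
-// error handling is elided):
-//
-// ```go
-// s := op.NewScope()
-// x := op.Const(s, [][]int32{
-// 	{0, 1, 2, 3},
-// 	{-1, 0, 1, 2},
-// 	{-2, -1, 0, 1},
-// 	{-3, -2, -1, 0},
-// })
-// band := op.MatrixBandPart(s, x, op.Const(s, int64(1)), op.Const(s, int64(-1)))
-// graph, _ := s.Finalize()
-// sess, _ := tf.NewSession(graph, nil)
-// out, _ := sess.Run(nil, []tf.Output{band}, nil)
-// _ = out // out[0] holds the banded matrix from the first example.
-// ```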
-func MatrixBandPart(scope *Scope, input tf.Output, num_lower tf.Output, num_upper tf.Output) (band tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MatrixBandPart", - Input: []tf.Input{ - input, num_lower, num_upper, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ListDiffAttr is an optional argument to ListDiff. -type ListDiffAttr func(optionalAttr) - -// ListDiffOutIdx sets the optional out_idx attribute to value. -// If not specified, defaults to DT_INT32 -func ListDiffOutIdx(value tf.DataType) ListDiffAttr { - return func(m optionalAttr) { - m["out_idx"] = value - } -} - -// Computes the difference between two lists of numbers or strings. -// -// Given a list `x` and a list `y`, this operation returns a list `out` that -// represents all values that are in `x` but not in `y`. The returned list `out` -// is sorted in the same order that the numbers appear in `x` (duplicates are -// preserved). This operation also returns a list `idx` that represents the -// position of each `out` element in `x`. In other words: -// -// `out[i] = x[idx[i]] for i in [0, 1, ..., len(out) - 1]` -// -// For example, given this input: -// -// ``` -// x = [1, 2, 3, 4, 5, 6] -// y = [1, 3, 5] -// ``` -// -// This operation would return: -// -// ``` -// out ==> [2, 4, 6] -// idx ==> [1, 3, 5] -// ``` -// -// Arguments: -// x: 1-D. Values to keep. -// y: 1-D. Values to remove. -// -// Returns: -// out: 1-D. Values present in `x` but not in `y`. -// idx: 1-D. Positions of `x` values preserved in `out`. -func ListDiff(scope *Scope, x tf.Output, y tf.Output, optional ...ListDiffAttr) (out tf.Output, idx tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ListDiff", - Input: []tf.Input{ - x, y, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Deprecated. Use TensorArrayScatterV3 -// -// DEPRECATED at GraphDef version 26: Use TensorArrayScatterV3 -func TensorArrayScatterV2(scope *Scope, handle tf.Output, indices tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorArrayScatterV2", - Input: []tf.Input{ - handle, indices, value, flow_in, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Deprecated. Use TensorArrayReadV3 -// -// DEPRECATED at GraphDef version 26: Use TensorArrayReadV3 -func TensorArrayReadV2(scope *Scope, handle tf.Output, index tf.Output, flow_in tf.Output, dtype tf.DataType) (value tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - opspec := tf.OpSpec{ - Type: "TensorArrayReadV2", - Input: []tf.Input{ - handle, index, flow_in, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Deprecated. Use TensorArrayGradV3 -// -// DEPRECATED at GraphDef version 26: Use TensorArrayGradV3 -func TensorArrayGradV2(scope *Scope, handle tf.Output, flow_in tf.Output, source string) (grad_handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"source": source} - opspec := tf.OpSpec{ - Type: "TensorArrayGradV2", - Input: []tf.Input{ - handle, flow_in, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// TensorArrayV2Attr is an optional argument to TensorArrayV2. 
-type TensorArrayV2Attr func(optionalAttr) - -// TensorArrayV2ElementShape sets the optional element_shape attribute to value. -// If not specified, defaults to -func TensorArrayV2ElementShape(value tf.Shape) TensorArrayV2Attr { - return func(m optionalAttr) { - m["element_shape"] = value - } -} - -// TensorArrayV2DynamicSize sets the optional dynamic_size attribute to value. -// If not specified, defaults to false -func TensorArrayV2DynamicSize(value bool) TensorArrayV2Attr { - return func(m optionalAttr) { - m["dynamic_size"] = value - } -} - -// TensorArrayV2ClearAfterRead sets the optional clear_after_read attribute to value. -// If not specified, defaults to true -func TensorArrayV2ClearAfterRead(value bool) TensorArrayV2Attr { - return func(m optionalAttr) { - m["clear_after_read"] = value - } -} - -// TensorArrayV2TensorArrayName sets the optional tensor_array_name attribute to value. -// If not specified, defaults to "" -func TensorArrayV2TensorArrayName(value string) TensorArrayV2Attr { - return func(m optionalAttr) { - m["tensor_array_name"] = value - } -} - -// Deprecated. Use TensorArrayV3 -// -// DEPRECATED at GraphDef version 26: Use TensorArrayV3 -func TensorArrayV2(scope *Scope, size tf.Output, dtype tf.DataType, optional ...TensorArrayV2Attr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TensorArrayV2", - Input: []tf.Input{ - size, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Get the current size of the TensorArray. -// -// Arguments: -// handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad). -// flow_in: A float scalar that enforces proper chaining of operations. -// -// Returns The current size of the TensorArray. -func TensorArraySizeV3(scope *Scope, handle tf.Output, flow_in tf.Output) (size tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorArraySizeV3", - Input: []tf.Input{ - handle, flow_in, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Split the data from the input value into TensorArray elements. -// -// Assuming that `lengths` takes on values -// -// ```(n0, n1, ..., n(T-1))``` -// -// and that `value` has shape -// -// ```(n0 + n1 + ... + n(T-1) x d0 x d1 x ...)```, -// -// this splits values into a TensorArray with T tensors. -// -// TensorArray index t will be the subtensor of values with starting position -// -// ```(n0 + n1 + ... + n(t-1), 0, 0, ...)``` -// -// and having size -// -// ```nt x d0 x d1 x ...``` -// -// Arguments: -// handle: The handle to a TensorArray. -// value: The concatenated tensor to write to the TensorArray. -// lengths: The vector of lengths, how to split the rows of value into the -// TensorArray. -// flow_in: A float scalar that enforces proper chaining of operations. -// -// Returns A float scalar that enforces proper chaining of operations. -func TensorArraySplitV3(scope *Scope, handle tf.Output, value tf.Output, lengths tf.Output, flow_in tf.Output) (flow_out tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorArraySplitV3", - Input: []tf.Input{ - handle, value, lengths, flow_in, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// TensorArrayConcatV3Attr is an optional argument to TensorArrayConcatV3. 
-type TensorArrayConcatV3Attr func(optionalAttr) - -// TensorArrayConcatV3ElementShapeExcept0 sets the optional element_shape_except0 attribute to value. -// -// value: The expected shape of an element, if known, -// excluding the first dimension. Used to validate the shapes of -// TensorArray elements. If this shape is not fully specified, concatenating -// zero-size TensorArrays is an error. -// If not specified, defaults to -func TensorArrayConcatV3ElementShapeExcept0(value tf.Shape) TensorArrayConcatV3Attr { - return func(m optionalAttr) { - m["element_shape_except0"] = value - } -} - -// Concat the elements from the TensorArray into value `value`. -// -// Takes `T` elements of shapes -// -// ``` -// (n0 x d0 x d1 x ...), (n1 x d0 x d1 x ...), ..., (n(T-1) x d0 x d1 x ...) -// ``` -// -// and concatenates them into a Tensor of shape: -// -// ```(n0 + n1 + ... + n(T-1) x d0 x d1 x ...)``` -// -// All elements must have the same shape (excepting the first dimension). -// -// Arguments: -// handle: The handle to a TensorArray. -// flow_in: A float scalar that enforces proper chaining of operations. -// dtype: The type of the elem that is returned. -// -// Returns: -// value: All of the elements in the TensorArray, concatenated along the first -// axis. -// lengths: A vector of the row sizes of the original T elements in the -// value output. In the example above, this would be the values: -// `(n1, n2, ..., n(T-1))`. -func TensorArrayConcatV3(scope *Scope, handle tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayConcatV3Attr) (value tf.Output, lengths tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TensorArrayConcatV3", - Input: []tf.Input{ - handle, flow_in, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// TensorArrayGatherV3Attr is an optional argument to TensorArrayGatherV3. -type TensorArrayGatherV3Attr func(optionalAttr) - -// TensorArrayGatherV3ElementShape sets the optional element_shape attribute to value. -// -// value: The expected shape of an element, if known. Used to -// validate the shapes of TensorArray elements. If this shape is not -// fully specified, gathering zero-size TensorArrays is an error. -// If not specified, defaults to -func TensorArrayGatherV3ElementShape(value tf.Shape) TensorArrayGatherV3Attr { - return func(m optionalAttr) { - m["element_shape"] = value - } -} - -// Gather specific elements from the TensorArray into output `value`. -// -// All elements selected by `indices` must have the same shape. -// -// Arguments: -// handle: The handle to a TensorArray. -// indices: The locations in the TensorArray from which to read tensor elements. -// flow_in: A float scalar that enforces proper chaining of operations. -// dtype: The type of the elem that is returned. -// -// Returns All of the elements in the TensorArray, concatenated along a new -// axis (the new dimension 0). 
-func TensorArrayGatherV3(scope *Scope, handle tf.Output, indices tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayGatherV3Attr) (value tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TensorArrayGatherV3", - Input: []tf.Input{ - handle, indices, flow_in, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// GatherAttr is an optional argument to Gather. -type GatherAttr func(optionalAttr) - -// GatherValidateIndices sets the optional validate_indices attribute to value. -// If not specified, defaults to true -func GatherValidateIndices(value bool) GatherAttr { - return func(m optionalAttr) { - m["validate_indices"] = value - } -} - -// Gather slices from `params` according to `indices`. -// -// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D). -// Produces an output tensor with shape `indices.shape + params.shape[1:]` where: -// -// ```python -// # Scalar indices -// output[:, ..., :] = params[indices, :, ... :] -// -// # Vector indices -// output[i, :, ..., :] = params[indices[i], :, ... :] -// -// # Higher rank indices -// output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :] -// ``` -// -// If `indices` is a permutation and `len(indices) == params.shape[0]` then -// this operation will permute `params` accordingly. -// -// `validate_indices`: DEPRECATED. If this operation is assigned to CPU, values in -// `indices` are always validated to be within range. If assigned to GPU, -// out-of-bound indices result in safe but unspecified behavior, which may include -// raising an error. -// -//
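-// A small end-to-end sketch (illustrative; assumes the usual `tf` and `op`
-// package imports, invented variable names, and elided error handling):
-//
-// ```go
-// s := op.NewScope()
-// params := op.Const(s, []float32{10, 20, 30, 40})
-// indices := op.Const(s, []int32{3, 0, 2})
-// out := op.Gather(s, params, indices)
-// graph, _ := s.Finalize()
-// sess, _ := tf.NewSession(graph, nil)
-// res, _ := sess.Run(nil, []tf.Output{out}, nil)
-// _ = res // res[0] holds [40 10 30].
-// ```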
-func Gather(scope *Scope, params tf.Output, indices tf.Output, optional ...GatherAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Gather", - Input: []tf.Input{ - params, indices, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Read an element from the TensorArray into output `value`. -// -// Arguments: -// handle: The handle to a TensorArray. -// -// flow_in: A float scalar that enforces proper chaining of operations. -// dtype: The type of the elem that is returned. -// -// Returns The tensor that is read from the TensorArray. -func TensorArrayReadV3(scope *Scope, handle tf.Output, index tf.Output, flow_in tf.Output, dtype tf.DataType) (value tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - opspec := tf.OpSpec{ - Type: "TensorArrayReadV3", - Input: []tf.Input{ - handle, index, flow_in, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Push an element onto the tensor_array. -// -// Arguments: -// handle: The handle to a TensorArray. -// index: The position to write to inside the TensorArray. -// value: The tensor to write to the TensorArray. -// flow_in: A float scalar that enforces proper chaining of operations. -// -// Returns A float scalar that enforces proper chaining of operations. -func TensorArrayWriteV3(scope *Scope, handle tf.Output, index tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorArrayWriteV3", - Input: []tf.Input{ - handle, index, value, flow_in, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a TensorArray for storing multiple gradients of values in the given handle. -// -// Similar to TensorArrayGradV3. However it creates an accumulator with an -// expanded shape compared to the input TensorArray whose gradient is being -// computed. This enables multiple gradients for the same TensorArray to be -// calculated using the same accumulator. -// -// Arguments: -// handle: The handle to the forward TensorArray. -// flow_in: A float scalar that enforces proper chaining of operations. -// shape_to_prepend: An int32 vector representing a shape. Elements in the gradient accumulator will -// have shape which is this shape_to_prepend value concatenated with shape of the -// elements in the TensorArray corresponding to the input handle. -// source: The gradient source string, used to decide which gradient TensorArray -// to return. -func TensorArrayGradWithShape(scope *Scope, handle tf.Output, flow_in tf.Output, shape_to_prepend tf.Output, source string) (grad_handle tf.Output, flow_out tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"source": source} - opspec := tf.OpSpec{ - Type: "TensorArrayGradWithShape", - Input: []tf.Input{ - handle, flow_in, shape_to_prepend, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Delete the stack from its resource container. -// -// Arguments: -// handle: The handle to a stack. -// -// Returns the created operation. 
-func StackCloseV2(scope *Scope, handle tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "StackCloseV2", - Input: []tf.Input{ - handle, - }, - } - return scope.AddOperation(opspec) -} - -// Pop the element at the top of the stack. -// -// Arguments: -// handle: The handle to a stack. -// elem_type: The type of the elem that is popped. -// -// Returns The tensor that is popped from the top of the stack. -func StackPopV2(scope *Scope, handle tf.Output, elem_type tf.DataType) (elem tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"elem_type": elem_type} - opspec := tf.OpSpec{ - Type: "StackPopV2", - Input: []tf.Input{ - handle, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// StackPushV2Attr is an optional argument to StackPushV2. -type StackPushV2Attr func(optionalAttr) - -// StackPushV2SwapMemory sets the optional swap_memory attribute to value. -// -// value: Swap `elem` to CPU. Default to false. -// If not specified, defaults to false -func StackPushV2SwapMemory(value bool) StackPushV2Attr { - return func(m optionalAttr) { - m["swap_memory"] = value - } -} - -// Push an element onto the stack. -// -// Arguments: -// handle: The handle to a stack. -// elem: The tensor to be pushed onto the stack. -// -// Returns The same tensor as the input 'elem'. -func StackPushV2(scope *Scope, handle tf.Output, elem tf.Output, optional ...StackPushV2Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StackPushV2", - Input: []tf.Input{ - handle, elem, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// StackV2Attr is an optional argument to StackV2. -type StackV2Attr func(optionalAttr) - -// StackV2StackName sets the optional stack_name attribute to value. -// -// value: Overrides the name used for the temporary stack resource. Default -// value is the name of the 'Stack' op (which is guaranteed unique). -// If not specified, defaults to "" -func StackV2StackName(value string) StackV2Attr { - return func(m optionalAttr) { - m["stack_name"] = value - } -} - -// A stack that produces elements in first-in last-out order. -// -// Arguments: -// max_size: The maximum size of the stack if non-negative. If negative, the stack -// size is unlimited. -// elem_type: The type of the elements on the stack. -// -// Returns The handle to the stack. -func StackV2(scope *Scope, max_size tf.Output, elem_type tf.DataType, optional ...StackV2Attr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"elem_type": elem_type} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StackV2", - Input: []tf.Input{ - max_size, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Checks a tensor for NaN, -Inf and +Inf values. -// -// When run, reports an `InvalidArgument` error if `tensor` has any values -// that are not a number (NaN) or infinity (Inf). Otherwise, passes `tensor` as-is. -// Unlike CheckNumerics (V1), CheckNumericsV2 distinguishes -Inf and +Inf in the -// errors it throws. -// -// Arguments: -// -// message: Prefix of the error message. 
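-//
-// A rough usage sketch (illustrative; assumes the usual `tf` and `op` package
-// imports and invented names). Feeding a value that contains NaN or Inf makes
-// the corresponding Session.Run call return an error prefixed with `message`:
-//
-// ```go
-// s := op.NewScope()
-// x := op.Placeholder(s, tf.Float)
-// checked := op.CheckNumericsV2(s, x, "activations")
-// graph, _ := s.Finalize()
-// sess, _ := tf.NewSession(graph, nil)
-// feed, _ := tf.NewTensor([]float32{1, 2, 3})
-// out, err := sess.Run(map[tf.Output]*tf.Tensor{x: feed}, []tf.Output{checked}, nil)
-// _, _ = out, err // err is non-nil only if the feed held NaN, -Inf or +Inf.
-// ```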
-func CheckNumericsV2(scope *Scope, tensor tf.Output, message string) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"message": message} - opspec := tf.OpSpec{ - Type: "CheckNumericsV2", - Input: []tf.Input{ - tensor, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Applies a gradient to a given accumulator. -// -// Does not add if local_step is lesser than the accumulator's global_step. -// -// Arguments: -// handle: The handle to a accumulator. -// local_step: The local_step value at which the gradient was computed. -// gradient: A tensor of the gradient to be accumulated. -// -// Returns the created operation. -func ResourceAccumulatorApplyGradient(scope *Scope, handle tf.Output, local_step tf.Output, gradient tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ResourceAccumulatorApplyGradient", - Input: []tf.Input{ - handle, local_step, gradient, - }, - } - return scope.AddOperation(opspec) -} - -// Updates the accumulator with a new value for global_step. -// -// Logs warning if the accumulator's value is already higher than -// new_global_step. -// -// Arguments: -// handle: The handle to an accumulator. -// new_global_step: The new global_step value to set. -// -// Returns the created operation. -func ResourceAccumulatorSetGlobalStep(scope *Scope, handle tf.Output, new_global_step tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ResourceAccumulatorSetGlobalStep", - Input: []tf.Input{ - handle, new_global_step, - }, - } - return scope.AddOperation(opspec) -} - -// Computes the number of elements in the given queue. -// -// Arguments: -// handle: The handle to a queue. -// -// Returns The number of elements in the given queue. -func QueueSizeV2(scope *Scope, handle tf.Output) (size tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "QueueSizeV2", - Input: []tf.Input{ - handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// QueueEnqueueManyV2Attr is an optional argument to QueueEnqueueManyV2. -type QueueEnqueueManyV2Attr func(optionalAttr) - -// QueueEnqueueManyV2TimeoutMs sets the optional timeout_ms attribute to value. -// -// value: If the queue is too full, this operation will block for up -// to timeout_ms milliseconds. -// Note: This option is not supported yet. -// If not specified, defaults to -1 -func QueueEnqueueManyV2TimeoutMs(value int64) QueueEnqueueManyV2Attr { - return func(m optionalAttr) { - m["timeout_ms"] = value - } -} - -// Enqueues zero or more tuples of one or more tensors in the given queue. -// -// This operation slices each component tensor along the 0th dimension to -// make multiple queue elements. All of the tuple components must have the -// same size in the 0th dimension. -// -// The components input has k elements, which correspond to the components of -// tuples stored in the given queue. -// -// N.B. If the queue is full, this operation will block until the given -// elements have been enqueued (or 'timeout_ms' elapses, if specified). -// -// Arguments: -// handle: The handle to a queue. -// components: One or more tensors from which the enqueued tensors should -// be taken. -// -// Returns the created operation. 
-func QueueEnqueueManyV2(scope *Scope, handle tf.Output, components []tf.Output, optional ...QueueEnqueueManyV2Attr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QueueEnqueueManyV2", - Input: []tf.Input{ - handle, tf.OutputList(components), - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// QueueEnqueueV2Attr is an optional argument to QueueEnqueueV2. -type QueueEnqueueV2Attr func(optionalAttr) - -// QueueEnqueueV2TimeoutMs sets the optional timeout_ms attribute to value. -// -// value: If the queue is full, this operation will block for up to -// timeout_ms milliseconds. -// Note: This option is not supported yet. -// If not specified, defaults to -1 -func QueueEnqueueV2TimeoutMs(value int64) QueueEnqueueV2Attr { - return func(m optionalAttr) { - m["timeout_ms"] = value - } -} - -// Enqueues a tuple of one or more tensors in the given queue. -// -// The components input has k elements, which correspond to the components of -// tuples stored in the given queue. -// -// N.B. If the queue is full, this operation will block until the given -// element has been enqueued (or 'timeout_ms' elapses, if specified). -// -// Arguments: -// handle: The handle to a queue. -// components: One or more tensors from which the enqueued tensors should be taken. -// -// Returns the created operation. -func QueueEnqueueV2(scope *Scope, handle tf.Output, components []tf.Output, optional ...QueueEnqueueV2Attr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QueueEnqueueV2", - Input: []tf.Input{ - handle, tf.OutputList(components), - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// PriorityQueueV2Attr is an optional argument to PriorityQueueV2. -type PriorityQueueV2Attr func(optionalAttr) - -// PriorityQueueV2ComponentTypes sets the optional component_types attribute to value. -// -// value: The type of each component in a value. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func PriorityQueueV2ComponentTypes(value []tf.DataType) PriorityQueueV2Attr { - return func(m optionalAttr) { - m["component_types"] = value - } -} - -// PriorityQueueV2Capacity sets the optional capacity attribute to value. -// -// value: The upper bound on the number of elements in this queue. -// Negative numbers mean no limit. -// If not specified, defaults to -1 -func PriorityQueueV2Capacity(value int64) PriorityQueueV2Attr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// PriorityQueueV2Container sets the optional container attribute to value. -// -// value: If non-empty, this queue is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func PriorityQueueV2Container(value string) PriorityQueueV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// PriorityQueueV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this queue will be shared under the given name -// across multiple sessions. -// If not specified, defaults to "" -func PriorityQueueV2SharedName(value string) PriorityQueueV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// A queue that produces elements sorted by the first component value. 
-// -// Note that the PriorityQueue requires the first component of any element -// to be a scalar int64, in addition to the other elements declared by -// component_types. Therefore calls to Enqueue and EnqueueMany (resp. Dequeue -// and DequeueMany) on a PriorityQueue will all require (resp. output) one extra -// entry in their input (resp. output) lists. -// -// Arguments: -// shapes: The shape of each component in a value. The length of this attr must -// be either 0 or the same as the length of component_types. If the length of -// this attr is 0, the shapes of queue elements are not constrained, and -// only one element may be dequeued at a time. -// -// Returns The handle to the queue. -func PriorityQueueV2(scope *Scope, shapes []tf.Shape, optional ...PriorityQueueV2Attr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"shapes": shapes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "PriorityQueueV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Does nothing. Serves as a control trigger for scheduling. -// -// Only useful as a placeholder for control edges. -// -// Returns the created operation. -func ControlTrigger(scope *Scope) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ControlTrigger", - } - return scope.AddOperation(opspec) -} - -// Interleave the values from the `data` tensors into a single tensor. -// -// Builds a merged tensor such that -// -// ```python -// merged[indices[m][i, ..., j], ...] = data[m][i, ..., j, ...] -// ``` -// -// For example, if each `indices[m]` is scalar or vector, we have -// -// ```python -// # Scalar indices: -// merged[indices[m], ...] = data[m][...] -// -// # Vector indices: -// merged[indices[m][i], ...] = data[m][i, ...] -// ``` -// -// Each `data[i].shape` must start with the corresponding `indices[i].shape`, -// and the rest of `data[i].shape` must be constant w.r.t. `i`. That is, we -// must have `data[i].shape = indices[i].shape + constant`. In terms of this -// `constant`, the output shape is -// -// merged.shape = [max(indices)] + constant -// -// Values may be merged in parallel, so if an index appears in both `indices[m][i]` -// and `indices[n][j]`, the result may be invalid. This differs from the normal -// DynamicStitch operator that defines the behavior in that case. -// -// For example: -// -// ```python -// indices[0] = 6 -// indices[1] = [4, 1] -// indices[2] = [[5, 2], [0, 3]] -// data[0] = [61, 62] -// data[1] = [[41, 42], [11, 12]] -// data[2] = [[[51, 52], [21, 22]], [[1, 2], [31, 32]]] -// merged = [[1, 2], [11, 12], [21, 22], [31, 32], [41, 42], -// [51, 52], [61, 62]] -// ``` -// -// This method can be used to merge partitions created by `dynamic_partition` -// as illustrated on the following example: -// -// ```python -// # Apply function (increments x_i) on elements for which a certain condition -// # apply (x_i != -1 in this example). -// x=tf.constant([0.1, -1., 5.2, 4.3, -1., 7.4]) -// condition_mask=tf.not_equal(x,tf.constant(-1.)) -// partitioned_data = tf.dynamic_partition( -// x, tf.cast(condition_mask, tf.int32) , 2) -// partitioned_data[1] = partitioned_data[1] + 1.0 -// condition_indices = tf.dynamic_partition( -// tf.range(tf.shape(x)[0]), tf.cast(condition_mask, tf.int32) , 2) -// x = tf.dynamic_stitch(condition_indices, partitioned_data) -// # Here x=[1.1, -1., 6.2, 5.3, -1, 8.4], the -1. values remain -// # unchanged. -// ``` -// -//
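-// This interleaving can be expressed from Go roughly as follows (an
-// illustrative sketch; assumes the usual `tf` and `op` package imports,
-// invented names, and elided error handling):
-//
-// ```go
-// s := op.NewScope()
-// indices := []tf.Output{op.Const(s, []int32{0, 2}), op.Const(s, []int32{1, 3})}
-// data := []tf.Output{op.Const(s, []float32{10, 30}), op.Const(s, []float32{20, 40})}
-// merged := op.ParallelDynamicStitch(s, indices, data)
-// graph, _ := s.Finalize()
-// sess, _ := tf.NewSession(graph, nil)
-// out, _ := sess.Run(nil, []tf.Output{merged}, nil)
-// _ = out // out[0] holds [10 20 30 40]; no index repeats, so the result is deterministic.
-// ```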
-func ParallelDynamicStitch(scope *Scope, indices []tf.Output, data []tf.Output) (merged tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ParallelDynamicStitch", - Input: []tf.Input{ - tf.OutputList(indices), tf.OutputList(data), - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Partitions `data` into `num_partitions` tensors using indices from `partitions`. -// -// For each index tuple `js` of size `partitions.ndim`, the slice `data[js, ...]` -// becomes part of `outputs[partitions[js]]`. The slices with `partitions[js] = i` -// are placed in `outputs[i]` in lexicographic order of `js`, and the first -// dimension of `outputs[i]` is the number of entries in `partitions` equal to `i`. -// In detail, -// -// ```python -// outputs[i].shape = [sum(partitions == i)] + data.shape[partitions.ndim:] -// -// outputs[i] = pack([data[js, ...] for js if partitions[js] == i]) -// ``` -// -// `data.shape` must start with `partitions.shape`. -// -// For example: -// -// ```python -// # Scalar partitions. -// partitions = 1 -// num_partitions = 2 -// data = [10, 20] -// outputs[0] = [] # Empty with shape [0, 2] -// outputs[1] = [[10, 20]] -// -// # Vector partitions. -// partitions = [0, 0, 1, 1, 0] -// num_partitions = 2 -// data = [10, 20, 30, 40, 50] -// outputs[0] = [10, 20, 50] -// outputs[1] = [30, 40] -// ``` -// -// See `dynamic_stitch` for an example on how to merge partitions back. -// -//
-// -// Arguments: -// -// partitions: Any shape. Indices in the range `[0, num_partitions)`. -// num_partitions: The number of partitions to output. -func DynamicPartition(scope *Scope, data tf.Output, partitions tf.Output, num_partitions int64) (outputs []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_partitions": num_partitions} - opspec := tf.OpSpec{ - Type: "DynamicPartition", - Input: []tf.Input{ - data, partitions, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { - scope.UpdateErr("DynamicPartition", err) - return - } - return outputs -} - -// ResourceConditionalAccumulatorAttr is an optional argument to ResourceConditionalAccumulator. -type ResourceConditionalAccumulatorAttr func(optionalAttr) - -// ResourceConditionalAccumulatorContainer sets the optional container attribute to value. -// -// value: If non-empty, this accumulator is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func ResourceConditionalAccumulatorContainer(value string) ResourceConditionalAccumulatorAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// ResourceConditionalAccumulatorSharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this accumulator will be shared under the -// given name across multiple sessions. -// If not specified, defaults to "" -func ResourceConditionalAccumulatorSharedName(value string) ResourceConditionalAccumulatorAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// ResourceConditionalAccumulatorReductionType sets the optional reduction_type attribute to value. -// If not specified, defaults to "MEAN" -func ResourceConditionalAccumulatorReductionType(value string) ResourceConditionalAccumulatorAttr { - return func(m optionalAttr) { - m["reduction_type"] = value - } -} - -// A conditional accumulator for aggregating gradients. -// -// The accumulator accepts gradients marked with local_step greater or -// equal to the most recent global_step known to the accumulator. The -// average can be extracted from the accumulator, provided sufficient -// gradients have been accumulated. Extracting the average automatically -// resets the aggregate to 0, and increments the global_step recorded by -// the accumulator. -// This is a resource version of ConditionalAccumulator that will work in TF2.0 -// with tf.cond version 2. -// -// Arguments: -// dtype: The type of the value being accumulated. -// shape: The shape of the values, can be [], in which case shape is unknown. -// -// Returns The handle to the accumulator. -func ResourceConditionalAccumulator(scope *Scope, dtype tf.DataType, shape tf.Shape, optional ...ResourceConditionalAccumulatorAttr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype, "shape": shape} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceConditionalAccumulator", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MultiDeviceIteratorFromStringHandleAttr is an optional argument to MultiDeviceIteratorFromStringHandle. -type MultiDeviceIteratorFromStringHandleAttr func(optionalAttr) - -// MultiDeviceIteratorFromStringHandleOutputTypes sets the optional output_types attribute to value. 
-// -// value: The type list for the return values. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func MultiDeviceIteratorFromStringHandleOutputTypes(value []tf.DataType) MultiDeviceIteratorFromStringHandleAttr { - return func(m optionalAttr) { - m["output_types"] = value - } -} - -// MultiDeviceIteratorFromStringHandleOutputShapes sets the optional output_shapes attribute to value. -// -// value: The list of shapes being produced. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func MultiDeviceIteratorFromStringHandleOutputShapes(value []tf.Shape) MultiDeviceIteratorFromStringHandleAttr { - return func(m optionalAttr) { - m["output_shapes"] = value - } -} - -// Generates a MultiDeviceIterator resource from its provided string handle. -// -// Arguments: -// string_handle: String representing the resource. -// -// Returns A MultiDeviceIterator resource. -func MultiDeviceIteratorFromStringHandle(scope *Scope, string_handle tf.Output, optional ...MultiDeviceIteratorFromStringHandleAttr) (multi_device_iterator tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MultiDeviceIteratorFromStringHandle", - Input: []tf.Input{ - string_handle, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a TensorArray for storing the gradients of values in the given handle. -// -// If the given TensorArray gradient already exists, returns a reference to it. -// -// Locks the size of the original TensorArray by disabling its dynamic size flag. -// -// **A note about the input flow_in:** -// -// The handle flow_in forces the execution of the gradient lookup to occur -// only after certain other operations have occurred. For example, when -// the forward TensorArray is dynamically sized, writes to this TensorArray -// may resize the object. The gradient TensorArray is statically sized based -// on the size of the forward TensorArray when this operation executes. -// Furthermore, the size of the forward TensorArray is frozen by this call. -// As a result, the flow is used to ensure that the call to generate the gradient -// TensorArray only happens after all writes are executed. -// -// In the case of dynamically sized TensorArrays, gradient computation should -// only be performed on read operations that have themselves been chained via -// flow to occur only after all writes have executed. That way the final size -// of the forward TensorArray is known when this operation is called. -// -// **A note about the source attribute:** -// -// TensorArray gradient calls use an accumulator TensorArray object. If -// multiple gradients are calculated and run in the same session, the multiple -// gradient nodes may accidentally flow through the same accumulator TensorArray. -// This double counts and generally breaks the TensorArray gradient flow. -// -// The solution is to identify which gradient call this particular -// TensorArray gradient is being called in. This is performed by identifying -// a unique string (e.g. "gradients", "gradients_1", ...) from the input -// gradient Tensor's name. This string is used as a suffix when creating -// the TensorArray gradient object here (the attribute `source`). -// -// The attribute `source` is added as a suffix to the forward TensorArray's -// name when performing the creation / lookup, so that each separate gradient -// calculation gets its own TensorArray accumulator. 
-// -// Arguments: -// handle: The handle to the forward TensorArray. -// flow_in: A float scalar that enforces proper chaining of operations. -// source: The gradient source string, used to decide which gradient TensorArray -// to return. -func TensorArrayGradV3(scope *Scope, handle tf.Output, flow_in tf.Output, source string) (grad_handle tf.Output, flow_out tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"source": source} - opspec := tf.OpSpec{ - Type: "TensorArrayGradV3", - Input: []tf.Input{ - handle, flow_in, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Produces a string handle for the given MultiDeviceIterator. -// -// Arguments: -// multi_device_iterator: A MultiDeviceIterator resource. -// -// Returns A string representing the resource. -func MultiDeviceIteratorToStringHandle(scope *Scope, multi_device_iterator tf.Output) (string_handle tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MultiDeviceIteratorToStringHandle", - Input: []tf.Input{ - multi_device_iterator, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Gets next element for the provided shard number. -// -// Arguments: -// multi_device_iterator: A MultiDeviceIterator resource. -// shard_num: Integer representing which shard to fetch data for. -// incarnation_id: Which incarnation of the MultiDeviceIterator is running. -// output_types: The type list for the return values. -// output_shapes: The list of shapes being produced. -// -// Returns Result of the get_next on the dataset. -func MultiDeviceIteratorGetNextFromShard(scope *Scope, multi_device_iterator tf.Output, shard_num tf.Output, incarnation_id tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "MultiDeviceIteratorGetNextFromShard", - Input: []tf.Input{ - multi_device_iterator, shard_num, incarnation_id, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if components, idx, err = makeOutputList(op, idx, "components"); err != nil { - scope.UpdateErr("MultiDeviceIteratorGetNextFromShard", err) - return - } - return components -} - -// Creates a MultiDeviceIterator resource. -// -// Arguments: -// devices: A list of devices the iterator works across. -// shared_name: If non-empty, this resource will be shared under the given name -// across multiple sessions. -// container: If non-empty, this resource is placed in the given container. -// Otherwise, a default container is used. -// output_types: The type list for the return values. -// output_shapes: The list of shapes being produced. -// -// Returns Handle to the resource created. 
-func MultiDeviceIterator(scope *Scope, devices []string, shared_name string, container string, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"devices": devices, "shared_name": shared_name, "container": container, "output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "MultiDeviceIterator", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// BoostedTreesCalculateBestFeatureSplitAttr is an optional argument to BoostedTreesCalculateBestFeatureSplit. -type BoostedTreesCalculateBestFeatureSplitAttr func(optionalAttr) - -// BoostedTreesCalculateBestFeatureSplitSplitType sets the optional split_type attribute to value. -// -// value: A string indicating if this Op should perform inequality split or equality split. -// If not specified, defaults to "inequality" -func BoostedTreesCalculateBestFeatureSplitSplitType(value string) BoostedTreesCalculateBestFeatureSplitAttr { - return func(m optionalAttr) { - m["split_type"] = value - } -} - -// Calculates gains for each feature and returns the best possible split information for the feature. -// -// The split information is the best threshold (bucket id), gains and left/right node contributions per node for each feature. -// -// It is possible that not all nodes can be split on each feature. Hence, the list of possible nodes can differ between the features. Therefore, we return `node_ids_list` for each feature, containing the list of nodes that this feature can be used to split. -// -// In this manner, the output is the best split per features and per node, so that it needs to be combined later to produce the best split for each node (among all possible features). -// -// The output shapes are compatible in a way that the first dimension of all tensors are the same and equal to the number of possible split nodes for each feature. -// -// Arguments: -// node_id_range: A Rank 1 tensor (shape=[2]) to specify the range [first, last) of node ids to process within `stats_summary_list`. The nodes are iterated between the two nodes specified by the tensor, as like `for node_id in range(node_id_range[0], node_id_range[1])` (Note that the last index node_id_range[1] is exclusive). -// stats_summary: A Rank 4 tensor (#shape=[max_splits, feature_dims, bucket, stats_dims]) for accumulated stats summary (gradient/hessian) per node, per dimension, per buckets for each feature. -// The first dimension of the tensor is the maximum number of splits, and thus not all elements of it will be used, but only the indexes specified by node_ids will be used. -// l1: l1 regularization factor on leaf weights, per instance based. -// l2: l2 regularization factor on leaf weights, per instance based. -// tree_complexity: adjustment to the gain, per leaf based. -// min_node_weight: minimum avg of hessians in a node before required for the node to be considered for splitting. -// logits_dimension: The dimension of logit, i.e., number of classes. -// -// Returns: -// node_ids: A Rank 1 tensors indicating possible split node ids for each feature. The length of the list is num_features, but each tensor has different size as each feature provides different possible nodes. See above for details like shapes and sizes. -// gains: A Rank 1 tensors indicating the best gains for each feature to split for certain nodes. See above for details like shapes and sizes. 
-// feature_dimensions: A Rank 1 tensors indicating the best feature dimension for each feature to split for certain nodes if the feature is multi-dimension. See above for details like shapes and sizes. -// thresholds: A Rank 1 tensors indicating the bucket id to compare with (as a threshold) for split in each node. See above for details like shapes and sizes. -// left_node_contribs: A Rank 2 tensors indicating the contribution of the left nodes when branching from parent nodes (given by the tensor element in the output node_ids_list) to the left direction by the given threshold for each feature. This value will be used to make the left node value by adding to the parent node value. Second dimension size is 1 for 1-dimensional logits, but would be larger for multi-class problems. See above for details like shapes and sizes. -// right_node_contribs: A Rank 2 tensors, with the same shape/conditions as left_node_contribs_list, but just that the value is for the right node. -// split_with_default_directions: A Rank 1 tensors indicating the which direction to go if data is missing. See above for details like shapes and sizes. -// Inequality with default left returns 0, inequality with default right returns 1, equality with default right returns 2. -func BoostedTreesCalculateBestFeatureSplit(scope *Scope, node_id_range tf.Output, stats_summary tf.Output, l1 tf.Output, l2 tf.Output, tree_complexity tf.Output, min_node_weight tf.Output, logits_dimension int64, optional ...BoostedTreesCalculateBestFeatureSplitAttr) (node_ids tf.Output, gains tf.Output, feature_dimensions tf.Output, thresholds tf.Output, left_node_contribs tf.Output, right_node_contribs tf.Output, split_with_default_directions tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"logits_dimension": logits_dimension} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "BoostedTreesCalculateBestFeatureSplit", - Input: []tf.Input{ - node_id_range, stats_summary, l1, l2, tree_complexity, min_node_weight, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6) -} - -// ModelDatasetAttr is an optional argument to ModelDataset. -type ModelDatasetAttr func(optionalAttr) - -// ModelDatasetAlgorithm sets the optional algorithm attribute to value. -// If not specified, defaults to 0 -func ModelDatasetAlgorithm(value int64) ModelDatasetAttr { - return func(m optionalAttr) { - m["algorithm"] = value - } -} - -// ModelDatasetCpuBudget sets the optional cpu_budget attribute to value. -// If not specified, defaults to 0 -func ModelDatasetCpuBudget(value int64) ModelDatasetAttr { - return func(m optionalAttr) { - m["cpu_budget"] = value - } -} - -// Identity transformation that models performance. -// -// Identity transformation that models performance. -// -// Arguments: -// input_dataset: A variant tensor representing the input dataset. 
-// -// -func ModelDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape, optional ...ModelDatasetAttr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ModelDataset", - Input: []tf.Input{ - input_dataset, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns a list of tensors with the same shapes and contents as the input -// -// tensors. -// -// This op can be used to override the gradient for complicated functions. For -// example, suppose y = f(x) and we wish to apply a custom function g for backprop -// such that dx = g(dy). In Python, -// -// ```python -// with tf.get_default_graph().gradient_override_map( -// {'IdentityN': 'OverrideGradientWithG'}): -// y, _ = identity_n([f(x), x]) -// -// @tf.RegisterGradient('OverrideGradientWithG') -// def ApplyG(op, dy, _): -// return [None, g(dy)] # Do not backprop to f(x). -// ``` -func IdentityN(scope *Scope, input []tf.Output) (output []tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "IdentityN", - Input: []tf.Input{ - tf.OutputList(input), - }, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if output, idx, err = makeOutputList(op, idx, "output"); err != nil { - scope.UpdateErr("IdentityN", err) - return - } - return output -} - -// Returns true if and only if the given Optional variant has a value. -func OptionalHasValue(scope *Scope, optional tf.Output) (has_value tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "OptionalHasValue", - Input: []tf.Input{ - optional, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Constructs an Optional variant from a tuple of tensors. -func OptionalFromValue(scope *Scope, components []tf.Output) (optional tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "OptionalFromValue", - Input: []tf.Input{ - tf.OutputList(components), - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// OptimizeDatasetAttr is an optional argument to OptimizeDataset. -type OptimizeDatasetAttr func(optionalAttr) - -// OptimizeDatasetOptimizationConfigs sets the optional optimization_configs attribute to value. -// If not specified, defaults to <> -func OptimizeDatasetOptimizationConfigs(value []string) OptimizeDatasetAttr { - return func(m optionalAttr) { - m["optimization_configs"] = value - } -} - -// Creates a dataset by applying optimizations to `input_dataset`. -// -// Creates a dataset by applying optimizations to `input_dataset`. -// -// Arguments: -// input_dataset: A variant tensor representing the input dataset. -// optimizations: A `tf.string` vector `tf.Tensor` identifying optimizations to use. 
-// -// -func OptimizeDataset(scope *Scope, input_dataset tf.Output, optimizations tf.Output, output_types []tf.DataType, output_shapes []tf.Shape, optional ...OptimizeDatasetAttr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "OptimizeDataset", - Input: []tf.Input{ - input_dataset, optimizations, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Converts the given `resource_handle` representing an iterator to a string. -// -// Arguments: -// resource_handle: A handle to an iterator resource. -// -// Returns A string representation of the given handle. -func IteratorToStringHandle(scope *Scope, resource_handle tf.Output) (string_handle tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "IteratorToStringHandle", - Input: []tf.Input{ - resource_handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Gets the next output from the given iterator. -// -// This operation is a synchronous version IteratorGetNext. It should only be used -// in situations where the iterator does not block the calling thread, or where -// the calling thread is not a member of the thread pool used to execute parallel -// operations (e.g. in eager mode). -func IteratorGetNextSync(scope *Scope, iterator tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "IteratorGetNextSync", - Input: []tf.Input{ - iterator, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if components, idx, err = makeOutputList(op, idx, "components"); err != nil { - scope.UpdateErr("IteratorGetNextSync", err) - return - } - return components -} - -// RaggedCountSparseOutputAttr is an optional argument to RaggedCountSparseOutput. -type RaggedCountSparseOutputAttr func(optionalAttr) - -// RaggedCountSparseOutputMinlength sets the optional minlength attribute to value. -// -// value: Minimum value to count. Can be set to -1 for no minimum. -// If not specified, defaults to -1 -// -// REQUIRES: value >= -1 -func RaggedCountSparseOutputMinlength(value int64) RaggedCountSparseOutputAttr { - return func(m optionalAttr) { - m["minlength"] = value - } -} - -// RaggedCountSparseOutputMaxlength sets the optional maxlength attribute to value. -// -// value: Maximum value to count. Can be set to -1 for no maximum. -// If not specified, defaults to -1 -// -// REQUIRES: value >= -1 -func RaggedCountSparseOutputMaxlength(value int64) RaggedCountSparseOutputAttr { - return func(m optionalAttr) { - m["maxlength"] = value - } -} - -// Performs sparse-output bin counting for a ragged tensor input. -// -// Counts the number of times each value occurs in the input. -// -// Arguments: -// splits: Tensor containing the row splits of the ragged tensor to count. -// values: Tensor containing values of the sparse tensor to count. -// weights: A Tensor of the same shape as indices containing per-index weight values. -// May also be the empty tensor if no weights are used. -// binary_output: Whether to output the number of occurrences of each value or 1. 
-// 
-// Returns:
-// output_indices: Indices tensor for the resulting sparse tensor object.
-// output_values: Values tensor for the resulting sparse tensor object.
-// output_dense_shape: Shape tensor for the resulting sparse tensor object.
-func RaggedCountSparseOutput(scope *Scope, splits tf.Output, values tf.Output, weights tf.Output, binary_output bool, optional ...RaggedCountSparseOutputAttr) (output_indices tf.Output, output_values tf.Output, output_dense_shape tf.Output) {
- if scope.Err() != nil {
- return
- }
- attrs := map[string]interface{}{"binary_output": binary_output}
- for _, a := range optional {
- a(attrs)
- }
- opspec := tf.OpSpec{
- Type: "RaggedCountSparseOutput",
- Input: []tf.Input{
- splits, values, weights,
- },
- Attrs: attrs,
- }
- op := scope.AddOperation(opspec)
- return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// Creates a dataset that emits the records from one or more binary files.
-//
-// Arguments:
-// filenames: A scalar or a vector containing the name(s) of the file(s) to be
-// read.
-// header_bytes: A scalar representing the number of bytes to skip at the
-// beginning of a file.
-// record_bytes: A scalar representing the number of bytes in each record.
-// footer_bytes: A scalar representing the number of bytes to skip at the end
-// of a file.
-// buffer_size: A scalar representing the number of bytes to buffer. Must be > 0.
-func FixedLengthRecordDataset(scope *Scope, filenames tf.Output, header_bytes tf.Output, record_bytes tf.Output, footer_bytes tf.Output, buffer_size tf.Output) (handle tf.Output) {
- if scope.Err() != nil {
- return
- }
- opspec := tf.OpSpec{
- Type: "FixedLengthRecordDataset",
- Input: []tf.Input{
- filenames, header_bytes, record_bytes, footer_bytes, buffer_size,
- },
- }
- op := scope.AddOperation(opspec)
- return op.Output(0)
-}
-
-// Creates a dataset that emits the lines of one or more text files.
-//
-// Arguments:
-// filenames: A scalar or a vector containing the name(s) of the file(s) to be
-// read.
-// compression_type: A scalar containing either (i) the empty string (no
-// compression), (ii) "ZLIB", or (iii) "GZIP".
-// buffer_size: A scalar containing the number of bytes to buffer.
-func TextLineDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) {
- if scope.Err() != nil {
- return
- }
- opspec := tf.OpSpec{
- Type: "TextLineDataset",
- Input: []tf.Input{
- filenames, compression_type, buffer_size,
- },
- }
- op := scope.AddOperation(opspec)
- return op.Output(0)
-}
-
-// A container for an iterator resource.
-//
-// Arguments:
-// multi_device_iterator: A handle to the multi device iterator to delete.
-// iterators: A list of iterator handles (unused). This is added so that automatic control dependencies get added during function tracing that ensure this op runs after all the dependent iterators are deleted.
-// deleter: A variant deleter.
-//
-// Returns the created operation.
-func DeleteMultiDeviceIterator(scope *Scope, multi_device_iterator tf.Output, iterators []tf.Output, deleter tf.Output) (o *tf.Operation) {
- if scope.Err() != nil {
- return
- }
- opspec := tf.OpSpec{
- Type: "DeleteMultiDeviceIterator",
- Input: []tf.Input{
- multi_device_iterator, tf.OutputList(iterators), deleter,
- },
- }
- return scope.AddOperation(opspec)
-}
-
-// Creates a dataset with a range of values. Corresponds to python's xrange.
-//
-// Arguments:
-// start: corresponds to start in python's xrange().
-// stop: corresponds to stop in python's xrange().
-// step: corresponds to step in python's xrange().
-//
-//
-func RangeDataset(scope *Scope, start tf.Output, stop tf.Output, step tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) {
- if scope.Err() != nil {
- return
- }
- attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes}
- opspec := tf.OpSpec{
- Type: "RangeDataset",
- Input: []tf.Input{
- start, stop, step,
- },
- Attrs: attrs,
- }
- op := scope.AddOperation(opspec)
- return op.Output(0)
-}
-
-// Creates a dataset that batches and pads `batch_size` elements from the input.
-//
-// Arguments:
-//
-// batch_size: A scalar representing the number of elements to accumulate in a
-// batch.
-// padded_shapes: A list of int64 tensors representing the desired padded shapes
-// of the corresponding output components. These shapes may be partially
-// specified, using `-1` to indicate that a particular dimension should be
-// padded to the maximum size of all batch elements. 
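For illustration only (not part of the generated wrappers or this patch), a minimal graph that declares a TextLineDataset using the wrapper above; the file path is a placeholder and the buffer size is an arbitrary example value.

```go
package main

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Dataset over the lines of a (hypothetical) uncompressed text file.
	_ = op.TextLineDataset(s,
		op.Const(s, []string{"/tmp/example.txt"}), // filenames (placeholder path)
		op.Const(s, ""),                           // compression_type: none
		op.Const(s, int64(256*1024)))              // buffer_size in bytes
	if _, err := s.Finalize(); err != nil {
		panic(err)
	}
}
```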
-// padding_values: A list of scalars containing the padding value to use for -// each of the outputs. -// -func PaddedBatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, padded_shapes []tf.Output, padding_values []tf.Output, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "PaddedBatchDataset", - Input: []tf.Input{ - input_dataset, batch_size, tf.OutputList(padded_shapes), tf.OutputList(padding_values), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// BatchDatasetV2Attr is an optional argument to BatchDatasetV2. -type BatchDatasetV2Attr func(optionalAttr) - -// BatchDatasetV2ParallelCopy sets the optional parallel_copy attribute to value. -// If not specified, defaults to false -func BatchDatasetV2ParallelCopy(value bool) BatchDatasetV2Attr { - return func(m optionalAttr) { - m["parallel_copy"] = value - } -} - -// Creates a dataset that batches `batch_size` elements from `input_dataset`. -// -// Arguments: -// -// batch_size: A scalar representing the number of elements to accumulate in a batch. -// drop_remainder: A scalar representing whether the last batch should be dropped in case its size -// is smaller than desired. -// -// -func BatchDatasetV2(scope *Scope, input_dataset tf.Output, batch_size tf.Output, drop_remainder tf.Output, output_types []tf.DataType, output_shapes []tf.Shape, optional ...BatchDatasetV2Attr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "BatchDatasetV2", - Input: []tf.Input{ - input_dataset, batch_size, drop_remainder, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ShuffleDatasetAttr is an optional argument to ShuffleDataset. -type ShuffleDatasetAttr func(optionalAttr) - -// ShuffleDatasetReshuffleEachIteration sets the optional reshuffle_each_iteration attribute to value. -// -// value: If true, each iterator over this dataset will be given -// a different pseudorandomly generated seed, based on a sequence seeded by the -// `seed` and `seed2` inputs. If false, each iterator will be given the same -// seed, and repeated iteration over this dataset will yield the exact same -// sequence of results. -// If not specified, defaults to true -func ShuffleDatasetReshuffleEachIteration(value bool) ShuffleDatasetAttr { - return func(m optionalAttr) { - m["reshuffle_each_iteration"] = value - } -} - -// Creates a dataset that shuffles elements from `input_dataset` pseudorandomly. -// -// Arguments: -// -// buffer_size: The number of output elements to buffer in an iterator over -// this dataset. Compare with the `min_after_dequeue` attr when creating a -// `RandomShuffleQueue`. -// seed: A scalar seed for the random number generator. If either `seed` or -// `seed2` is set to be non-zero, the random number generator is seeded -// by the given seed. Otherwise, a random seed is used. -// seed2: A second scalar seed to avoid seed collision. 
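An illustrative sketch of BatchDatasetV2 with the optional parallel_copy attribute set; the RangeDataset source, the batch size of 32, and the static output shape are assumptions chosen for the example rather than anything prescribed by this patch.

```go
package main

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	r := op.RangeDataset(s,
		op.Const(s, int64(0)), op.Const(s, int64(100)), op.Const(s, int64(1)),
		[]tf.DataType{tf.Int64}, []tf.Shape{tf.ScalarShape()})
	// Batch the scalars into vectors of 32; dropping the short final batch
	// keeps the batch dimension statically known.
	_ = op.BatchDatasetV2(s, r,
		op.Const(s, int64(32)), // batch_size
		op.Const(s, true),      // drop_remainder
		[]tf.DataType{tf.Int64},
		[]tf.Shape{tf.MakeShape(32)},
		op.BatchDatasetV2ParallelCopy(true))
	if _, err := s.Finalize(); err != nil {
		panic(err)
	}
}
```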
-// -// -func ShuffleDataset(scope *Scope, input_dataset tf.Output, buffer_size tf.Output, seed tf.Output, seed2 tf.Output, output_types []tf.DataType, output_shapes []tf.Shape, optional ...ShuffleDatasetAttr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ShuffleDataset", - Input: []tf.Input{ - input_dataset, buffer_size, seed, seed2, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset containing elements of first component of `input_dataset` having true in the last component. -func FilterByLastComponentDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "FilterByLastComponentDataset", - Input: []tf.Input{ - input_dataset, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// PrefetchDatasetAttr is an optional argument to PrefetchDataset. -type PrefetchDatasetAttr func(optionalAttr) - -// PrefetchDatasetSlackPeriod sets the optional slack_period attribute to value. -// If not specified, defaults to 0 -func PrefetchDatasetSlackPeriod(value int64) PrefetchDatasetAttr { - return func(m optionalAttr) { - m["slack_period"] = value - } -} - -// PrefetchDatasetLegacyAutotune sets the optional legacy_autotune attribute to value. -// If not specified, defaults to true -func PrefetchDatasetLegacyAutotune(value bool) PrefetchDatasetAttr { - return func(m optionalAttr) { - m["legacy_autotune"] = value - } -} - -// Creates a dataset that asynchronously prefetches elements from `input_dataset`. -// -// Arguments: -// -// buffer_size: The maximum number of elements to buffer in an iterator over -// this dataset. -// -// -func PrefetchDataset(scope *Scope, input_dataset tf.Output, buffer_size tf.Output, output_types []tf.DataType, output_shapes []tf.Shape, optional ...PrefetchDatasetAttr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "PrefetchDataset", - Input: []tf.Input{ - input_dataset, buffer_size, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Forwards the input to the output. -// -// This operator represents the loop termination condition used by the -// "pivot" switches of a loop. -// -// Arguments: -// input: A boolean scalar, representing the branch predicate of the Switch op. -// -// Returns The same tensor as `input`. -func LoopCond(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "LoopCond", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that skips `count` elements from the `input_dataset`. -// -// Arguments: -// -// count: A scalar representing the number of elements from the `input_dataset` -// that should be skipped. If count is -1, skips everything. 
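A sketch of the usual shuffle-then-prefetch chain built from the wrappers above; the buffer sizes and seeds are arbitrary example values, and the pipeline itself is not part of this patch.

```go
package main

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	types := []tf.DataType{tf.Int64}
	shapes := []tf.Shape{tf.ScalarShape()}
	ds := op.RangeDataset(s,
		op.Const(s, int64(0)), op.Const(s, int64(1000)), op.Const(s, int64(1)),
		types, shapes)
	// Shuffle with a 100-element buffer and fixed seeds, while keeping the
	// default behaviour of reshuffling on every new iterator.
	ds = op.ShuffleDataset(s, ds,
		op.Const(s, int64(100)), // buffer_size
		op.Const(s, int64(42)),  // seed
		op.Const(s, int64(0)),   // seed2
		types, shapes,
		op.ShuffleDatasetReshuffleEachIteration(true))
	// Overlap production and consumption with a small prefetch buffer.
	_ = op.PrefetchDataset(s, ds, op.Const(s, int64(2)), types, shapes)
	if _, err := s.Finalize(); err != nil {
		panic(err)
	}
}
```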
-// -// -func SkipDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "SkipDataset", - Input: []tf.Input{ - input_dataset, count, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that emits the outputs of `input_dataset` `count` times. -// -// Arguments: -// -// count: A scalar representing the number of times that `input_dataset` should -// be repeated. A value of `-1` indicates that it should be repeated infinitely. -// -// -func RepeatDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "RepeatDataset", - Input: []tf.Input{ - input_dataset, count, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// UnpackAttr is an optional argument to Unpack. -type UnpackAttr func(optionalAttr) - -// UnpackAxis sets the optional axis attribute to value. -// -// value: Dimension along which to unpack. Negative values wrap around, so the -// valid range is `[-R, R)`. -// If not specified, defaults to 0 -func UnpackAxis(value int64) UnpackAttr { - return func(m optionalAttr) { - m["axis"] = value - } -} - -// Unpacks a given dimension of a rank-`R` tensor into `num` rank-`(R-1)` tensors. -// -// Unpacks `num` tensors from `value` by chipping it along the `axis` dimension. -// For example, given a tensor of shape `(A, B, C, D)`; -// -// If `axis == 0` then the i'th tensor in `output` is the slice `value[i, :, :, :]` -// and each tensor in `output` will have shape `(B, C, D)`. (Note that the -// dimension unpacked along is gone, unlike `split`). -// -// If `axis == 1` then the i'th tensor in `output` is the slice `value[:, i, :, :]` -// and each tensor in `output` will have shape `(A, C, D)`. -// Etc. -// -// This is the opposite of `pack`. -// -// Arguments: -// value: 1-D or higher, with `axis` dimension size equal to `num`. -// -// -// Returns The list of tensors unpacked from `value`. -func Unpack(scope *Scope, value tf.Output, num int64, optional ...UnpackAttr) (output []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num": num} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Unpack", - Input: []tf.Input{ - value, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if output, idx, err = makeOutputList(op, idx, "output"); err != nil { - scope.UpdateErr("Unpack", err) - return - } - return output -} - -// Creates a dataset that concatenates `input_dataset` with `another_dataset`. 
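A small example of Unpack chipping a constant matrix along axis 0, per the documentation above; the expected printed rows follow from the documented semantics and are not asserted by this patch.

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	value := op.Const(s, [][]int32{{1, 2, 3}, {4, 5, 6}})
	// Chip the 2x3 matrix along axis 0 into two rank-1 tensors of shape (3).
	rows := op.Unpack(s, value, 2, op.UnpackAxis(0))
	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	out, err := sess.Run(nil, rows, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value(), out[1].Value()) // expected: [1 2 3] [4 5 6]
}
```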
-func ConcatenateDataset(scope *Scope, input_dataset tf.Output, another_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "ConcatenateDataset", - Input: []tf.Input{ - input_dataset, another_dataset, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// A placeholder op for a value that will be fed into the computation. -// -// DEPRECATED at GraphDef version 23: Placeholder now behaves the same as PlaceholderV2. -// -// N.B. This operation will fail with an error if it is executed. It is -// intended as a way to represent a value that will always be fed, and to -// provide attrs that enable the fed value to be checked at runtime. -// -// Arguments: -// dtype: The type of elements in the tensor. -// shape: The shape of the tensor. The shape can be any partially-specified -// shape. To be unconstrained, pass in a shape with unknown rank. -// -// Returns A placeholder tensor that must be replaced using the feed mechanism. -func PlaceholderV2(scope *Scope, dtype tf.DataType, shape tf.Shape) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype, "shape": shape} - opspec := tf.OpSpec{ - Type: "PlaceholderV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RandomShuffleQueueV2Attr is an optional argument to RandomShuffleQueueV2. -type RandomShuffleQueueV2Attr func(optionalAttr) - -// RandomShuffleQueueV2Shapes sets the optional shapes attribute to value. -// -// value: The shape of each component in a value. The length of this attr must -// be either 0 or the same as the length of component_types. If the length of -// this attr is 0, the shapes of queue elements are not constrained, and -// only one element may be dequeued at a time. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func RandomShuffleQueueV2Shapes(value []tf.Shape) RandomShuffleQueueV2Attr { - return func(m optionalAttr) { - m["shapes"] = value - } -} - -// RandomShuffleQueueV2Capacity sets the optional capacity attribute to value. -// -// value: The upper bound on the number of elements in this queue. -// Negative numbers mean no limit. -// If not specified, defaults to -1 -func RandomShuffleQueueV2Capacity(value int64) RandomShuffleQueueV2Attr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// RandomShuffleQueueV2MinAfterDequeue sets the optional min_after_dequeue attribute to value. -// -// value: Dequeue will block unless there would be this -// many elements after the dequeue or the queue is closed. This -// ensures a minimum level of mixing of elements. -// If not specified, defaults to 0 -func RandomShuffleQueueV2MinAfterDequeue(value int64) RandomShuffleQueueV2Attr { - return func(m optionalAttr) { - m["min_after_dequeue"] = value - } -} - -// RandomShuffleQueueV2Seed sets the optional seed attribute to value. -// -// value: If either seed or seed2 is set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, a random seed is used. -// If not specified, defaults to 0 -func RandomShuffleQueueV2Seed(value int64) RandomShuffleQueueV2Attr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// RandomShuffleQueueV2Seed2 sets the optional seed2 attribute to value. 
-// -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomShuffleQueueV2Seed2(value int64) RandomShuffleQueueV2Attr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// RandomShuffleQueueV2Container sets the optional container attribute to value. -// -// value: If non-empty, this queue is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func RandomShuffleQueueV2Container(value string) RandomShuffleQueueV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// RandomShuffleQueueV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this queue will be shared under the given name -// across multiple sessions. -// If not specified, defaults to "" -func RandomShuffleQueueV2SharedName(value string) RandomShuffleQueueV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// A queue that randomizes the order of elements. -// -// Arguments: -// component_types: The type of each component in a value. -// -// Returns The handle to the queue. -func RandomShuffleQueueV2(scope *Scope, component_types []tf.DataType, optional ...RandomShuffleQueueV2Attr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"component_types": component_types} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RandomShuffleQueueV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that splits a SparseTensor into elements row-wise. -func SparseTensorSliceDataset(scope *Scope, indices tf.Output, values tf.Output, dense_shape tf.Output) (handle tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseTensorSliceDataset", - Input: []tf.Input{ - indices, values, dense_shape, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that emits `components` as a tuple of tensors once. -func TensorDataset(scope *Scope, components []tf.Output, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "TensorDataset", - Input: []tf.Input{ - tf.OutputList(components), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// QueueCloseV2Attr is an optional argument to QueueCloseV2. -type QueueCloseV2Attr func(optionalAttr) - -// QueueCloseV2CancelPendingEnqueues sets the optional cancel_pending_enqueues attribute to value. -// -// value: If true, all pending enqueue requests that are -// blocked on the given queue will be canceled. -// If not specified, defaults to false -func QueueCloseV2CancelPendingEnqueues(value bool) QueueCloseV2Attr { - return func(m optionalAttr) { - m["cancel_pending_enqueues"] = value - } -} - -// Closes the given queue. -// -// This operation signals that no more elements will be enqueued in the -// given queue. Subsequent Enqueue(Many) operations will fail. -// Subsequent Dequeue(Many) operations will continue to succeed if -// sufficient elements remain in the queue. Subsequent Dequeue(Many) -// operations that would block will fail immediately. -// -// Arguments: -// handle: The handle to a queue. -// -// Returns the created operation. 
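An illustrative combination of RandomShuffleQueueV2 options with QueueCloseV2; the capacity, min_after_dequeue, and seed values are placeholders chosen for the sketch, which only builds the graph.

```go
package main

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// A shuffling queue of scalar float32 elements, capacity 1000, that keeps
	// at least 100 elements buffered so dequeues stay well mixed.
	q := op.RandomShuffleQueueV2(s, []tf.DataType{tf.Float},
		op.RandomShuffleQueueV2Capacity(1000),
		op.RandomShuffleQueueV2MinAfterDequeue(100),
		op.RandomShuffleQueueV2Shapes([]tf.Shape{tf.ScalarShape()}),
		op.RandomShuffleQueueV2Seed(7))
	// Closing the queue cancels pending enqueues once producers are done.
	_ = op.QueueCloseV2(s, q, op.QueueCloseV2CancelPendingEnqueues(true))
	if _, err := s.Finalize(); err != nil {
		panic(err)
	}
}
```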
-func QueueCloseV2(scope *Scope, handle tf.Output, optional ...QueueCloseV2Attr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QueueCloseV2", - Input: []tf.Input{ - handle, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// DebugIdentityV2Attr is an optional argument to DebugIdentityV2. -type DebugIdentityV2Attr func(optionalAttr) - -// DebugIdentityV2TfdbgContextId sets the optional tfdbg_context_id attribute to value. -// -// value: A tfdbg-generated ID for the context that the op belongs to, -// e.g., a concrete compiled tf.function. -// If not specified, defaults to "" -func DebugIdentityV2TfdbgContextId(value string) DebugIdentityV2Attr { - return func(m optionalAttr) { - m["tfdbg_context_id"] = value - } -} - -// DebugIdentityV2OpName sets the optional op_name attribute to value. -// -// value: Optional. Name of the op that the debug op is concerned with. -// Used only for single-tensor trace. -// If not specified, defaults to "" -func DebugIdentityV2OpName(value string) DebugIdentityV2Attr { - return func(m optionalAttr) { - m["op_name"] = value - } -} - -// DebugIdentityV2OutputSlot sets the optional output_slot attribute to value. -// -// value: Optional. Output slot index of the tensor that the debug op -// is concerned with. Used only for single-tensor trace. -// If not specified, defaults to -1 -func DebugIdentityV2OutputSlot(value int64) DebugIdentityV2Attr { - return func(m optionalAttr) { - m["output_slot"] = value - } -} - -// DebugIdentityV2TensorDebugMode sets the optional tensor_debug_mode attribute to value. -// -// value: TensorDebugMode enum value. See debug_event.proto for details. -// If not specified, defaults to -1 -func DebugIdentityV2TensorDebugMode(value int64) DebugIdentityV2Attr { - return func(m optionalAttr) { - m["tensor_debug_mode"] = value - } -} - -// DebugIdentityV2DebugUrls sets the optional debug_urls attribute to value. -// -// value: List of URLs to debug targets, e.g., file:///foo/tfdbg_dump. -// If not specified, defaults to <> -func DebugIdentityV2DebugUrls(value []string) DebugIdentityV2Attr { - return func(m optionalAttr) { - m["debug_urls"] = value - } -} - -// DebugIdentityV2CircularBufferSize sets the optional circular_buffer_size attribute to value. -// If not specified, defaults to 1000 -func DebugIdentityV2CircularBufferSize(value int64) DebugIdentityV2Attr { - return func(m optionalAttr) { - m["circular_buffer_size"] = value - } -} - -// Debug Identity V2 Op. -// -// Provides an identity mapping from input to output, while writing the content of -// the input tensor by calling DebugEventsWriter. -// -// The semantics of the input tensor depends on tensor_debug_mode. In typical -// usage, the input tensor comes directly from the user computation only when -// graph_debug_mode is FULL_TENSOR (see protobuf/debug_event.proto for a -// list of all the possible values of graph_debug_mode). For the other debug modes, -// the input tensor should be produced by an additional op or subgraph that -// computes summary information about one or more tensors. 
-// -// Arguments: -// input: Input tensor, non-Reference type -func DebugIdentityV2(scope *Scope, input tf.Output, optional ...DebugIdentityV2Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DebugIdentityV2", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// DebugNanCountAttr is an optional argument to DebugNanCount. -type DebugNanCountAttr func(optionalAttr) - -// DebugNanCountDeviceName sets the optional device_name attribute to value. -// If not specified, defaults to "" -func DebugNanCountDeviceName(value string) DebugNanCountAttr { - return func(m optionalAttr) { - m["device_name"] = value - } -} - -// DebugNanCountTensorName sets the optional tensor_name attribute to value. -// -// value: Name of the input tensor. -// If not specified, defaults to "" -func DebugNanCountTensorName(value string) DebugNanCountAttr { - return func(m optionalAttr) { - m["tensor_name"] = value - } -} - -// DebugNanCountDebugUrls sets the optional debug_urls attribute to value. -// -// value: List of URLs to debug targets, e.g., -// file:///foo/tfdbg_dump, grpc:://localhost:11011. -// If not specified, defaults to <> -func DebugNanCountDebugUrls(value []string) DebugNanCountAttr { - return func(m optionalAttr) { - m["debug_urls"] = value - } -} - -// DebugNanCountGatedGrpc sets the optional gated_grpc attribute to value. -// -// value: Whether this op will be gated. If any of the debug_urls of this -// debug node is of the grpc:// scheme, when the value of this attribute is set -// to True, the data will not actually be sent via the grpc stream unless this -// debug op has been enabled at the debug_url. If all of the debug_urls of this -// debug node are of the grpc:// scheme and the debug op is enabled at none of -// them, the output will be an empty Tensor. -// If not specified, defaults to false -func DebugNanCountGatedGrpc(value bool) DebugNanCountAttr { - return func(m optionalAttr) { - m["gated_grpc"] = value - } -} - -// Debug NaN Value Counter Op. -// -// Counts number of NaNs in the input tensor, for debugging. -// -// Arguments: -// input: Input tensor, non-Reference type. -func DebugNanCount(scope *Scope, input tf.Output, optional ...DebugNanCountAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DebugNanCount", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// DebugIdentityAttr is an optional argument to DebugIdentity. -type DebugIdentityAttr func(optionalAttr) - -// DebugIdentityDeviceName sets the optional device_name attribute to value. -// -// value: Name of the device on which the tensor resides. -// If not specified, defaults to "" -func DebugIdentityDeviceName(value string) DebugIdentityAttr { - return func(m optionalAttr) { - m["device_name"] = value - } -} - -// DebugIdentityTensorName sets the optional tensor_name attribute to value. -// -// value: Name of the input tensor. -// If not specified, defaults to "" -func DebugIdentityTensorName(value string) DebugIdentityAttr { - return func(m optionalAttr) { - m["tensor_name"] = value - } -} - -// DebugIdentityDebugUrls sets the optional debug_urls attribute to value. 
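A hedged sketch of DebugNanCount used as an ordinary op with no debug URLs configured; whether the op can be run standalone this way is an assumption, and the expected count of 2 follows from the documented behaviour rather than from this patch.

```go
package main

import (
	"fmt"
	"math"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	nan := float32(math.NaN())
	x := op.Const(s, []float32{1, nan, 3, nan})
	// Count the NaNs in x; tensor_name is only used for debugger bookkeeping.
	count := op.DebugNanCount(s, x, op.DebugNanCountTensorName("x"))
	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	out, err := sess.Run(nil, []tf.Output{count}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // expected NaN count: 2
}
```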
-// -// value: List of URLs to debug targets, e.g., -// file:///foo/tfdbg_dump, grpc:://localhost:11011 -// If not specified, defaults to <> -func DebugIdentityDebugUrls(value []string) DebugIdentityAttr { - return func(m optionalAttr) { - m["debug_urls"] = value - } -} - -// DebugIdentityGatedGrpc sets the optional gated_grpc attribute to value. -// -// value: Whether this op will be gated. If any of the debug_urls of this -// debug node is of the grpc:// scheme, when the value of this attribute is set -// to True, the data will not actually be sent via the grpc stream unless this -// debug op has been enabled at the debug_url. If all of the debug_urls of this -// debug node are of the grpc:// scheme and the debug op is enabled at none of -// them, the output will be an empty Tensor. -// If not specified, defaults to false -func DebugIdentityGatedGrpc(value bool) DebugIdentityAttr { - return func(m optionalAttr) { - m["gated_grpc"] = value - } -} - -// Provides an identity mapping of the non-Ref type input tensor for debugging. -// -// Provides an identity mapping of the non-Ref type input tensor for debugging. -// -// Arguments: -// input: Input tensor, non-Reference type -func DebugIdentity(scope *Scope, input tf.Output, optional ...DebugIdentityAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DebugIdentity", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Aggregates the summary of accumulated stats for the batch. -// -// The summary stats contains gradients and hessians accumulated for each node, bucket and dimension id. -// -// Arguments: -// node_ids: int32; Rank 1 Tensor containing node ids for each example, shape [batch_size]. -// gradients: float32; Rank 2 Tensor (shape=[batch_size, logits_dimension]) with gradients for each example. -// hessians: float32; Rank 2 Tensor (shape=[batch_size, hessian_dimension]) with hessians for each example. -// feature_indices: int32; Rank 2 indices of feature sparse Tensors (shape=[number of sparse entries, 2]). -// Number of sparse entries across all instances from the batch. The first value is -// the index of the instance, the second is dimension of the feature. The second axis -// can only have 2 values, i.e., the input dense version of Tensor can only be matrix. -// feature_values: int32; Rank 1 values of feature sparse Tensors (shape=[number of sparse entries]). -// Number of sparse entries across all instances from the batch. The first value is -// the index of the instance, the second is dimension of the feature. -// feature_shape: int32; Rank 1 dense shape of feature sparse Tensors (shape=[2]). -// The first axis can only have 2 values, [batch_size, feature_dimension]. -// max_splits: int; the maximum number of splits possible in the whole tree. -// num_buckets: int; equals to the maximum possible value of bucketized feature + 1. -// -// Returns: -// stats_summary_indices: int32; Rank 2 indices of summary sparse Tensors (shape=[number of non zero statistics, 4]) -// The second axis can only be 4 including node id, feature dimension, bucket id, and statistics_dimension. -// statistics_dimension = logits_dimension + hessian_dimension. 
-// stats_summary_values: output Rank 1 Tensor (shape=[number of non zero statistics]) -// stats_summary_shape: output Rank 1 Tensor (shape=[4]) -// The tensor has following 4 values: [max_splits, feature_dimension, num_buckets, statistics_dimension], -// where statistics_dimension = gradient_dimension + hessian_dimension. gradient_dimension -// is the same as label_dimension, i.e., the output space. hessian_dimension can be the same -// as logits dimension when diagonal hessian is used, or label_dimension^2 when full -// hessian is used. -func BoostedTreesSparseAggregateStats(scope *Scope, node_ids tf.Output, gradients tf.Output, hessians tf.Output, feature_indices tf.Output, feature_values tf.Output, feature_shape tf.Output, max_splits int64, num_buckets int64) (stats_summary_indices tf.Output, stats_summary_values tf.Output, stats_summary_shape tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"max_splits": max_splits, "num_buckets": num_buckets} - opspec := tf.OpSpec{ - Type: "BoostedTreesSparseAggregateStats", - Input: []tf.Input{ - node_ids, gradients, hessians, feature_indices, feature_values, feature_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// DecodeProtoV2Attr is an optional argument to DecodeProtoV2. -type DecodeProtoV2Attr func(optionalAttr) - -// DecodeProtoV2DescriptorSource sets the optional descriptor_source attribute to value. -// -// value: Either the special value `local://` or a path to a file containing -// a serialized `FileDescriptorSet`. -// If not specified, defaults to "local://" -func DecodeProtoV2DescriptorSource(value string) DecodeProtoV2Attr { - return func(m optionalAttr) { - m["descriptor_source"] = value - } -} - -// DecodeProtoV2MessageFormat sets the optional message_format attribute to value. -// -// value: Either `binary` or `text`. -// If not specified, defaults to "binary" -func DecodeProtoV2MessageFormat(value string) DecodeProtoV2Attr { - return func(m optionalAttr) { - m["message_format"] = value - } -} - -// DecodeProtoV2Sanitize sets the optional sanitize attribute to value. -// -// value: Whether to sanitize the result or not. -// If not specified, defaults to false -func DecodeProtoV2Sanitize(value bool) DecodeProtoV2Attr { - return func(m optionalAttr) { - m["sanitize"] = value - } -} - -// The op extracts fields from a serialized protocol buffers message into tensors. -// -// The `decode_proto` op extracts fields from a serialized protocol buffers -// message into tensors. The fields in `field_names` are decoded and converted -// to the corresponding `output_types` if possible. -// -// A `message_type` name must be provided to give context for the field names. -// The actual message descriptor can be looked up either in the linked-in -// descriptor pool or a filename provided by the caller using the -// `descriptor_source` attribute. -// -// Each output tensor is a dense tensor. This means that it is padded to hold -// the largest number of repeated elements seen in the input minibatch. (The -// shape is also padded by one to prevent zero-sized dimensions). The actual -// repeat counts for each example in the minibatch can be found in the `sizes` -// output. In many cases the output of `decode_proto` is fed immediately into -// tf.squeeze if missing values are not a concern. When using tf.squeeze, always -// pass the squeeze dimension explicitly to avoid surprises. 
-// -// For the most part, the mapping between Proto field types and TensorFlow dtypes -// is straightforward. However, there are a few special cases: -// -// - A proto field that contains a submessage or group can only be converted -// to `DT_STRING` (the serialized submessage). This is to reduce the complexity -// of the API. The resulting string can be used as input to another instance of -// the decode_proto op. -// -// - TensorFlow lacks support for unsigned integers. The ops represent uint64 -// types as a `DT_INT64` with the same twos-complement bit pattern (the obvious -// way). Unsigned int32 values can be represented exactly by specifying type -// `DT_INT64`, or using twos-complement if the caller specifies `DT_INT32` in -// the `output_types` attribute. -// -// Both binary and text proto serializations are supported, and can be -// chosen using the `format` attribute. -// -// The `descriptor_source` attribute selects the source of protocol -// descriptors to consult when looking up `message_type`. This may be: -// -// - An empty string or "local://", in which case protocol descriptors are -// created for C++ (not Python) proto definitions linked to the binary. -// -// - A file, in which case protocol descriptors are created from the file, -// which is expected to contain a `FileDescriptorSet` serialized as a string. -// NOTE: You can build a `descriptor_source` file using the `--descriptor_set_out` -// and `--include_imports` options to the protocol compiler `protoc`. -// -// - A "bytes://", in which protocol descriptors are created from ``, -// which is expected to be a `FileDescriptorSet` serialized as a string. -// -// Arguments: -// bytes: Tensor of serialized protos with shape `batch_shape`. -// message_type: Name of the proto message type to decode. -// field_names: List of strings containing proto field names. An extension field can be decoded -// by using its full name, e.g. EXT_PACKAGE.EXT_FIELD_NAME. -// output_types: List of TF types to use for the respective field in field_names. -// -// Returns: -// sizes: Tensor of int32 with shape `[batch_shape, len(field_names)]`. -// Each entry is the number of values found for the corresponding field. -// Optional fields may have 0 or 1 values. -// values: List of tensors containing values for the corresponding field. -// `values[i]` has datatype `output_types[i]` -// and shape `[batch_shape, max(sizes[...,i])]`. -func DecodeProtoV2(scope *Scope, bytes tf.Output, message_type string, field_names []string, output_types []tf.DataType, optional ...DecodeProtoV2Attr) (sizes tf.Output, values []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"message_type": message_type, "field_names": field_names, "output_types": output_types} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DecodeProtoV2", - Input: []tf.Input{ - bytes, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - sizes = op.Output(idx) - if values, idx, err = makeOutputList(op, idx, "values"); err != nil { - scope.UpdateErr("DecodeProtoV2", err) - return - } - return sizes, values -} - -// Output the logits for the given input data -// -// Arguments: -// tree_handle: Handle to the tree resource. -// dense_features: Rank 2 dense features tensor. -// logits_dimension: Scalar, dimension of the logits. -// -// Returns The logits predictions from the tree for each instance in the batch. 
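A sketch of DecodeProtoV2 decoding the `features` submessage field of `tensorflow.Example` protos. That this message type is available from the default linked-in descriptor pool, and the empty placeholder strings standing in for real serialized protos, are assumptions of the example.

```go
package main

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// A batch of two serialized tensorflow.Example protos would normally come
	// from a file or a feed; empty strings keep the sketch self-contained.
	serialized := op.Const(s, []string{"", ""})
	sizes, values := op.DecodeProtoV2(s, serialized,
		"tensorflow.Example",     // message_type
		[]string{"features"},     // field_names: decode the submessage field
		[]tf.DataType{tf.String}, // submessages decode to DT_STRING
		op.DecodeProtoV2MessageFormat("binary"))
	_, _ = sizes, values
	if _, err := s.Finalize(); err != nil {
		panic(err)
	}
}
```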
-func TensorForestTreePredict(scope *Scope, tree_handle tf.Output, dense_features tf.Output, logits_dimension int64) (logits tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"logits_dimension": logits_dimension} - opspec := tf.OpSpec{ - Type: "TensorForestTreePredict", - Input: []tf.Input{ - tree_handle, dense_features, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// EncodeProtoAttr is an optional argument to EncodeProto. -type EncodeProtoAttr func(optionalAttr) - -// EncodeProtoDescriptorSource sets the optional descriptor_source attribute to value. -// If not specified, defaults to "local://" -func EncodeProtoDescriptorSource(value string) EncodeProtoAttr { - return func(m optionalAttr) { - m["descriptor_source"] = value - } -} - -// The op serializes protobuf messages provided in the input tensors. -// -// The types of the tensors in `values` must match the schema for the fields -// specified in `field_names`. All the tensors in `values` must have a common -// shape prefix, *batch_shape*. -// -// The `sizes` tensor specifies repeat counts for each field. The repeat count -// (last dimension) of a each tensor in `values` must be greater than or equal -// to corresponding repeat count in `sizes`. -// -// A `message_type` name must be provided to give context for the field names. -// The actual message descriptor can be looked up either in the linked-in -// descriptor pool or a filename provided by the caller using the -// `descriptor_source` attribute. -// -// For the most part, the mapping between Proto field types and TensorFlow dtypes -// is straightforward. However, there are a few special cases: -// -// - A proto field that contains a submessage or group can only be converted -// to `DT_STRING` (the serialized submessage). This is to reduce the complexity -// of the API. The resulting string can be used as input to another instance of -// the decode_proto op. -// -// - TensorFlow lacks support for unsigned integers. The ops represent uint64 -// types as a `DT_INT64` with the same twos-complement bit pattern (the obvious -// way). Unsigned int32 values can be represented exactly by specifying type -// `DT_INT64`, or using twos-complement if the caller specifies `DT_INT32` in -// the `output_types` attribute. -// -// The `descriptor_source` attribute selects the source of protocol -// descriptors to consult when looking up `message_type`. This may be: -// -// - An empty string or "local://", in which case protocol descriptors are -// created for C++ (not Python) proto definitions linked to the binary. -// -// - A file, in which case protocol descriptors are created from the file, -// which is expected to contain a `FileDescriptorSet` serialized as a string. -// NOTE: You can build a `descriptor_source` file using the `--descriptor_set_out` -// and `--include_imports` options to the protocol compiler `protoc`. -// -// - A "bytes://", in which protocol descriptors are created from ``, -// which is expected to be a `FileDescriptorSet` serialized as a string. -// -// Arguments: -// sizes: Tensor of int32 with shape `[batch_shape, len(field_names)]`. -// values: List of tensors containing values for the corresponding field. -// field_names: List of strings containing proto field names. -// message_type: Name of the proto message type to decode. -// -// Returns Tensor of serialized protos with shape `batch_shape`. 
-func EncodeProto(scope *Scope, sizes tf.Output, values []tf.Output, field_names []string, message_type string, optional ...EncodeProtoAttr) (bytes tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"field_names": field_names, "message_type": message_type} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "EncodeProto", - Input: []tf.Input{ - sizes, tf.OutputList(values), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Registers a dataset with the tf.data service. -func RegisterDataset(scope *Scope, dataset tf.Output, address tf.Output, protocol tf.Output, external_state_policy int64) (dataset_id tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"external_state_policy": external_state_policy} - opspec := tf.OpSpec{ - Type: "RegisterDataset", - Input: []tf.Input{ - dataset, address, protocol, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// DataServiceDatasetAttr is an optional argument to DataServiceDataset. -type DataServiceDatasetAttr func(optionalAttr) - -// DataServiceDatasetTaskRefreshIntervalHintMs sets the optional task_refresh_interval_hint_ms attribute to value. -// If not specified, defaults to -1 -func DataServiceDatasetTaskRefreshIntervalHintMs(value int64) DataServiceDatasetAttr { - return func(m optionalAttr) { - m["task_refresh_interval_hint_ms"] = value - } -} - -// Creates a dataset that reads data from the tf.data service. -func DataServiceDataset(scope *Scope, dataset_id tf.Output, processing_mode tf.Output, address tf.Output, protocol tf.Output, job_name tf.Output, max_outstanding_requests tf.Output, iteration_counter tf.Output, output_types []tf.DataType, output_shapes []tf.Shape, optional ...DataServiceDatasetAttr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DataServiceDataset", - Input: []tf.Input{ - dataset_id, processing_mode, address, protocol, job_name, max_outstanding_requests, iteration_counter, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that contains the unique elements of `input_dataset`. -func UniqueDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "UniqueDataset", - Input: []tf.Input{ - input_dataset, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// A dataset that splits the elements of its input into multiple elements. -func ExperimentalUnbatchDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "ExperimentalUnbatchDataset", - Input: []tf.Input{ - input_dataset, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// A dataset that splits the elements of its input into multiple elements. 
-func UnbatchDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "UnbatchDataset", - Input: []tf.Input{ - input_dataset, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that uses a custom thread pool to compute `input_dataset`. -// -// Arguments: -// -// thread_pool: A resource produced by the ThreadPoolHandle op. -// -// -func ExperimentalThreadPoolDataset(scope *Scope, input_dataset tf.Output, thread_pool tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "ExperimentalThreadPoolDataset", - Input: []tf.Input{ - input_dataset, thread_pool, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Gets the next output from the given iterator as an Optional variant. -func IteratorGetNextAsOptional(scope *Scope, iterator tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (optional tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "IteratorGetNextAsOptional", - Input: []tf.Input{ - iterator, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Produces a summary of any statistics recorded by the given statistics manager. -func StatsAggregatorSummary(scope *Scope, iterator tf.Output) (summary tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "StatsAggregatorSummary", - Input: []tf.Input{ - iterator, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ExperimentalStatsAggregatorHandleAttr is an optional argument to ExperimentalStatsAggregatorHandle. -type ExperimentalStatsAggregatorHandleAttr func(optionalAttr) - -// ExperimentalStatsAggregatorHandleContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func ExperimentalStatsAggregatorHandleContainer(value string) ExperimentalStatsAggregatorHandleAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// ExperimentalStatsAggregatorHandleSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func ExperimentalStatsAggregatorHandleSharedName(value string) ExperimentalStatsAggregatorHandleAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Creates a statistics manager resource. -func ExperimentalStatsAggregatorHandle(scope *Scope, optional ...ExperimentalStatsAggregatorHandleAttr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ExperimentalStatsAggregatorHandle", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// StatsAggregatorHandleAttr is an optional argument to StatsAggregatorHandle. -type StatsAggregatorHandleAttr func(optionalAttr) - -// StatsAggregatorHandleContainer sets the optional container attribute to value. 
-// If not specified, defaults to "" -func StatsAggregatorHandleContainer(value string) StatsAggregatorHandleAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// StatsAggregatorHandleSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func StatsAggregatorHandleSharedName(value string) StatsAggregatorHandleAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Creates a statistics manager resource. -func StatsAggregatorHandle(scope *Scope, optional ...StatsAggregatorHandleAttr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StatsAggregatorHandle", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that executes a SQL query and emits rows of the result set. -// -// Arguments: -// driver_name: The database type. Currently, the only supported type is 'sqlite'. -// data_source_name: A connection string to connect to the database. -// query: A SQL query to execute. -// -// -func ExperimentalSqlDataset(scope *Scope, driver_name tf.Output, data_source_name tf.Output, query tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "ExperimentalSqlDataset", - Input: []tf.Input{ - driver_name, data_source_name, query, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Generate the bucket boundaries for each feature based on accumulated summaries. -// -// An op that returns a list of float tensors for a quantile stream resource. Each -// tensor is Rank 1 containing bucket boundaries for a single feature. -// -// Arguments: -// quantile_stream_resource_handle: resource handle referring to a QuantileStreamResource. -// num_features: inferred int; number of features to get bucket boundaries for. -// -// Returns float; List of Rank 1 Tensors each containing the bucket boundaries for a feature. -func BoostedTreesQuantileStreamResourceGetBucketBoundaries(scope *Scope, quantile_stream_resource_handle tf.Output, num_features int64) (bucket_boundaries []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_features": num_features} - opspec := tf.OpSpec{ - Type: "BoostedTreesQuantileStreamResourceGetBucketBoundaries", - Input: []tf.Input{ - quantile_stream_resource_handle, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if bucket_boundaries, idx, err = makeOutputList(op, idx, "bucket_boundaries"); err != nil { - scope.UpdateErr("BoostedTreesQuantileStreamResourceGetBucketBoundaries", err) - return - } - return bucket_boundaries -} - -// Creates a dataset that passes a sliding window over `input_dataset`. -// -// Arguments: -// -// window_size: A scalar representing the number of elements in the -// sliding window. -// window_shift: A scalar representing the steps moving the sliding window -// forward in one iteration. It must be positive. -// window_stride: A scalar representing the stride of the input elements of the sliding window. -// It must be positive. 
-// -// -func ExperimentalSlidingWindowDataset(scope *Scope, input_dataset tf.Output, window_size tf.Output, window_shift tf.Output, window_stride tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "ExperimentalSlidingWindowDataset", - Input: []tf.Input{ - input_dataset, window_size, window_shift, window_stride, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Deprecated. Use TensorArraySizeV3 -// -// DEPRECATED at GraphDef version 26: Use TensorArraySizeV3 -func TensorArraySizeV2(scope *Scope, handle tf.Output, flow_in tf.Output) (size tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorArraySizeV2", - Input: []tf.Input{ - handle, flow_in, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Elementwise computes the bitwise right-shift of `x` and `y`. -// -// Performs a logical shift for unsigned integer types, and an arithmetic shift -// for signed integer types. -// -// If `y` is negative, or greater than or equal to than the width of `x` in bits -// the result is implementation defined. -// -// Example: -// -// ```python -// import tensorflow as tf -// from tensorflow.python.ops import bitwise_ops -// import numpy as np -// dtype_list = [tf.int8, tf.int16, tf.int32, tf.int64] -// -// for dtype in dtype_list: -// lhs = tf.constant([-1, -5, -3, -14], dtype=dtype) -// rhs = tf.constant([5, 0, 7, 11], dtype=dtype) -// -// right_shift_result = bitwise_ops.right_shift(lhs, rhs) -// -// print(right_shift_result) -// -// # This will print: -// # tf.Tensor([-1 -5 -1 -1], shape=(4,), dtype=int8) -// # tf.Tensor([-1 -5 -1 -1], shape=(4,), dtype=int16) -// # tf.Tensor([-1 -5 -1 -1], shape=(4,), dtype=int32) -// # tf.Tensor([-1 -5 -1 -1], shape=(4,), dtype=int64) -// -// lhs = np.array([-2, 64, 101, 32], dtype=np.int8) -// rhs = np.array([-1, -5, -3, -14], dtype=np.int8) -// bitwise_ops.right_shift(lhs, rhs) -// # -// ``` -// -func RightShift(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "RightShift", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RebatchDatasetAttr is an optional argument to RebatchDataset. -type RebatchDatasetAttr func(optionalAttr) - -// RebatchDatasetUseFallback sets the optional use_fallback attribute to value. -// If not specified, defaults to true -func RebatchDatasetUseFallback(value bool) RebatchDatasetAttr { - return func(m optionalAttr) { - m["use_fallback"] = value - } -} - -// Creates a dataset that changes the batch size. -// -// Creates a dataset that changes the batch size of the dataset to current batch -// size // num_workers. -// -// Arguments: -// input_dataset: A variant tensor representing the input dataset. -// num_replicas: A scalar representing the number of replicas to distribute this batch across. As -// a result of this transformation the current batch size would end up being -// divided by this parameter. 
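A Go counterpart to the Python RightShift example above; the expected output simply mirrors the values printed there.

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	lhs := op.Const(s, []int32{-1, -5, -3, -14})
	rhs := op.Const(s, []int32{5, 0, 7, 11})
	// Arithmetic shift for signed types: negative values stay negative.
	z := op.RightShift(s, lhs, rhs)
	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	out, err := sess.Run(nil, []tf.Output{z}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // [-1 -5 -1 -1], matching the Python example above
}
```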
-// -// -func RebatchDataset(scope *Scope, input_dataset tf.Output, num_replicas tf.Output, output_types []tf.DataType, output_shapes []tf.Shape, optional ...RebatchDatasetAttr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RebatchDataset", - Input: []tf.Input{ - input_dataset, num_replicas, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that uses a custom thread pool to compute `input_dataset`. -// -// Arguments: -// -// num_threads: Identifies the number of threads to use for the private threadpool. -// -// -func ExperimentalPrivateThreadPoolDataset(scope *Scope, input_dataset tf.Output, num_threads tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "ExperimentalPrivateThreadPoolDataset", - Input: []tf.Input{ - input_dataset, num_threads, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that uses a custom thread pool to compute `input_dataset`. -// -// Arguments: -// -// num_threads: Identifies the number of threads to use for the private threadpool. -// -// -func PrivateThreadPoolDataset(scope *Scope, input_dataset tf.Output, num_threads tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "PrivateThreadPoolDataset", - Input: []tf.Input{ - input_dataset, num_threads, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ExperimentalParseExampleDatasetAttr is an optional argument to ExperimentalParseExampleDataset. -type ExperimentalParseExampleDatasetAttr func(optionalAttr) - -// ExperimentalParseExampleDatasetSloppy sets the optional sloppy attribute to value. -// If not specified, defaults to false -func ExperimentalParseExampleDatasetSloppy(value bool) ExperimentalParseExampleDatasetAttr { - return func(m optionalAttr) { - m["sloppy"] = value - } -} - -// Transforms `input_dataset` containing `Example` protos as vectors of DT_STRING into a dataset of `Tensor` or `SparseTensor` objects representing the parsed features. -// -// Arguments: -// -// -// dense_defaults: A dict mapping string keys to `Tensor`s. -// The keys of the dict must match the dense_keys of the feature. -// sparse_keys: A list of string keys in the examples features. -// The results for these keys will be returned as `SparseTensor` objects. -// dense_keys: A list of Ndense string Tensors (scalars). -// The keys expected in the Examples features associated with dense values. -// sparse_types: A list of `DTypes` of the same length as `sparse_keys`. -// Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`), -// and `tf.string` (`BytesList`) are supported. -// dense_shapes: List of tuples with the same length as `dense_keys`. -// The shape of the data for each dense feature referenced by `dense_keys`. -// Required for any input tensors identified by `dense_keys`. Must be -// either fully defined, or may contain an unknown first dimension. 
-// An unknown first dimension means the feature is treated as having -// a variable number of blocks, and the output shape along this dimension -// is considered unknown at graph build time. Padding is applied for -// minibatch elements smaller than the maximum number of blocks for the -// given feature along this dimension. -// output_types: The type list for the return values. -// output_shapes: The list of shapes being produced. -func ExperimentalParseExampleDataset(scope *Scope, input_dataset tf.Output, num_parallel_calls tf.Output, dense_defaults []tf.Output, sparse_keys []string, dense_keys []string, sparse_types []tf.DataType, dense_shapes []tf.Shape, output_types []tf.DataType, output_shapes []tf.Shape, optional ...ExperimentalParseExampleDatasetAttr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"sparse_keys": sparse_keys, "dense_keys": dense_keys, "sparse_types": sparse_types, "dense_shapes": dense_shapes, "output_types": output_types, "output_shapes": output_shapes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ExperimentalParseExampleDataset", - Input: []tf.Input{ - input_dataset, num_parallel_calls, tf.OutputList(dense_defaults), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns a batched matrix tensor with new batched diagonal values. -// -// Given `input` and `diagonal`, this operation returns a tensor with the -// same shape and values as `input`, except for the main diagonal of the -// innermost matrices. These will be overwritten by the values in `diagonal`. -// -// The output is computed as follows: -// -// Assume `input` has `k+1` dimensions `[I, J, K, ..., M, N]` and `diagonal` has -// `k` dimensions `[I, J, K, ..., min(M, N)]`. Then the output is a -// tensor of rank `k+1` with dimensions `[I, J, K, ..., M, N]` where: -// -// * `output[i, j, k, ..., m, n] = diagonal[i, j, k, ..., n]` for `m == n`. -// * `output[i, j, k, ..., m, n] = input[i, j, k, ..., m, n]` for `m != n`. -// -// Arguments: -// input: Rank `k+1`, where `k >= 1`. -// diagonal: Rank `k`, where `k >= 1`. -// -// Returns Rank `k+1`, with `output.shape = input.shape`. -func MatrixSetDiag(scope *Scope, input tf.Output, diagonal tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MatrixSetDiag", - Input: []tf.Input{ - input, diagonal, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ParseExampleDatasetV2Attr is an optional argument to ParseExampleDatasetV2. -type ParseExampleDatasetV2Attr func(optionalAttr) - -// ParseExampleDatasetV2Deterministic sets the optional deterministic attribute to value. -// -// value: A string indicating the op-level determinism to use. Deterministic controls -// whether the dataset is allowed to return elements out of order if the next -// element to be returned isn't available, but a later element is. Options are -// "true", "false", and "default". "default" indicates that determinism should be -// decided by the `experimental_deterministic` parameter of `tf.data.Options`. -// If not specified, defaults to "default" -func ParseExampleDatasetV2Deterministic(value string) ParseExampleDatasetV2Attr { - return func(m optionalAttr) { - m["deterministic"] = value - } -} - -// ParseExampleDatasetV2RaggedKeys sets the optional ragged_keys attribute to value. 
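MatrixSetDiag, defined above, replaces only the main diagonal of the innermost matrices and leaves everything else untouched. A minimal graph-construction sketch (illustrative only, not part of the patch; standard Go binding packages assumed):

```go
package main

import (
	"fmt"

	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// One 3x3 matrix; its diagonal (7s) is overwritten with [1, 2, 3],
	// giving [[1 9 9] [9 2 9] [9 9 3]].
	input := op.Const(s, [][]float32{{7, 9, 9}, {9, 7, 9}, {9, 9, 7}})
	diagonal := op.Const(s, []float32{1, 2, 3})
	out := op.MatrixSetDiag(s, input, diagonal)

	if _, err := s.Finalize(); err != nil {
		panic(err)
	}
	fmt.Println("MatrixSetDiag output (same shape as input):", out)
}
```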
-// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseExampleDatasetV2RaggedKeys(value []string) ParseExampleDatasetV2Attr { - return func(m optionalAttr) { - m["ragged_keys"] = value - } -} - -// ParseExampleDatasetV2RaggedValueTypes sets the optional ragged_value_types attribute to value. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseExampleDatasetV2RaggedValueTypes(value []tf.DataType) ParseExampleDatasetV2Attr { - return func(m optionalAttr) { - m["ragged_value_types"] = value - } -} - -// ParseExampleDatasetV2RaggedSplitTypes sets the optional ragged_split_types attribute to value. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseExampleDatasetV2RaggedSplitTypes(value []tf.DataType) ParseExampleDatasetV2Attr { - return func(m optionalAttr) { - m["ragged_split_types"] = value - } -} - -// Transforms `input_dataset` containing `Example` protos as vectors of DT_STRING into a dataset of `Tensor` or `SparseTensor` objects representing the parsed features. -// -// Arguments: -// -// -// dense_defaults: A dict mapping string keys to `Tensor`s. -// The keys of the dict must match the dense_keys of the feature. -// sparse_keys: A list of string keys in the examples features. -// The results for these keys will be returned as `SparseTensor` objects. -// dense_keys: A list of Ndense string Tensors (scalars). -// The keys expected in the Examples features associated with dense values. -// sparse_types: A list of `DTypes` of the same length as `sparse_keys`. -// Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`), -// and `tf.string` (`BytesList`) are supported. -// dense_shapes: List of tuples with the same length as `dense_keys`. -// The shape of the data for each dense feature referenced by `dense_keys`. -// Required for any input tensors identified by `dense_keys`. Must be -// either fully defined, or may contain an unknown first dimension. -// An unknown first dimension means the feature is treated as having -// a variable number of blocks, and the output shape along this dimension -// is considered unknown at graph build time. Padding is applied for -// minibatch elements smaller than the maximum number of blocks for the -// given feature along this dimension. -// output_types: The type list for the return values. -// output_shapes: The list of shapes being produced. -func ParseExampleDatasetV2(scope *Scope, input_dataset tf.Output, num_parallel_calls tf.Output, dense_defaults []tf.Output, sparse_keys []string, dense_keys []string, sparse_types []tf.DataType, dense_shapes []tf.Shape, output_types []tf.DataType, output_shapes []tf.Shape, optional ...ParseExampleDatasetV2Attr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"sparse_keys": sparse_keys, "dense_keys": dense_keys, "sparse_types": sparse_types, "dense_shapes": dense_shapes, "output_types": output_types, "output_shapes": output_shapes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ParseExampleDatasetV2", - Input: []tf.Input{ - input_dataset, num_parallel_calls, tf.OutputList(dense_defaults), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// GenerateVocabRemappingAttr is an optional argument to GenerateVocabRemapping. -type GenerateVocabRemappingAttr func(optionalAttr) - -// GenerateVocabRemappingOldVocabSize sets the optional old_vocab_size attribute to value. 
-// -// value: Number of entries in the old vocab file to consider. If -1, -// use the entire old vocabulary. -// If not specified, defaults to -1 -// -// REQUIRES: value >= -1 -func GenerateVocabRemappingOldVocabSize(value int64) GenerateVocabRemappingAttr { - return func(m optionalAttr) { - m["old_vocab_size"] = value - } -} - -// Given a path to new and old vocabulary files, returns a remapping Tensor of -// -// length `num_new_vocab`, where `remapping[i]` contains the row number in the old -// vocabulary that corresponds to row `i` in the new vocabulary (starting at line -// `new_vocab_offset` and up to `num_new_vocab` entities), or `-1` if entry `i` -// in the new vocabulary is not in the old vocabulary. The old vocabulary is -// constrained to the first `old_vocab_size` entries if `old_vocab_size` is not the -// default value of -1. -// -// `num_vocab_offset` enables -// use in the partitioned variable case, and should generally be set through -// examining partitioning info. The format of the files should be a text file, -// with each line containing a single entity within the vocabulary. -// -// For example, with `new_vocab_file` a text file containing each of the following -// elements on a single line: `[f0, f1, f2, f3]`, old_vocab_file = [f1, f0, f3], -// `num_new_vocab = 3, new_vocab_offset = 1`, the returned remapping would be -// `[0, -1, 2]`. -// -// The op also returns a count of how many entries in the new vocabulary -// were present in the old vocabulary, which is used to calculate the number of -// values to initialize in a weight matrix remapping -// -// This functionality can be used to remap both row vocabularies (typically, -// features) and column vocabularies (typically, classes) from TensorFlow -// checkpoints. Note that the partitioning logic relies on contiguous vocabularies -// corresponding to div-partitioned variables. Moreover, the underlying remapping -// uses an IndexTable (as opposed to an inexact CuckooTable), so client code should -// use the corresponding index_table_from_file() as the FeatureColumn framework -// does (as opposed to tf.feature_to_id(), which uses a CuckooTable). -// -// Arguments: -// new_vocab_file: Path to the new vocab file. -// old_vocab_file: Path to the old vocab file. -// new_vocab_offset: How many entries into the new vocab file to start reading. -// num_new_vocab: Number of entries in the new vocab file to remap. -// -// Returns: -// remapping: A Tensor of length num_new_vocab where the element at index i -// is equal to the old ID that maps to the new ID i. This element is -1 for any -// new ID that is not found in the old vocabulary. -// num_present: Number of new vocab entries found in old vocab. -func GenerateVocabRemapping(scope *Scope, new_vocab_file tf.Output, old_vocab_file tf.Output, new_vocab_offset int64, num_new_vocab int64, optional ...GenerateVocabRemappingAttr) (remapping tf.Output, num_present tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"new_vocab_offset": new_vocab_offset, "num_new_vocab": num_new_vocab} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "GenerateVocabRemapping", - Input: []tf.Input{ - new_vocab_file, old_vocab_file, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Creates a dataset that overrides the maximum intra-op parallelism. -// -// Arguments: -// -// max_intra_op_parallelism: Identifies the maximum intra-op parallelism to use. 
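GenerateVocabRemapping, documented above, maps rows of a new vocabulary back to rows of an old one. The sketch below mirrors the worked example from the comment (new vocab `[f0, f1, f2, f3]`, old vocab `[f1, f0, f3]`, offset 1, three entries). It is illustrative only; the file paths are hypothetical, and the optional attribute is shown mainly to demonstrate the functional-options pattern used throughout this file.

```go
package main

import (
	"fmt"

	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Hypothetical vocab files, one entry per line:
	//   new_vocab.txt: f0, f1, f2, f3      old_vocab.txt: f1, f0, f3
	newVocab := op.Const(s, "new_vocab.txt")
	oldVocab := op.Const(s, "old_vocab.txt")

	remapping, numPresent := op.GenerateVocabRemapping(
		s, newVocab, oldVocab,
		1, // new_vocab_offset
		3, // num_new_vocab
		op.GenerateVocabRemappingOldVocabSize(-1), // -1: use the whole old vocab
	)

	if _, err := s.Finalize(); err != nil {
		panic(err)
	}
	// Per the comment above, running this graph would yield
	// remapping = [0, -1, 2] and num_present = 2.
	fmt.Println(remapping, numPresent)
}
```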
-// -// -func ExperimentalMaxIntraOpParallelismDataset(scope *Scope, input_dataset tf.Output, max_intra_op_parallelism tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "ExperimentalMaxIntraOpParallelismDataset", - Input: []tf.Input{ - input_dataset, max_intra_op_parallelism, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SpaceToBatch for N-D tensors of type T. -// -// This operation divides "spatial" dimensions `[1, ..., M]` of the input into a -// grid of blocks of shape `block_shape`, and interleaves these blocks with the -// "batch" dimension (0) such that in the output, the spatial dimensions -// `[1, ..., M]` correspond to the position within the grid, and the batch -// dimension combines both the position within a spatial block and the original -// batch position. Prior to division into blocks, the spatial dimensions of the -// input are optionally zero padded according to `paddings`. See below for a -// precise description. -// -// Arguments: -// input: N-D with shape `input_shape = [batch] + spatial_shape + remaining_shape`, -// where spatial_shape has `M` dimensions. -// block_shape: 1-D with shape `[M]`, all values must be >= 1. -// paddings: 2-D with shape `[M, 2]`, all values must be >= 0. -// `paddings[i] = [pad_start, pad_end]` specifies the padding for input dimension -// `i + 1`, which corresponds to spatial dimension `i`. It is required that -// `block_shape[i]` divides `input_shape[i + 1] + pad_start + pad_end`. -// -// This operation is equivalent to the following steps: -// -// 1. Zero-pad the start and end of dimensions `[1, ..., M]` of the -// input according to `paddings` to produce `padded` of shape `padded_shape`. -// -// 2. Reshape `padded` to `reshaped_padded` of shape: -// -// [batch] + -// [padded_shape[1] / block_shape[0], -// block_shape[0], -// ..., -// padded_shape[M] / block_shape[M-1], -// block_shape[M-1]] + -// remaining_shape -// -// 3. Permute dimensions of `reshaped_padded` to produce -// `permuted_reshaped_padded` of shape: -// -// block_shape + -// [batch] + -// [padded_shape[1] / block_shape[0], -// ..., -// padded_shape[M] / block_shape[M-1]] + -// remaining_shape -// -// 4. 
Reshape `permuted_reshaped_padded` to flatten `block_shape` into the batch -// dimension, producing an output tensor of shape: -// -// [batch * prod(block_shape)] + -// [padded_shape[1] / block_shape[0], -// ..., -// padded_shape[M] / block_shape[M-1]] + -// remaining_shape -// -// Some examples: -// -// (1) For the following input of shape `[1, 2, 2, 1]`, `block_shape = [2, 2]`, and -// `paddings = [[0, 0], [0, 0]]`: -// -// ``` -// x = [[[[1], [2]], [[3], [4]]]] -// ``` -// -// The output tensor has shape `[4, 1, 1, 1]` and value: -// -// ``` -// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]] -// ``` -// -// (2) For the following input of shape `[1, 2, 2, 3]`, `block_shape = [2, 2]`, and -// `paddings = [[0, 0], [0, 0]]`: -// -// ``` -// x = [[[[1, 2, 3], [4, 5, 6]], -// [[7, 8, 9], [10, 11, 12]]]] -// ``` -// -// The output tensor has shape `[4, 1, 1, 3]` and value: -// -// ``` -// [[[[1, 2, 3]]], [[[4, 5, 6]]], [[[7, 8, 9]]], [[[10, 11, 12]]]] -// ``` -// -// (3) For the following input of shape `[1, 4, 4, 1]`, `block_shape = [2, 2]`, and -// `paddings = [[0, 0], [0, 0]]`: -// -// ``` -// x = [[[[1], [2], [3], [4]], -// [[5], [6], [7], [8]], -// [[9], [10], [11], [12]], -// [[13], [14], [15], [16]]]] -// ``` -// -// The output tensor has shape `[4, 2, 2, 1]` and value: -// -// ``` -// x = [[[[1], [3]], [[9], [11]]], -// [[[2], [4]], [[10], [12]]], -// [[[5], [7]], [[13], [15]]], -// [[[6], [8]], [[14], [16]]]] -// ``` -// -// (4) For the following input of shape `[2, 2, 4, 1]`, block_shape = `[2, 2]`, and -// paddings = `[[0, 0], [2, 0]]`: -// -// ``` -// x = [[[[1], [2], [3], [4]], -// [[5], [6], [7], [8]]], -// [[[9], [10], [11], [12]], -// [[13], [14], [15], [16]]]] -// ``` -// -// The output tensor has shape `[8, 1, 3, 1]` and value: -// -// ``` -// x = [[[[0], [1], [3]]], [[[0], [9], [11]]], -// [[[0], [2], [4]]], [[[0], [10], [12]]], -// [[[0], [5], [7]]], [[[0], [13], [15]]], -// [[[0], [6], [8]]], [[[0], [14], [16]]]] -// ``` -// -// Among others, this operation is useful for reducing atrous convolution into -// regular convolution. -func SpaceToBatchND(scope *Scope, input tf.Output, block_shape tf.Output, paddings tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SpaceToBatchND", - Input: []tf.Input{ - input, block_shape, paddings, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns a batched diagonal tensor with given batched diagonal values. -// -// Returns a tensor with the contents in `diagonal` as `k[0]`-th to `k[1]`-th -// diagonals of a matrix, with everything else padded with `padding`. `num_rows` -// and `num_cols` specify the dimension of the innermost matrix of the output. If -// both are not specified, the op assumes the innermost matrix is square and infers -// its size from `k` and the innermost dimension of `diagonal`. If only one of them -// is specified, the op assumes the unspecified value is the smallest possible -// based on other criteria. -// -// Let `diagonal` have `r` dimensions `[I, J, ..., L, M, N]`. The output tensor has -// rank `r+1` with shape `[I, J, ..., L, M, num_rows, num_cols]` when only one -// diagonal is given (`k` is an integer or `k[0] == k[1]`). Otherwise, it has rank -// `r` with shape `[I, J, ..., L, num_rows, num_cols]`. -// -// The second innermost dimension of `diagonal` has double meaning. 
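SpaceToBatchND above is documented with several worked examples; the sketch below reproduces example (1) through the Go wrapper. Illustrative only, assuming the standard Go binding packages.

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Example (1): input shape [1, 2, 2, 1], block_shape [2, 2], zero paddings.
	x := op.Const(s, [][][][]float32{{{{1}, {2}}, {{3}, {4}}}})
	blockShape := op.Const(s, []int64{2, 2})
	paddings := op.Const(s, [][]int64{{0, 0}, {0, 0}})
	y := op.SpaceToBatchND(s, x, blockShape, paddings)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{y}, nil)
	if err != nil {
		panic(err)
	}
	// Expected shape [4, 1, 1, 1] with values [[[[1]]], [[[2]]], [[[3]]], [[[4]]]].
	fmt.Println(out[0].Shape(), out[0].Value())
}
```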
-// When `k` is scalar or `k[0] == k[1]`, `M` is part of the batch size -// [I, J, ..., M], and the output tensor is: -// -// ``` -// output[i, j, ..., l, m, n] -// = diagonal[i, j, ..., l, n-max(d_upper, 0)] ; if n - m == d_upper -// padding_value ; otherwise -// ``` -// -// Otherwise, `M` is treated as the number of diagonals for the matrix in the -// same batch (`M = k[1]-k[0]+1`), and the output tensor is: -// -// ``` -// output[i, j, ..., l, m, n] -// = diagonal[i, j, ..., l, diag_index, index_in_diag] ; if k[0] <= d <= k[1] -// padding_value ; otherwise -// ``` -// where `d = n - m`, `diag_index = k[1] - d`, and `index_in_diag = n - max(d, 0)`. -// -// For example: -// -// ``` -// # The main diagonal. -// diagonal = np.array([[1, 2, 3, 4], # Input shape: (2, 4) -// [5, 6, 7, 8]]) -// tf.matrix_diag(diagonal) ==> [[[1, 0, 0, 0], # Output shape: (2, 4, 4) -// [0, 2, 0, 0], -// [0, 0, 3, 0], -// [0, 0, 0, 4]], -// [[5, 0, 0, 0], -// [0, 6, 0, 0], -// [0, 0, 7, 0], -// [0, 0, 0, 8]]] -// -// # A superdiagonal (per batch). -// diagonal = np.array([[1, 2, 3], # Input shape: (2, 3) -// [4, 5, 6]]) -// tf.matrix_diag(diagonal, k = 1) -// ==> [[[0, 1, 0, 0], # Output shape: (2, 4, 4) -// [0, 0, 2, 0], -// [0, 0, 0, 3], -// [0, 0, 0, 0]], -// [[0, 4, 0, 0], -// [0, 0, 5, 0], -// [0, 0, 0, 6], -// [0, 0, 0, 0]]] -// -// # A band of diagonals. -// diagonals = np.array([[[1, 2, 3], # Input shape: (2, 2, 3) -// [4, 5, 0]], -// [[6, 7, 9], -// [9, 1, 0]]]) -// tf.matrix_diag(diagonals, k = (-1, 0)) -// ==> [[[1, 0, 0], # Output shape: (2, 3, 3) -// [4, 2, 0], -// [0, 5, 3]], -// [[6, 0, 0], -// [9, 7, 0], -// [0, 1, 9]]] -// -// # Rectangular matrix. -// diagonal = np.array([1, 2]) # Input shape: (2) -// tf.matrix_diag(diagonal, k = -1, num_rows = 3, num_cols = 4) -// ==> [[0, 0, 0, 0], # Output shape: (3, 4) -// [1, 0, 0, 0], -// [0, 2, 0, 0]] -// -// # Rectangular matrix with inferred num_cols and padding_value = 9. -// tf.matrix_diag(diagonal, k = -1, num_rows = 3, padding_value = 9) -// ==> [[9, 9], # Output shape: (3, 2) -// [1, 9], -// [9, 2]] -// ``` -// -// Arguments: -// diagonal: Rank `r`, where `r >= 1` -// k: Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main -// diagonal, and negative value means subdiagonals. `k` can be a single integer -// (for a single diagonal) or a pair of integers specifying the low and high ends -// of a matrix band. `k[0]` must not be larger than `k[1]`. -// num_rows: The number of rows of the output matrix. If it is not provided, the op assumes -// the output matrix is a square matrix and infers the matrix size from k and the -// innermost dimension of `diagonal`. -// num_cols: The number of columns of the output matrix. If it is not provided, the op -// assumes the output matrix is a square matrix and infers the matrix size from -// k and the innermost dimension of `diagonal`. -// padding_value: The number to fill the area outside the specified diagonal band with. -// Default is 0. -// -// Returns Has rank `r+1` when `k` is an integer or `k[0] == k[1]`, rank `r` otherwise. -func MatrixDiagV2(scope *Scope, diagonal tf.Output, k tf.Output, num_rows tf.Output, num_cols tf.Output, padding_value tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MatrixDiagV2", - Input: []tf.Input{ - diagonal, k, num_rows, num_cols, padding_value, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that overrides the maximum intra-op parallelism. 
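MatrixDiagV2 above takes the diagonal offset `k`, the output dimensions, and the padding value as tensors rather than attributes. A minimal sketch reproducing the "main diagonal" example from the comment (illustrative only, not part of the patch):

```go
package main

import (
	"fmt"

	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Two length-4 diagonals, as in the first example of the comment above.
	diagonal := op.Const(s, [][]float32{{1, 2, 3, 4}, {5, 6, 7, 8}})
	k := op.Const(s, int32(0))        // main diagonal
	numRows := op.Const(s, int32(-1)) // -1: infer a square matrix from `diagonal`
	numCols := op.Const(s, int32(-1))
	padding := op.Const(s, float32(0))

	// Expected output shape [2, 4, 4], with everything off-diagonal equal to 0.
	out := op.MatrixDiagV2(s, diagonal, k, numRows, numCols, padding)

	if _, err := s.Finalize(); err != nil {
		panic(err)
	}
	fmt.Println("MatrixDiagV2 output:", out)
}
```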
-// -// Arguments: -// -// max_intra_op_parallelism: Identifies the maximum intra-op parallelism to use. -// -// -func MaxIntraOpParallelismDataset(scope *Scope, input_dataset tf.Output, max_intra_op_parallelism tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "MaxIntraOpParallelismDataset", - Input: []tf.Input{ - input_dataset, max_intra_op_parallelism, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// StageClearAttr is an optional argument to StageClear. -type StageClearAttr func(optionalAttr) - -// StageClearCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func StageClearCapacity(value int64) StageClearAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// StageClearMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func StageClearMemoryLimit(value int64) StageClearAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// StageClearContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func StageClearContainer(value string) StageClearAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// StageClearSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func StageClearSharedName(value string) StageClearAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op removes all elements in the underlying container. -// -// Returns the created operation. -func StageClear(scope *Scope, dtypes []tf.DataType, optional ...StageClearAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StageClear", - - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Records the latency of producing `input_dataset` elements in a StatsAggregator. -func ExperimentalLatencyStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "ExperimentalLatencyStatsDataset", - Input: []tf.Input{ - input_dataset, tag, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the name of the device on which `resource` has been placed. -func IteratorGetDevice(scope *Scope, resource tf.Output) (device tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "IteratorGetDevice", - Input: []tf.Input{ - resource, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a Dataset that returns pseudorandom numbers. -// -// Arguments: -// seed: A scalar seed for the random number generator. If either seed or -// seed2 is set to be non-zero, the random number generator is seeded -// by the given seed. Otherwise, a random seed is used. -// seed2: A second scalar seed to avoid seed collision. 
-// -// -func ExperimentalRandomDataset(scope *Scope, seed tf.Output, seed2 tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "ExperimentalRandomDataset", - Input: []tf.Input{ - seed, seed2, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that contains the elements of `input_dataset` ignoring errors. -func ExperimentalIgnoreErrorsDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "ExperimentalIgnoreErrorsDataset", - Input: []tf.Input{ - input_dataset, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// CudnnRNNBackpropV2Attr is an optional argument to CudnnRNNBackpropV2. -type CudnnRNNBackpropV2Attr func(optionalAttr) - -// CudnnRNNBackpropV2RnnMode sets the optional rnn_mode attribute to value. -// If not specified, defaults to "lstm" -func CudnnRNNBackpropV2RnnMode(value string) CudnnRNNBackpropV2Attr { - return func(m optionalAttr) { - m["rnn_mode"] = value - } -} - -// CudnnRNNBackpropV2InputMode sets the optional input_mode attribute to value. -// If not specified, defaults to "linear_input" -func CudnnRNNBackpropV2InputMode(value string) CudnnRNNBackpropV2Attr { - return func(m optionalAttr) { - m["input_mode"] = value - } -} - -// CudnnRNNBackpropV2Direction sets the optional direction attribute to value. -// If not specified, defaults to "unidirectional" -func CudnnRNNBackpropV2Direction(value string) CudnnRNNBackpropV2Attr { - return func(m optionalAttr) { - m["direction"] = value - } -} - -// CudnnRNNBackpropV2Dropout sets the optional dropout attribute to value. -// If not specified, defaults to 0 -func CudnnRNNBackpropV2Dropout(value float32) CudnnRNNBackpropV2Attr { - return func(m optionalAttr) { - m["dropout"] = value - } -} - -// CudnnRNNBackpropV2Seed sets the optional seed attribute to value. -// If not specified, defaults to 0 -func CudnnRNNBackpropV2Seed(value int64) CudnnRNNBackpropV2Attr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// CudnnRNNBackpropV2Seed2 sets the optional seed2 attribute to value. -// If not specified, defaults to 0 -func CudnnRNNBackpropV2Seed2(value int64) CudnnRNNBackpropV2Attr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Backprop step of CudnnRNN. -// -// Compute the backprop of both data and weights in a RNN. Takes an extra -// "host_reserved" inupt than CudnnRNNBackprop, which is used to determine RNN -// cudnnRNNAlgo_t and cudnnMathType_t. -// -// rnn_mode: Indicates the type of the RNN model. -// input_mode: Indicates whether there is a linear projection between the input and -// the actual computation before the first layer. 'skip_input' is only allowed -// when input_size == num_units; 'auto_select' implies 'skip_input' when -// input_size == num_units; otherwise, it implies 'linear_input'. -// direction: Indicates whether a bidirectional model will be used. Should be -// "unidirectional" or "bidirectional". -// dropout: Dropout probability. When set to 0., dropout is disabled. -// seed: The 1st part of a seed to initialize dropout. 
-// seed2: The 2nd part of a seed to initialize dropout. -// input: A 3-D tensor with the shape of [seq_length, batch_size, input_size]. -// input_h: A 3-D tensor with the shape of [num_layer * dir, batch_size, -// num_units]. -// input_c: For LSTM, a 3-D tensor with the shape of -// [num_layer * dir, batch, num_units]. For other models, it is ignored. -// params: A 1-D tensor that contains the weights and biases in an opaque layout. -// The size must be created through CudnnRNNParamsSize, and initialized -// separately. Note that they might not be compatible across different -// generations. So it is a good idea to save and restore -// output: A 3-D tensor with the shape of [seq_length, batch_size, -// dir * num_units]. -// output_h: The same shape has input_h. -// output_c: The same shape as input_c for LSTM. An empty tensor for other models. -// output_backprop: A 3-D tensor with the same shape as output in the forward pass. -// output_h_backprop: A 3-D tensor with the same shape as output_h in the forward -// pass. -// output_c_backprop: A 3-D tensor with the same shape as output_c in the forward -// pass. -// reserve_space: The same reserve_space produced in the forward operation. -// host_reserved: The same host_reserved produced in the forward operation. -// input_backprop: The backprop to input in the forward pass. Has the same shape -// as input. -// input_h_backprop: The backprop to input_h in the forward pass. Has the same -// shape as input_h. -// input_c_backprop: The backprop to input_c in the forward pass. Has the same -// shape as input_c. -// params_backprop: The backprop to the params buffer in the forward pass. Has the -// same shape as params. -func CudnnRNNBackpropV2(scope *Scope, input tf.Output, input_h tf.Output, input_c tf.Output, params tf.Output, output tf.Output, output_h tf.Output, output_c tf.Output, output_backprop tf.Output, output_h_backprop tf.Output, output_c_backprop tf.Output, reserve_space tf.Output, host_reserved tf.Output, optional ...CudnnRNNBackpropV2Attr) (input_backprop tf.Output, input_h_backprop tf.Output, input_c_backprop tf.Output, params_backprop tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CudnnRNNBackpropV2", - Input: []tf.Input{ - input, input_h, input_c, params, output, output_h, output_c, output_backprop, output_h_backprop, output_c_backprop, reserve_space, host_reserved, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3) -} - -// A substitute for `InterleaveDataset` on a fixed list of `N` datasets. -// -// Arguments: -// selector_input_dataset: A dataset of scalar `DT_INT64` elements that determines which of the -// `N` data inputs should produce the next output element. -// data_input_datasets: `N` datasets with the same type that will be interleaved according to -// the values of `selector_input_dataset`. 
-// -// -func DirectedInterleaveDataset(scope *Scope, selector_input_dataset tf.Output, data_input_datasets []tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "DirectedInterleaveDataset", - Input: []tf.Input{ - selector_input_dataset, tf.OutputList(data_input_datasets), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that batches input elements into a SparseTensor. -// -// Arguments: -// input_dataset: A handle to an input dataset. Must have a single component. -// batch_size: A scalar representing the number of elements to accumulate in a -// batch. -// row_shape: A vector representing the dense shape of each row in the produced -// SparseTensor. The shape may be partially specified, using `-1` to indicate -// that a particular dimension should use the maximum size of all batch elements. -// -// -func ExperimentalDenseToSparseBatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, row_shape tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "ExperimentalDenseToSparseBatchDataset", - Input: []tf.Input{ - input_dataset, batch_size, row_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Writes the given dataset to the given file using the TFRecord format. -// -// Arguments: -// input_dataset: A variant tensor representing the dataset to write. -// filename: A scalar string tensor representing the filename to use. -// compression_type: A scalar string tensor containing either (i) the empty string (no -// compression), (ii) "ZLIB", or (iii) "GZIP". -// -// Returns the created operation. -func ExperimentalDatasetToTFRecord(scope *Scope, input_dataset tf.Output, filename tf.Output, compression_type tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ExperimentalDatasetToTFRecord", - Input: []tf.Input{ - input_dataset, filename, compression_type, - }, - } - return scope.AddOperation(opspec) -} - -// Creates a dataset from the given `graph_def`. -// -// Creates a dataset from the provided `graph_def`. -// -// Arguments: -// graph_def: The graph representation of the dataset (as serialized GraphDef). -// -// Returns A variant tensor representing the dataset. -func DatasetFromGraph(scope *Scope, graph_def tf.Output) (handle tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DatasetFromGraph", - Input: []tf.Input{ - graph_def, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the cardinality of `input_dataset`. -// -// Returns the cardinality of `input_dataset`. -// -// Arguments: -// input_dataset: A variant tensor representing the dataset to return cardinality for. -// -// Returns The cardinality of `input_dataset`. Named constants are used to represent -// infinite and unknown cardinality. 
-func ExperimentalDatasetCardinality(scope *Scope, input_dataset tf.Output) (cardinality tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ExperimentalDatasetCardinality", - Input: []tf.Input{ - input_dataset, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Interleave the values from the `data` tensors into a single tensor. -// -// Builds a merged tensor such that -// -// ```python -// merged[indices[m][i, ..., j], ...] = data[m][i, ..., j, ...] -// ``` -// -// For example, if each `indices[m]` is scalar or vector, we have -// -// ```python -// # Scalar indices: -// merged[indices[m], ...] = data[m][...] -// -// # Vector indices: -// merged[indices[m][i], ...] = data[m][i, ...] -// ``` -// -// Each `data[i].shape` must start with the corresponding `indices[i].shape`, -// and the rest of `data[i].shape` must be constant w.r.t. `i`. That is, we -// must have `data[i].shape = indices[i].shape + constant`. In terms of this -// `constant`, the output shape is -// -// merged.shape = [max(indices)] + constant -// -// Values are merged in order, so if an index appears in both `indices[m][i]` and -// `indices[n][j]` for `(m,i) < (n,j)` the slice `data[n][j]` will appear in the -// merged result. If you do not need this guarantee, ParallelDynamicStitch might -// perform better on some devices. -// -// For example: -// -// ```python -// indices[0] = 6 -// indices[1] = [4, 1] -// indices[2] = [[5, 2], [0, 3]] -// data[0] = [61, 62] -// data[1] = [[41, 42], [11, 12]] -// data[2] = [[[51, 52], [21, 22]], [[1, 2], [31, 32]]] -// merged = [[1, 2], [11, 12], [21, 22], [31, 32], [41, 42], -// [51, 52], [61, 62]] -// ``` -// -// This method can be used to merge partitions created by `dynamic_partition` -// as illustrated on the following example: -// -// ```python -// # Apply function (increments x_i) on elements for which a certain condition -// # apply (x_i != -1 in this example). -// x=tf.constant([0.1, -1., 5.2, 4.3, -1., 7.4]) -// condition_mask=tf.not_equal(x,tf.constant(-1.)) -// partitioned_data = tf.dynamic_partition( -// x, tf.cast(condition_mask, tf.int32) , 2) -// partitioned_data[1] = partitioned_data[1] + 1.0 -// condition_indices = tf.dynamic_partition( -// tf.range(tf.shape(x)[0]), tf.cast(condition_mask, tf.int32) , 2) -// x = tf.dynamic_stitch(condition_indices, partitioned_data) -// # Here x=[1.1, -1., 6.2, 5.3, -1, 8.4], the -1. values remain -// # unchanged. -// ``` -// -//
-func DynamicStitch(scope *Scope, indices []tf.Output, data []tf.Output) (merged tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DynamicStitch", - Input: []tf.Input{ - tf.OutputList(indices), tf.OutputList(data), - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Uncompresses a compressed dataset element. -func UncompressElement(scope *Scope, compressed tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "UncompressElement", - Input: []tf.Input{ - compressed, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if components, idx, err = makeOutputList(op, idx, "components"); err != nil { - scope.UpdateErr("UncompressElement", err) - return - } - return components -} - -// Records the bytes size of each element of `input_dataset` in a StatsAggregator. -func BytesProducedStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "BytesProducedStatsDataset", - Input: []tf.Input{ - input_dataset, tag, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ExperimentalAutoShardDatasetAttr is an optional argument to ExperimentalAutoShardDataset. -type ExperimentalAutoShardDatasetAttr func(optionalAttr) - -// ExperimentalAutoShardDatasetAutoShardPolicy sets the optional auto_shard_policy attribute to value. -// If not specified, defaults to 0 -func ExperimentalAutoShardDatasetAutoShardPolicy(value int64) ExperimentalAutoShardDatasetAttr { - return func(m optionalAttr) { - m["auto_shard_policy"] = value - } -} - -// Creates a dataset that shards the input dataset. -// -// Creates a dataset that shards the input dataset by num_workers, returning a -// sharded dataset for the index-th worker. This attempts to automatically shard -// a dataset by examining the Dataset graph and inserting a shard op before the -// inputs to a reader Dataset (e.g. CSVDataset, TFRecordDataset). -// -// This dataset will throw a NotFound error if we cannot shard the dataset -// automatically. -// -// Arguments: -// input_dataset: A variant tensor representing the input dataset. -// num_workers: A scalar representing the number of workers to distribute this dataset across. -// index: A scalar representing the index of the current worker out of num_workers. -// -// -func ExperimentalAutoShardDataset(scope *Scope, input_dataset tf.Output, num_workers tf.Output, index tf.Output, output_types []tf.DataType, output_shapes []tf.Shape, optional ...ExperimentalAutoShardDatasetAttr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ExperimentalAutoShardDataset", - Input: []tf.Input{ - input_dataset, num_workers, index, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// A transformation that asserts which transformations happen next. 
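DynamicStitch above interleaves slices of several `data` tensors into one result according to `indices`. A small sketch (illustrative only, standard Go binding packages assumed) that merges two interleaved halves back into order:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// merged[indices[m][i]] = data[m][i]; the even positions come from the
	// first input and the odd positions from the second.
	indices := []tf.Output{
		op.Const(s, []int32{0, 2}),
		op.Const(s, []int32{1, 3}),
	}
	data := []tf.Output{
		op.Const(s, []float32{10, 30}),
		op.Const(s, []float32{20, 40}),
	}
	merged := op.DynamicStitch(s, indices, data)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{merged}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // [10 20 30 40]
}
```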
-// -// This transformation checks whether the camel-case names (i.e. "FlatMap", not -// "flat_map") of the transformations following this transformation match the list -// of names in the `transformations` argument. If there is a mismatch, the -// transformation raises an exception. -// -// The check occurs when iterating over the contents of the dataset, which -// means that the check happens *after* any static optimizations are applied -// to the dataset graph. -// -// Arguments: -// input_dataset: A variant tensor representing the input dataset. -// `AssertNextDataset` passes through the outputs of its input dataset. -// transformations: A `tf.string` vector `tf.Tensor` identifying the transformations that are -// expected to happen next. -// -// -func AssertNextDataset(scope *Scope, input_dataset tf.Output, transformations tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "AssertNextDataset", - Input: []tf.Input{ - input_dataset, transformations, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Return the index of device the op runs. -func DeviceIndex(scope *Scope, device_names []string) (index tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"device_names": device_names} - opspec := tf.OpSpec{ - Type: "DeviceIndex", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ShardDatasetAttr is an optional argument to ShardDataset. -type ShardDatasetAttr func(optionalAttr) - -// ShardDatasetRequireNonEmpty sets the optional require_non_empty attribute to value. -// If not specified, defaults to false -func ShardDatasetRequireNonEmpty(value bool) ShardDatasetAttr { - return func(m optionalAttr) { - m["require_non_empty"] = value - } -} - -// Creates a `Dataset` that includes only 1/`num_shards` of this dataset. -// -// Arguments: -// -// num_shards: An integer representing the number of shards operating in parallel. -// index: An integer representing the current worker index. -// -// -func ShardDataset(scope *Scope, input_dataset tf.Output, num_shards tf.Output, index tf.Output, output_types []tf.DataType, output_shapes []tf.Shape, optional ...ShardDatasetAttr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ShardDataset", - Input: []tf.Input{ - input_dataset, num_shards, index, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// NonMaxSuppressionV5Attr is an optional argument to NonMaxSuppressionV5. -type NonMaxSuppressionV5Attr func(optionalAttr) - -// NonMaxSuppressionV5PadToMaxOutputSize sets the optional pad_to_max_output_size attribute to value. -// -// value: If true, the output `selected_indices` is padded to be of length -// `max_output_size`. Defaults to false. -// If not specified, defaults to false -func NonMaxSuppressionV5PadToMaxOutputSize(value bool) NonMaxSuppressionV5Attr { - return func(m optionalAttr) { - m["pad_to_max_output_size"] = value - } -} - -// Greedily selects a subset of bounding boxes in descending order of score, -// -// pruning away boxes that have high intersection-over-union (IOU) overlap -// with previously selected boxes. 
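ShardDataset above follows the same functional-options pattern as the other wrappers in this file: optional attributes are passed as trailing `ShardDatasetAttr` values. The sketch below builds a sharded dataset handle over a simple range dataset; it is illustrative only and assumes the `RangeDataset` wrapper from the same generated file (not shown in this hunk), plus the standard Go binding packages.

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	outputTypes := []tf.DataType{tf.Int64}
	outputShapes := []tf.Shape{tf.ScalarShape()}

	// A dataset of the integers 0..9 (RangeDataset wrapper assumed available).
	ds := op.RangeDataset(s,
		op.Const(s, int64(0)), op.Const(s, int64(10)), op.Const(s, int64(1)),
		outputTypes, outputShapes)

	// Keep every 2nd element starting at index 0, i.e. worker 0 of 2.
	sharded := op.ShardDataset(s, ds,
		op.Const(s, int64(2)), // num_shards
		op.Const(s, int64(0)), // index
		outputTypes, outputShapes,
		op.ShardDatasetRequireNonEmpty(true)) // optional attribute

	if _, err := s.Finalize(); err != nil {
		panic(err)
	}
	fmt.Println("sharded dataset handle:", sharded)
}
```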
Bounding boxes with score less than -// `score_threshold` are removed. Bounding boxes are supplied as -// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any -// diagonal pair of box corners and the coordinates can be provided as normalized -// (i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm -// is agnostic to where the origin is in the coordinate system and more -// generally is invariant to orthogonal transformations and translations -// of the coordinate system; thus translating or reflections of the coordinate -// system result in the same boxes being selected by the algorithm. -// The output of this operation is a set of integers indexing into the input -// collection of bounding boxes representing the selected boxes. The bounding -// box coordinates corresponding to the selected indices can then be obtained -// using the `tf.gather operation`. For example: -// selected_indices = tf.image.non_max_suppression_v2( -// boxes, scores, max_output_size, iou_threshold, score_threshold) -// selected_boxes = tf.gather(boxes, selected_indices) -// This op also supports a Soft-NMS (with Gaussian weighting) mode (c.f. -// Bodla et al, https://arxiv.org/abs/1704.04503) where boxes reduce the score -// of other overlapping boxes instead of directly causing them to be pruned. -// To enable this Soft-NMS mode, set the `soft_nms_sigma` parameter to be -// larger than 0. -// -// Arguments: -// boxes: A 2-D float tensor of shape `[num_boxes, 4]`. -// scores: A 1-D float tensor of shape `[num_boxes]` representing a single -// score corresponding to each box (each row of boxes). -// max_output_size: A scalar integer tensor representing the maximum number of -// boxes to be selected by non max suppression. -// iou_threshold: A 0-D float tensor representing the threshold for deciding whether -// boxes overlap too much with respect to IOU. -// score_threshold: A 0-D float tensor representing the threshold for deciding when to remove -// boxes based on score. -// soft_nms_sigma: A 0-D float tensor representing the sigma parameter for Soft NMS; see Bodla et -// al (c.f. https://arxiv.org/abs/1704.04503). When `soft_nms_sigma=0.0` (which -// is default), we fall back to standard (hard) NMS. -// -// Returns: -// selected_indices: A 1-D integer tensor of shape `[M]` representing the selected -// indices from the boxes tensor, where `M <= max_output_size`. -// selected_scores: A 1-D float tensor of shape `[M]` representing the corresponding -// scores for each selected box, where `M <= max_output_size`. Scores only differ -// from corresponding input scores when using Soft NMS (i.e. when -// `soft_nms_sigma>0`) -// valid_outputs: A 0-D integer tensor representing the number of valid elements in -// `selected_indices`, with the valid elements appearing first. 
-func NonMaxSuppressionV5(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, iou_threshold tf.Output, score_threshold tf.Output, soft_nms_sigma tf.Output, optional ...NonMaxSuppressionV5Attr) (selected_indices tf.Output, selected_scores tf.Output, valid_outputs tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "NonMaxSuppressionV5", - Input: []tf.Input{ - boxes, scores, max_output_size, iou_threshold, score_threshold, soft_nms_sigma, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// NonMaxSuppressionV4Attr is an optional argument to NonMaxSuppressionV4. -type NonMaxSuppressionV4Attr func(optionalAttr) - -// NonMaxSuppressionV4PadToMaxOutputSize sets the optional pad_to_max_output_size attribute to value. -// -// value: If true, the output `selected_indices` is padded to be of length -// `max_output_size`. Defaults to false. -// If not specified, defaults to false -func NonMaxSuppressionV4PadToMaxOutputSize(value bool) NonMaxSuppressionV4Attr { - return func(m optionalAttr) { - m["pad_to_max_output_size"] = value - } -} - -// Greedily selects a subset of bounding boxes in descending order of score, -// -// pruning away boxes that have high intersection-over-union (IOU) overlap -// with previously selected boxes. Bounding boxes with score less than -// `score_threshold` are removed. Bounding boxes are supplied as -// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any -// diagonal pair of box corners and the coordinates can be provided as normalized -// (i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm -// is agnostic to where the origin is in the coordinate system and more -// generally is invariant to orthogonal transformations and translations -// of the coordinate system; thus translating or reflections of the coordinate -// system result in the same boxes being selected by the algorithm. -// The output of this operation is a set of integers indexing into the input -// collection of bounding boxes representing the selected boxes. The bounding -// box coordinates corresponding to the selected indices can then be obtained -// using the `tf.gather operation`. For example: -// selected_indices = tf.image.non_max_suppression_v2( -// boxes, scores, max_output_size, iou_threshold, score_threshold) -// selected_boxes = tf.gather(boxes, selected_indices) -// -// Arguments: -// boxes: A 2-D float tensor of shape `[num_boxes, 4]`. -// scores: A 1-D float tensor of shape `[num_boxes]` representing a single -// score corresponding to each box (each row of boxes). -// max_output_size: A scalar integer tensor representing the maximum number of -// boxes to be selected by non max suppression. -// iou_threshold: A 0-D float tensor representing the threshold for deciding whether -// boxes overlap too much with respect to IOU. -// score_threshold: A 0-D float tensor representing the threshold for deciding when to remove -// boxes based on score. -// -// Returns: -// selected_indices: A 1-D integer tensor of shape `[M]` representing the selected -// indices from the boxes tensor, where `M <= max_output_size`. -// valid_outputs: A 0-D integer tensor representing the number of valid elements in -// `selected_indices`, with the valid elements appearing first. 
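NonMaxSuppressionV5 above returns the selected indices together with their (possibly Soft-NMS-adjusted) scores and a count of valid entries. A minimal sketch (illustrative only, standard Go binding packages assumed) with `soft_nms_sigma = 0`, i.e. plain hard NMS:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Boxes 0 and 1 overlap heavily; box 2 is far away from both.
	boxes := op.Const(s, [][]float32{
		{0, 0, 1, 1},
		{0, 0.1, 1, 1.1},
		{0, 10, 1, 11},
	})
	scores := op.Const(s, []float32{0.9, 0.75, 0.6})

	selected, selScores, valid := op.NonMaxSuppressionV5(s, boxes, scores,
		op.Const(s, int32(3)),     // max_output_size
		op.Const(s, float32(0.5)), // iou_threshold
		op.Const(s, float32(0.0)), // score_threshold
		op.Const(s, float32(0.0)), // soft_nms_sigma = 0 => standard (hard) NMS
		op.NonMaxSuppressionV5PadToMaxOutputSize(true))

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{selected, selScores, valid}, nil)
	if err != nil {
		panic(err)
	}
	// Expect boxes 0 and 2 to survive: valid_outputs = 2, selected indices start [0 2 ...].
	fmt.Println(out[0].Value(), out[1].Value(), out[2].Value())
}
```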
-func NonMaxSuppressionV4(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, iou_threshold tf.Output, score_threshold tf.Output, optional ...NonMaxSuppressionV4Attr) (selected_indices tf.Output, valid_outputs tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "NonMaxSuppressionV4", - Input: []tf.Input{ - boxes, scores, max_output_size, iou_threshold, score_threshold, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Greedily selects a subset of bounding boxes in descending order of score, -// -// pruning away boxes that have high intersection-over-union (IOU) overlap -// with previously selected boxes. Bounding boxes with score less than -// `score_threshold` are removed. Bounding boxes are supplied as -// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any -// diagonal pair of box corners and the coordinates can be provided as normalized -// (i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm -// is agnostic to where the origin is in the coordinate system and more -// generally is invariant to orthogonal transformations and translations -// of the coordinate system; thus translating or reflections of the coordinate -// system result in the same boxes being selected by the algorithm. -// The output of this operation is a set of integers indexing into the input -// collection of bounding boxes representing the selected boxes. The bounding -// box coordinates corresponding to the selected indices can then be obtained -// using the `tf.gather operation`. For example: -// selected_indices = tf.image.non_max_suppression_v2( -// boxes, scores, max_output_size, iou_threshold, score_threshold) -// selected_boxes = tf.gather(boxes, selected_indices) -// -// Arguments: -// boxes: A 2-D float tensor of shape `[num_boxes, 4]`. -// scores: A 1-D float tensor of shape `[num_boxes]` representing a single -// score corresponding to each box (each row of boxes). -// max_output_size: A scalar integer tensor representing the maximum number of -// boxes to be selected by non max suppression. -// iou_threshold: A 0-D float tensor representing the threshold for deciding whether -// boxes overlap too much with respect to IOU. -// score_threshold: A 0-D float tensor representing the threshold for deciding when to remove -// boxes based on score. -// -// Returns A 1-D integer tensor of shape `[M]` representing the selected -// indices from the boxes tensor, where `M <= max_output_size`. -func NonMaxSuppressionV3(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, iou_threshold tf.Output, score_threshold tf.Output) (selected_indices tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "NonMaxSuppressionV3", - Input: []tf.Input{ - boxes, scores, max_output_size, iou_threshold, score_threshold, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Greedily selects a subset of bounding boxes in descending order of score, -// -// pruning away boxes that have high intersection-over-union (IOU) overlap -// with previously selected boxes. Bounding boxes are supplied as -// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any -// diagonal pair of box corners and the coordinates can be provided as normalized -// (i.e., lying in the interval [0, 1]) or absolute. 
Note that this algorithm -// is agnostic to where the origin is in the coordinate system. Note that this -// algorithm is invariant to orthogonal transformations and translations -// of the coordinate system; thus translating or reflections of the coordinate -// system result in the same boxes being selected by the algorithm. -// -// The output of this operation is a set of integers indexing into the input -// collection of bounding boxes representing the selected boxes. The bounding -// box coordinates corresponding to the selected indices can then be obtained -// using the `tf.gather operation`. For example: -// -// selected_indices = tf.image.non_max_suppression_v2( -// boxes, scores, max_output_size, iou_threshold) -// selected_boxes = tf.gather(boxes, selected_indices) -// -// Arguments: -// boxes: A 2-D float tensor of shape `[num_boxes, 4]`. -// scores: A 1-D float tensor of shape `[num_boxes]` representing a single -// score corresponding to each box (each row of boxes). -// max_output_size: A scalar integer tensor representing the maximum number of -// boxes to be selected by non max suppression. -// iou_threshold: A 0-D float tensor representing the threshold for deciding whether -// boxes overlap too much with respect to IOU. -// -// Returns A 1-D integer tensor of shape `[M]` representing the selected -// indices from the boxes tensor, where `M <= max_output_size`. -func NonMaxSuppressionV2(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, iou_threshold tf.Output) (selected_indices tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "NonMaxSuppressionV2", - Input: []tf.Input{ - boxes, scores, max_output_size, iou_threshold, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// NonMaxSuppressionAttr is an optional argument to NonMaxSuppression. -type NonMaxSuppressionAttr func(optionalAttr) - -// NonMaxSuppressionIouThreshold sets the optional iou_threshold attribute to value. -// -// value: A float representing the threshold for deciding whether boxes -// overlap too much with respect to IOU. -// If not specified, defaults to 0.5 -func NonMaxSuppressionIouThreshold(value float32) NonMaxSuppressionAttr { - return func(m optionalAttr) { - m["iou_threshold"] = value - } -} - -// Greedily selects a subset of bounding boxes in descending order of score, -// -// pruning away boxes that have high intersection-over-union (IOU) overlap -// with previously selected boxes. Bounding boxes are supplied as -// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any -// diagonal pair of box corners and the coordinates can be provided as normalized -// (i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm -// is agnostic to where the origin is in the coordinate system. Note that this -// algorithm is invariant to orthogonal transformations and translations -// of the coordinate system; thus translating or reflections of the coordinate -// system result in the same boxes being selected by the algorithm. -// The output of this operation is a set of integers indexing into the input -// collection of bounding boxes representing the selected boxes. The bounding -// box coordinates corresponding to the selected indices can then be obtained -// using the `tf.gather operation`. 
For example: -// selected_indices = tf.image.non_max_suppression( -// boxes, scores, max_output_size, iou_threshold) -// selected_boxes = tf.gather(boxes, selected_indices) -// -// Arguments: -// boxes: A 2-D float tensor of shape `[num_boxes, 4]`. -// scores: A 1-D float tensor of shape `[num_boxes]` representing a single -// score corresponding to each box (each row of boxes). -// max_output_size: A scalar integer tensor representing the maximum number of -// boxes to be selected by non max suppression. -// -// Returns A 1-D integer tensor of shape `[M]` representing the selected -// indices from the boxes tensor, where `M <= max_output_size`. -func NonMaxSuppression(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, optional ...NonMaxSuppressionAttr) (selected_indices tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "NonMaxSuppression", - Input: []tf.Input{ - boxes, scores, max_output_size, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// CropAndResizeGradBoxesAttr is an optional argument to CropAndResizeGradBoxes. -type CropAndResizeGradBoxesAttr func(optionalAttr) - -// CropAndResizeGradBoxesMethod sets the optional method attribute to value. -// -// value: A string specifying the interpolation method. Only 'bilinear' is -// supported for now. -// If not specified, defaults to "bilinear" -func CropAndResizeGradBoxesMethod(value string) CropAndResizeGradBoxesAttr { - return func(m optionalAttr) { - m["method"] = value - } -} - -// Computes the gradient of the crop_and_resize op wrt the input boxes tensor. -// -// Arguments: -// grads: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. -// image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`. -// Both `image_height` and `image_width` need to be positive. -// boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor -// specifies the coordinates of a box in the `box_ind[i]` image and is specified -// in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of -// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the -// `[0, 1]` interval of normalized image height is mapped to -// `[0, image_height - 1] in image height coordinates. We do allow y1 > y2, in -// which case the sampled crop is an up-down flipped version of the original -// image. The width dimension is treated similarly. Normalized coordinates -// outside the `[0, 1]` range are allowed, in which case we use -// `extrapolation_value` to extrapolate the input image values. -// box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`. -// The value of `box_ind[i]` specifies the image that the `i`-th box refers to. -// -// Returns A 2-D tensor of shape `[num_boxes, 4]`. -func CropAndResizeGradBoxes(scope *Scope, grads tf.Output, image tf.Output, boxes tf.Output, box_ind tf.Output, optional ...CropAndResizeGradBoxesAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CropAndResizeGradBoxes", - Input: []tf.Input{ - grads, image, boxes, box_ind, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ExtractGlimpseV2Attr is an optional argument to ExtractGlimpseV2. 
-type ExtractGlimpseV2Attr func(optionalAttr) - -// ExtractGlimpseV2Centered sets the optional centered attribute to value. -// -// value: indicates if the offset coordinates are centered relative to -// the image, in which case the (0, 0) offset is relative to the center -// of the input images. If false, the (0,0) offset corresponds to the -// upper left corner of the input images. -// If not specified, defaults to true -func ExtractGlimpseV2Centered(value bool) ExtractGlimpseV2Attr { - return func(m optionalAttr) { - m["centered"] = value - } -} - -// ExtractGlimpseV2Normalized sets the optional normalized attribute to value. -// -// value: indicates if the offset coordinates are normalized. -// If not specified, defaults to true -func ExtractGlimpseV2Normalized(value bool) ExtractGlimpseV2Attr { - return func(m optionalAttr) { - m["normalized"] = value - } -} - -// ExtractGlimpseV2UniformNoise sets the optional uniform_noise attribute to value. -// -// value: indicates if the noise should be generated using a -// uniform distribution or a Gaussian distribution. -// If not specified, defaults to true -func ExtractGlimpseV2UniformNoise(value bool) ExtractGlimpseV2Attr { - return func(m optionalAttr) { - m["uniform_noise"] = value - } -} - -// ExtractGlimpseV2Noise sets the optional noise attribute to value. -// -// value: indicates if the noise should `uniform`, `gaussian`, or -// `zero`. The default is `uniform` which means the the noise type -// will be decided by `uniform_noise`. -// If not specified, defaults to "uniform" -func ExtractGlimpseV2Noise(value string) ExtractGlimpseV2Attr { - return func(m optionalAttr) { - m["noise"] = value - } -} - -// Extracts a glimpse from the input tensor. -// -// Returns a set of windows called glimpses extracted at location -// `offsets` from the input tensor. If the windows only partially -// overlaps the inputs, the non overlapping areas will be filled with -// random noise. -// -// The result is a 4-D tensor of shape `[batch_size, glimpse_height, -// glimpse_width, channels]`. The channels and batch dimensions are the -// same as that of the input tensor. The height and width of the output -// windows are specified in the `size` parameter. -// -// The argument `normalized` and `centered` controls how the windows are built: -// -// * If the coordinates are normalized but not centered, 0.0 and 1.0 -// correspond to the minimum and maximum of each height and width -// dimension. -// * If the coordinates are both normalized and centered, they range from -// -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper -// left corner, the lower right corner is located at (1.0, 1.0) and the -// center is at (0, 0). -// * If the coordinates are not normalized they are interpreted as -// numbers of pixels. -// -// Arguments: -// input: A 4-D float tensor of shape `[batch_size, height, width, channels]`. -// size: A 1-D tensor of 2 elements containing the size of the glimpses -// to extract. The glimpse height must be specified first, following -// by the glimpse width. -// offsets: A 2-D integer tensor of shape `[batch_size, 2]` containing -// the y, x locations of the center of each window. -// -// Returns A tensor representing the glimpses `[batch_size, -// glimpse_height, glimpse_width, channels]`. 
-func ExtractGlimpseV2(scope *Scope, input tf.Output, size tf.Output, offsets tf.Output, optional ...ExtractGlimpseV2Attr) (glimpse tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ExtractGlimpseV2", - Input: []tf.Input{ - input, size, offsets, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ExtractGlimpseAttr is an optional argument to ExtractGlimpse. -type ExtractGlimpseAttr func(optionalAttr) - -// ExtractGlimpseCentered sets the optional centered attribute to value. -// -// value: indicates if the offset coordinates are centered relative to -// the image, in which case the (0, 0) offset is relative to the center -// of the input images. If false, the (0,0) offset corresponds to the -// upper left corner of the input images. -// If not specified, defaults to true -func ExtractGlimpseCentered(value bool) ExtractGlimpseAttr { - return func(m optionalAttr) { - m["centered"] = value - } -} - -// ExtractGlimpseNormalized sets the optional normalized attribute to value. -// -// value: indicates if the offset coordinates are normalized. -// If not specified, defaults to true -func ExtractGlimpseNormalized(value bool) ExtractGlimpseAttr { - return func(m optionalAttr) { - m["normalized"] = value - } -} - -// ExtractGlimpseUniformNoise sets the optional uniform_noise attribute to value. -// -// value: indicates if the noise should be generated using a -// uniform distribution or a Gaussian distribution. -// If not specified, defaults to true -func ExtractGlimpseUniformNoise(value bool) ExtractGlimpseAttr { - return func(m optionalAttr) { - m["uniform_noise"] = value - } -} - -// ExtractGlimpseNoise sets the optional noise attribute to value. -// -// value: indicates if the noise should `uniform`, `gaussian`, or -// `zero`. The default is `uniform` which means the the noise type -// will be decided by `uniform_noise`. -// If not specified, defaults to "uniform" -func ExtractGlimpseNoise(value string) ExtractGlimpseAttr { - return func(m optionalAttr) { - m["noise"] = value - } -} - -// Extracts a glimpse from the input tensor. -// -// Returns a set of windows called glimpses extracted at location -// `offsets` from the input tensor. If the windows only partially -// overlaps the inputs, the non overlapping areas will be filled with -// random noise. -// -// The result is a 4-D tensor of shape `[batch_size, glimpse_height, -// glimpse_width, channels]`. The channels and batch dimensions are the -// same as that of the input tensor. The height and width of the output -// windows are specified in the `size` parameter. -// -// The argument `normalized` and `centered` controls how the windows are built: -// -// * If the coordinates are normalized but not centered, 0.0 and 1.0 -// correspond to the minimum and maximum of each height and width -// dimension. -// * If the coordinates are both normalized and centered, they range from -// -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper -// left corner, the lower right corner is located at (1.0, 1.0) and the -// center is at (0, 0). -// * If the coordinates are not normalized they are interpreted as -// numbers of pixels. -// -// Arguments: -// input: A 4-D float tensor of shape `[batch_size, height, width, channels]`. -// size: A 1-D tensor of 2 elements containing the size of the glimpses -// to extract. 
The glimpse height must be specified first, following -// by the glimpse width. -// offsets: A 2-D integer tensor of shape `[batch_size, 2]` containing -// the y, x locations of the center of each window. -// -// Returns A tensor representing the glimpses `[batch_size, -// glimpse_height, glimpse_width, channels]`. -func ExtractGlimpse(scope *Scope, input tf.Output, size tf.Output, offsets tf.Output, optional ...ExtractGlimpseAttr) (glimpse tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ExtractGlimpse", - Input: []tf.Input{ - input, size, offsets, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SampleDistortedBoundingBoxAttr is an optional argument to SampleDistortedBoundingBox. -type SampleDistortedBoundingBoxAttr func(optionalAttr) - -// SampleDistortedBoundingBoxSeed sets the optional seed attribute to value. -// -// value: If either `seed` or `seed2` are set to non-zero, the random number -// generator is seeded by the given `seed`. Otherwise, it is seeded by a random -// seed. -// If not specified, defaults to 0 -func SampleDistortedBoundingBoxSeed(value int64) SampleDistortedBoundingBoxAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// SampleDistortedBoundingBoxSeed2 sets the optional seed2 attribute to value. -// -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func SampleDistortedBoundingBoxSeed2(value int64) SampleDistortedBoundingBoxAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// SampleDistortedBoundingBoxMinObjectCovered sets the optional min_object_covered attribute to value. -// -// value: The cropped area of the image must contain at least this -// fraction of any bounding box supplied. The value of this parameter should be -// non-negative. In the case of 0, the cropped area does not need to overlap -// any of the bounding boxes supplied. -// If not specified, defaults to 0.1 -func SampleDistortedBoundingBoxMinObjectCovered(value float32) SampleDistortedBoundingBoxAttr { - return func(m optionalAttr) { - m["min_object_covered"] = value - } -} - -// SampleDistortedBoundingBoxAspectRatioRange sets the optional aspect_ratio_range attribute to value. -// -// value: The cropped area of the image must have an aspect ratio = -// width / height within this range. -// If not specified, defaults to -func SampleDistortedBoundingBoxAspectRatioRange(value []float32) SampleDistortedBoundingBoxAttr { - return func(m optionalAttr) { - m["aspect_ratio_range"] = value - } -} - -// SampleDistortedBoundingBoxAreaRange sets the optional area_range attribute to value. -// -// value: The cropped area of the image must contain a fraction of the -// supplied image within this range. -// If not specified, defaults to -func SampleDistortedBoundingBoxAreaRange(value []float32) SampleDistortedBoundingBoxAttr { - return func(m optionalAttr) { - m["area_range"] = value - } -} - -// SampleDistortedBoundingBoxMaxAttempts sets the optional max_attempts attribute to value. -// -// value: Number of attempts at generating a cropped region of the image -// of the specified constraints. After `max_attempts` failures, return the entire -// image. 
-// If not specified, defaults to 100 -func SampleDistortedBoundingBoxMaxAttempts(value int64) SampleDistortedBoundingBoxAttr { - return func(m optionalAttr) { - m["max_attempts"] = value - } -} - -// SampleDistortedBoundingBoxUseImageIfNoBoundingBoxes sets the optional use_image_if_no_bounding_boxes attribute to value. -// -// value: Controls behavior if no bounding boxes supplied. -// If true, assume an implicit bounding box covering the whole input. If false, -// raise an error. -// If not specified, defaults to false -func SampleDistortedBoundingBoxUseImageIfNoBoundingBoxes(value bool) SampleDistortedBoundingBoxAttr { - return func(m optionalAttr) { - m["use_image_if_no_bounding_boxes"] = value - } -} - -// Generate a single randomly distorted bounding box for an image. -// -// Bounding box annotations are often supplied in addition to ground-truth labels -// in image recognition or object localization tasks. A common technique for -// training such a system is to randomly distort an image while preserving -// its content, i.e. *data augmentation*. This Op outputs a randomly distorted -// localization of an object, i.e. bounding box, given an `image_size`, -// `bounding_boxes` and a series of constraints. -// -// The output of this Op is a single bounding box that may be used to crop the -// original image. The output is returned as 3 tensors: `begin`, `size` and -// `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the -// image. The latter may be supplied to `tf.image.draw_bounding_boxes` to visualize -// what the bounding box looks like. -// -// Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. The -// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and -// height of the underlying image. -// -// For example, -// -// ```python -// # Generate a single distorted bounding box. -// begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box( -// tf.shape(image), -// bounding_boxes=bounding_boxes) -// -// # Draw the bounding box in an image summary. -// image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), -// bbox_for_draw) -// tf.summary.image('images_with_box', image_with_box) -// -// # Employ the bounding box to distort the image. -// distorted_image = tf.slice(image, begin, size) -// ``` -// -// Note that if no bounding box information is available, setting -// `use_image_if_no_bounding_boxes = true` will assume there is a single implicit -// bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is -// false and no bounding boxes are supplied, an error is raised. -// -// Arguments: -// image_size: 1-D, containing `[height, width, channels]`. -// bounding_boxes: 3-D with shape `[batch, N, 4]` describing the N bounding boxes -// associated with the image. -// -// Returns: -// begin: 1-D, containing `[offset_height, offset_width, 0]`. Provide as input to -// `tf.slice`. -// size: 1-D, containing `[target_height, target_width, -1]`. Provide as input to -// `tf.slice`. -// bboxes: 3-D with shape `[1, 1, 4]` containing the distorted bounding box. -// Provide as input to `tf.image.draw_bounding_boxes`. 
-func SampleDistortedBoundingBox(scope *Scope, image_size tf.Output, bounding_boxes tf.Output, optional ...SampleDistortedBoundingBoxAttr) (begin tf.Output, size tf.Output, bboxes tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SampleDistortedBoundingBox", - Input: []tf.Input{ - image_size, bounding_boxes, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Draw bounding boxes on a batch of images. -// -// Outputs a copy of `images` but draws on top of the pixels zero or more bounding -// boxes specified by the locations in `boxes`. The coordinates of the each -// bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`. The -// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and -// height of the underlying image. -// -// For example, if an image is 100 x 200 pixels (height x width) and the bounding -// box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of -// the bounding box will be `(40, 10)` to `(100, 50)` (in (x,y) coordinates). -// -// Parts of the bounding box may fall outside the image. -// -// Arguments: -// images: 4-D with shape `[batch, height, width, depth]`. A batch of images. -// boxes: 3-D with shape `[batch, num_bounding_boxes, 4]` containing bounding -// boxes. -// colors: 2-D. A list of RGBA colors to cycle through for the boxes. -// -// Returns 4-D with the same shape as `images`. The batch of input images with -// bounding boxes drawn on the images. -func DrawBoundingBoxesV2(scope *Scope, images tf.Output, boxes tf.Output, colors tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DrawBoundingBoxesV2", - Input: []tf.Input{ - images, boxes, colors, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Draw bounding boxes on a batch of images. -// -// Outputs a copy of `images` but draws on top of the pixels zero or more bounding -// boxes specified by the locations in `boxes`. The coordinates of the each -// bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`. The -// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and -// height of the underlying image. -// -// For example, if an image is 100 x 200 pixels (height x width) and the bounding -// box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of -// the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates). -// -// Parts of the bounding box may fall outside the image. -// -// Arguments: -// images: 4-D with shape `[batch, height, width, depth]`. A batch of images. -// boxes: 3-D with shape `[batch, num_bounding_boxes, 4]` containing bounding -// boxes. -// -// Returns 4-D with the same shape as `images`. The batch of input images with -// bounding boxes drawn on the images. -func DrawBoundingBoxes(scope *Scope, images tf.Output, boxes tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DrawBoundingBoxes", - Input: []tf.Input{ - images, boxes, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Convert one or more images from HSV to RGB. -// -// Outputs a tensor of the same shape as the `images` tensor, containing the RGB -// value of the pixels. The output is only well defined if the value in `images` -// are in `[0,1]`. 
-// -// See `rgb_to_hsv` for a description of the HSV encoding. -// -// Arguments: -// images: 1-D or higher rank. HSV data to convert. Last dimension must be size 3. -// -// Returns `images` converted to RGB. -func HSVToRGB(scope *Scope, images tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "HSVToRGB", - Input: []tf.Input{ - images, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Converts one or more images from RGB to HSV. -// -// Outputs a tensor of the same shape as the `images` tensor, containing the HSV -// value of the pixels. The output is only well defined if the value in `images` -// are in `[0,1]`. -// -// `output[..., 0]` contains hue, `output[..., 1]` contains saturation, and -// `output[..., 2]` contains value. All HSV values are in `[0,1]`. A hue of 0 -// corresponds to pure red, hue 1/3 is pure green, and 2/3 is pure blue. -// -// Usage Example: -// -// >>> blue_image = tf.stack([ -// ... tf.zeros([5,5]), -// ... tf.zeros([5,5]), -// ... tf.ones([5,5])], -// ... axis=-1) -// >>> blue_hsv_image = tf.image.rgb_to_hsv(blue_image) -// >>> blue_hsv_image[0,0].numpy() -// array([0.6666667, 1. , 1. ], dtype=float32) -// -// -// Arguments: -// images: 1-D or higher rank. RGB data to convert. Last dimension must be size 3. -// -// Returns `images` converted to HSV. -func RGBToHSV(scope *Scope, images tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "RGBToHSV", - Input: []tf.Input{ - images, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Decode the frame(s) of a GIF-encoded image to a uint8 tensor. -// -// GIF images with frame or transparency compression are not supported. -// On Linux and MacOS systems, convert animated GIFs from compressed to -// uncompressed by running: -// -// convert $src.gif -coalesce $dst.gif -// -// This op also supports decoding JPEGs and PNGs, though it is cleaner to use -// `tf.io.decode_image`. -// -// Arguments: -// contents: 0-D. The GIF-encoded image. -// -// Returns 4-D with shape `[num_frames, height, width, 3]`. RGB channel order. -func DecodeGif(scope *Scope, contents tf.Output) (image tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DecodeGif", - Input: []tf.Input{ - contents, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// DecodeBmpAttr is an optional argument to DecodeBmp. -type DecodeBmpAttr func(optionalAttr) - -// DecodeBmpChannels sets the optional channels attribute to value. -// If not specified, defaults to 0 -func DecodeBmpChannels(value int64) DecodeBmpAttr { - return func(m optionalAttr) { - m["channels"] = value - } -} - -// Decode the first frame of a BMP-encoded image to a uint8 tensor. -// -// The attr `channels` indicates the desired number of color channels for the -// decoded image. -// -// Accepted values are: -// -// * 0: Use the number of channels in the BMP-encoded image. -// * 3: output an RGB image. -// * 4: output an RGBA image. -// -// Arguments: -// contents: 0-D. The BMP-encoded image. -// -// Returns 3-D with shape `[height, width, channels]`. 
RGB order -func DecodeBmp(scope *Scope, contents tf.Output, optional ...DecodeBmpAttr) (image tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DecodeBmp", - Input: []tf.Input{ - contents, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// EncodePngAttr is an optional argument to EncodePng. -type EncodePngAttr func(optionalAttr) - -// EncodePngCompression sets the optional compression attribute to value. -// -// value: Compression level. -// If not specified, defaults to -1 -func EncodePngCompression(value int64) EncodePngAttr { - return func(m optionalAttr) { - m["compression"] = value - } -} - -// PNG-encode an image. -// -// `image` is a 3-D uint8 or uint16 Tensor of shape `[height, width, channels]` -// where `channels` is: -// -// * 1: for grayscale. -// * 2: for grayscale + alpha. -// * 3: for RGB. -// * 4: for RGBA. -// -// The ZLIB compression level, `compression`, can be -1 for the PNG-encoder -// default or a value from 0 to 9. 9 is the highest compression level, generating -// the smallest output, but is slower. -// -// Arguments: -// image: 3-D with shape `[height, width, channels]`. -// -// Returns 0-D. PNG-encoded image. -func EncodePng(scope *Scope, image tf.Output, optional ...EncodePngAttr) (contents tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "EncodePng", - Input: []tf.Input{ - image, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Invert (flip) each bit of supported types; for example, type `uint8` value 01010101 becomes 10101010. -// -// Flip each bit of supported types. For example, type `int8` (decimal 2) binary 00000010 becomes (decimal -3) binary 11111101. -// This operation is performed on each element of the tensor argument `x`. -// -// Example: -// ```python -// import tensorflow as tf -// from tensorflow.python.ops import bitwise_ops -// -// # flip 2 (00000010) to -3 (11111101) -// tf.assert_equal(-3, bitwise_ops.invert(2)) -// -// dtype_list = [dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64, -// dtypes.uint8, dtypes.uint16, dtypes.uint32, dtypes.uint64] -// -// inputs = [0, 5, 3, 14] -// for dtype in dtype_list: -// # Because of issues with negative numbers, let's test this indirectly. -// # 1. invert(a) and a = 0 -// # 2. invert(a) or a = invert(0) -// input_tensor = tf.constant([0, 5, 3, 14], dtype=dtype) -// not_a_and_a, not_a_or_a, not_0 = [bitwise_ops.bitwise_and( -// input_tensor, bitwise_ops.invert(input_tensor)), -// bitwise_ops.bitwise_or( -// input_tensor, bitwise_ops.invert(input_tensor)), -// bitwise_ops.invert( -// tf.constant(0, dtype=dtype))] -// -// expected = tf.constant([0, 0, 0, 0], dtype=tf.float32) -// tf.assert_equal(tf.cast(not_a_and_a, tf.float32), expected) -// -// expected = tf.cast([not_0] * 4, tf.float32) -// tf.assert_equal(tf.cast(not_a_or_a, tf.float32), expected) -// -// # For unsigned dtypes let's also check the result directly. 
-// if dtype.is_unsigned: -// inverted = bitwise_ops.invert(input_tensor) -// expected = tf.constant([dtype.max - x for x in inputs], dtype=tf.float32) -// tf.assert_equal(tf.cast(inverted, tf.float32), tf.cast(expected, tf.float32)) -// ``` -func Invert(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Invert", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// DecodePngAttr is an optional argument to DecodePng. -type DecodePngAttr func(optionalAttr) - -// DecodePngChannels sets the optional channels attribute to value. -// -// value: Number of color channels for the decoded image. -// If not specified, defaults to 0 -func DecodePngChannels(value int64) DecodePngAttr { - return func(m optionalAttr) { - m["channels"] = value - } -} - -// DecodePngDtype sets the optional dtype attribute to value. -// If not specified, defaults to DT_UINT8 -func DecodePngDtype(value tf.DataType) DecodePngAttr { - return func(m optionalAttr) { - m["dtype"] = value - } -} - -// Decode a PNG-encoded image to a uint8 or uint16 tensor. -// -// The attr `channels` indicates the desired number of color channels for the -// decoded image. -// -// Accepted values are: -// -// * 0: Use the number of channels in the PNG-encoded image. -// * 1: output a grayscale image. -// * 3: output an RGB image. -// * 4: output an RGBA image. -// -// If needed, the PNG-encoded image is transformed to match the requested number -// of color channels. -// -// This op also supports decoding JPEGs and non-animated GIFs since the interface -// is the same, though it is cleaner to use `tf.io.decode_image`. -// -// Arguments: -// contents: 0-D. The PNG-encoded image. -// -// Returns 3-D with shape `[height, width, channels]`. -func DecodePng(scope *Scope, contents tf.Output, optional ...DecodePngAttr) (image tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DecodePng", - Input: []tf.Input{ - contents, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Adjust the saturation of one or more images. -// -// `images` is a tensor of at least 3 dimensions. The last dimension is -// interpreted as channels, and must be three. -// -// The input image is considered in the RGB colorspace. Conceptually, the RGB -// colors are first mapped into HSV. A scale is then applied all the saturation -// values, and then remapped back to RGB colorspace. -// -// Arguments: -// images: Images to adjust. At least 3-D. -// scale: A float scale to add to the saturation. -// -// Returns The hue-adjusted image or images. -func AdjustSaturation(scope *Scope, images tf.Output, scale tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "AdjustSaturation", - Input: []tf.Input{ - images, scale, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ExtractJpegShapeAttr is an optional argument to ExtractJpegShape. -type ExtractJpegShapeAttr func(optionalAttr) - -// ExtractJpegShapeOutputType sets the optional output_type attribute to value. -// -// value: (Optional) The output type of the operation (int32 or int64). -// Defaults to int32. 
-// If not specified, defaults to DT_INT32 -func ExtractJpegShapeOutputType(value tf.DataType) ExtractJpegShapeAttr { - return func(m optionalAttr) { - m["output_type"] = value - } -} - -// Extract the shape information of a JPEG-encoded image. -// -// This op only parses the image header, so it is much faster than DecodeJpeg. -// -// Arguments: -// contents: 0-D. The JPEG-encoded image. -// -// Returns 1-D. The image shape with format [height, width, channels]. -func ExtractJpegShape(scope *Scope, contents tf.Output, optional ...ExtractJpegShapeAttr) (image_shape tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ExtractJpegShape", - Input: []tf.Input{ - contents, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RandomCropAttr is an optional argument to RandomCrop. -type RandomCropAttr func(optionalAttr) - -// RandomCropSeed sets the optional seed attribute to value. -// -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func RandomCropSeed(value int64) RandomCropAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// RandomCropSeed2 sets the optional seed2 attribute to value. -// -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomCropSeed2(value int64) RandomCropAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Randomly crop `image`. -// -// DEPRECATED at GraphDef version 8: Random crop is now pure Python -// -// `size` is a 1-D int64 tensor with 2 elements representing the crop height and -// width. The values must be non negative. -// -// This Op picks a random location in `image` and crops a `height` by `width` -// rectangle from that location. The random location is picked so the cropped -// area will fit inside the original image. -// -// Arguments: -// image: 3-D of shape `[height, width, channels]`. -// size: 1-D of length 2 containing: `crop_height`, `crop_width`.. -// -// Returns 3-D of shape `[crop_height, crop_width, channels].` -func RandomCrop(scope *Scope, image tf.Output, size tf.Output, optional ...RandomCropAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RandomCrop", - Input: []tf.Input{ - image, size, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResizeNearestNeighborGradAttr is an optional argument to ResizeNearestNeighborGrad. -type ResizeNearestNeighborGradAttr func(optionalAttr) - -// ResizeNearestNeighborGradAlignCorners sets the optional align_corners attribute to value. -// -// value: If true, the centers of the 4 corner pixels of the input and grad tensors are -// aligned. Defaults to false. -// If not specified, defaults to false -func ResizeNearestNeighborGradAlignCorners(value bool) ResizeNearestNeighborGradAttr { - return func(m optionalAttr) { - m["align_corners"] = value - } -} - -// ResizeNearestNeighborGradHalfPixelCenters sets the optional half_pixel_centers attribute to value. 
-// If not specified, defaults to false -func ResizeNearestNeighborGradHalfPixelCenters(value bool) ResizeNearestNeighborGradAttr { - return func(m optionalAttr) { - m["half_pixel_centers"] = value - } -} - -// Computes the gradient of nearest neighbor interpolation. -// -// Arguments: -// grads: 4-D with shape `[batch, height, width, channels]`. -// size: = A 1-D int32 Tensor of 2 elements: `orig_height, orig_width`. The -// original input size. -// -// Returns 4-D with shape `[batch, orig_height, orig_width, channels]`. Gradients -// with respect to the input image. -func ResizeNearestNeighborGrad(scope *Scope, grads tf.Output, size tf.Output, optional ...ResizeNearestNeighborGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResizeNearestNeighborGrad", - Input: []tf.Input{ - grads, size, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Initializes the multi device iterator with the given dataset. -// -// Arguments: -// dataset: Dataset to be iterated upon. -// multi_device_iterator: A MultiDeviceIteratorResource. -// max_buffer_size: The maximum size of the host side per device buffer to keep. -// -// Returns An int64 indicating which incarnation of the MultiDeviceIterator -// is running. -func MultiDeviceIteratorInit(scope *Scope, dataset tf.Output, multi_device_iterator tf.Output, max_buffer_size tf.Output) (incarnation_id tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MultiDeviceIteratorInit", - Input: []tf.Input{ - dataset, multi_device_iterator, max_buffer_size, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Deprecated. Disallowed in GraphDef version >= 2. -// -// DEPRECATED at GraphDef version 2: Use AdjustContrastv2 instead -func AdjustContrast(scope *Scope, images tf.Output, contrast_factor tf.Output, min_value tf.Output, max_value tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "AdjustContrast", - Input: []tf.Input{ - images, contrast_factor, min_value, max_value, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// A placeholder op for a value that will be fed into the computation. -// -// Arguments: -// dtype: The type of elements in the tensor. -// shape: The shape of the tensor. -// -// Returns A tensor that will be provided using the infeed mechanism. -func InfeedDequeue(scope *Scope, dtype tf.DataType, shape tf.Shape) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype, "shape": shape} - opspec := tf.OpSpec{ - Type: "InfeedDequeue", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Encodes a `RaggedTensor` into a `variant` Tensor. -// -// -// Encodes the given `RaggedTensor` and returns a `variant` Tensor. If -// `batched_input` is True, then input `RaggedTensor` is unbatched along the -// zero-th dimension, each component `RaggedTensor` is encoded into a scalar -// `variant` Tensor, and these are stacked to return a 1-D `variant` Tensor. -// If `batched_input` is False, then the input `RaggedTensor` is encoded as is and -// a scalar `variant` Tensor is returned. A `RaggedTensor` is encoded by first -// creating a 1-D `variant` Tensor with `ragged_rank + 1` elements, containing the -// splits and values Tensors of the `RaggedTensor`. 
Then the 1-D `variant` Tensor -// is wrapped in a scalar `variant` Tensor. See `RaggedTensorFromVariant` for the -// corresponding decoding logic. -// -// -// Arguments: -// rt_nested_splits: A list of one or more Tensors representing the splits of the input -// `RaggedTensor`. -// rt_dense_values: A Tensor representing the values of the input `RaggedTensor`. -// batched_input: A `bool` denoting whether the input is a batched `RaggedTensor`. -// -// Returns A `variant` Tensor that containing encoded `RaggedTensor`. -func RaggedTensorToVariant(scope *Scope, rt_nested_splits []tf.Output, rt_dense_values tf.Output, batched_input bool) (encoded_ragged tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"batched_input": batched_input} - opspec := tf.OpSpec{ - Type: "RaggedTensorToVariant", - Input: []tf.Input{ - tf.OutputList(rt_nested_splits), rt_dense_values, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResizeNearestNeighborAttr is an optional argument to ResizeNearestNeighbor. -type ResizeNearestNeighborAttr func(optionalAttr) - -// ResizeNearestNeighborAlignCorners sets the optional align_corners attribute to value. -// -// value: If true, the centers of the 4 corner pixels of the input and output tensors are -// aligned, preserving the values at the corner pixels. Defaults to false. -// If not specified, defaults to false -func ResizeNearestNeighborAlignCorners(value bool) ResizeNearestNeighborAttr { - return func(m optionalAttr) { - m["align_corners"] = value - } -} - -// ResizeNearestNeighborHalfPixelCenters sets the optional half_pixel_centers attribute to value. -// If not specified, defaults to false -func ResizeNearestNeighborHalfPixelCenters(value bool) ResizeNearestNeighborAttr { - return func(m optionalAttr) { - m["half_pixel_centers"] = value - } -} - -// Resize `images` to `size` using nearest neighbor interpolation. -// -// Arguments: -// images: 4-D with shape `[batch, height, width, channels]`. -// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The -// new size for the images. -// -// Returns 4-D with shape -// `[batch, new_height, new_width, channels]`. -func ResizeNearestNeighbor(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeNearestNeighborAttr) (resized_images tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResizeNearestNeighbor", - Input: []tf.Input{ - images, size, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Runs multiple additive regression ensemble predictors on input instances and -// -// computes the logits. It is designed to be used during prediction. -// It traverses all the trees and calculates the final score for each instance. -// -// Arguments: -// -// bucketized_features: A list of rank 1 Tensors containing bucket id for each -// feature. -// logits_dimension: scalar, dimension of the logits, to be used for partial logits -// shape. -// -// Returns Output rank 2 Tensor containing logits for each example. 
-func BoostedTreesPredict(scope *Scope, tree_ensemble_handle tf.Output, bucketized_features []tf.Output, logits_dimension int64) (logits tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"logits_dimension": logits_dimension} - opspec := tf.OpSpec{ - Type: "BoostedTreesPredict", - Input: []tf.Input{ - tree_ensemble_handle, tf.OutputList(bucketized_features), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RandomGammaAttr is an optional argument to RandomGamma. -type RandomGammaAttr func(optionalAttr) - -// RandomGammaSeed sets the optional seed attribute to value. -// -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func RandomGammaSeed(value int64) RandomGammaAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// RandomGammaSeed2 sets the optional seed2 attribute to value. -// -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomGammaSeed2(value int64) RandomGammaAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Outputs random values from the Gamma distribution(s) described by alpha. -// -// This op uses the algorithm by Marsaglia et al. to acquire samples via -// transformation-rejection from pairs of uniform and normal random variables. -// See http://dl.acm.org/citation.cfm?id=358414 -// -// Arguments: -// shape: 1-D integer tensor. Shape of independent samples to draw from each -// distribution described by the shape parameters given in alpha. -// alpha: A tensor in which each scalar is a "shape" parameter describing the -// associated gamma distribution. -// -// Returns A tensor with shape `shape + shape(alpha)`. Each slice -// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for -// `alpha[i0, i1, ...iN]`. The dtype of the output matches the dtype of alpha. -func RandomGamma(scope *Scope, shape tf.Output, alpha tf.Output, optional ...RandomGammaAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RandomGamma", - Input: []tf.Input{ - shape, alpha, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns 0 if x == 0, and x * log1p(y) otherwise, elementwise. -func Xlog1py(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Xlog1py", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// QuantizedResizeBilinearAttr is an optional argument to QuantizedResizeBilinear. -type QuantizedResizeBilinearAttr func(optionalAttr) - -// QuantizedResizeBilinearAlignCorners sets the optional align_corners attribute to value. -// -// value: If true, the centers of the 4 corner pixels of the input and output tensors are -// aligned, preserving the values at the corner pixels. Defaults to false. -// If not specified, defaults to false -func QuantizedResizeBilinearAlignCorners(value bool) QuantizedResizeBilinearAttr { - return func(m optionalAttr) { - m["align_corners"] = value - } -} - -// QuantizedResizeBilinearHalfPixelCenters sets the optional half_pixel_centers attribute to value. 
-// If not specified, defaults to false -func QuantizedResizeBilinearHalfPixelCenters(value bool) QuantizedResizeBilinearAttr { - return func(m optionalAttr) { - m["half_pixel_centers"] = value - } -} - -// Resize quantized `images` to `size` using quantized bilinear interpolation. -// -// Input images and output images must be quantized types. -// -// Arguments: -// images: 4-D with shape `[batch, height, width, channels]`. -// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The -// new size for the images. -// -// -// -// Returns: -// resized_images: 4-D with shape -// `[batch, new_height, new_width, channels]`. -// out_min -// out_max -func QuantizedResizeBilinear(scope *Scope, images tf.Output, size tf.Output, min tf.Output, max tf.Output, optional ...QuantizedResizeBilinearAttr) (resized_images tf.Output, out_min tf.Output, out_max tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizedResizeBilinear", - Input: []tf.Input{ - images, size, min, max, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// ResizeAreaAttr is an optional argument to ResizeArea. -type ResizeAreaAttr func(optionalAttr) - -// ResizeAreaAlignCorners sets the optional align_corners attribute to value. -// -// value: If true, the centers of the 4 corner pixels of the input and output tensors are -// aligned, preserving the values at the corner pixels. Defaults to false. -// If not specified, defaults to false -func ResizeAreaAlignCorners(value bool) ResizeAreaAttr { - return func(m optionalAttr) { - m["align_corners"] = value - } -} - -// Resize `images` to `size` using area interpolation. -// -// Input images can be of different types but output images are always float. -// -// The range of pixel values for the output image might be slightly different -// from the range for the input image because of limited numerical precision. -// To guarantee an output range, for example `[0.0, 1.0]`, apply -// `tf.clip_by_value` to the output. -// -// Each output pixel is computed by first transforming the pixel's footprint into -// the input tensor and then averaging the pixels that intersect the footprint. An -// input pixel's contribution to the average is weighted by the fraction of its -// area that intersects the footprint. This is the same as OpenCV's INTER_AREA. -// -// Arguments: -// images: 4-D with shape `[batch, height, width, channels]`. -// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The -// new size for the images. -// -// Returns 4-D with shape -// `[batch, new_height, new_width, channels]`. -func ResizeArea(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeAreaAttr) (resized_images tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResizeArea", - Input: []tf.Input{ - images, size, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Restore a reader to a previously saved state. -// -// Not all Readers support being restored, so this can produce an -// Unimplemented error. -// -// Arguments: -// reader_handle: Handle to a Reader. -// state: Result of a ReaderSerializeState of a Reader with type -// matching reader_handle. -// -// Returns the created operation. 
-func ReaderRestoreStateV2(scope *Scope, reader_handle tf.Output, state tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReaderRestoreStateV2", - Input: []tf.Input{ - reader_handle, state, - }, - } - return scope.AddOperation(opspec) -} - -// Computes rectified linear 6: `min(max(features, 0), 6)`. -func Relu6(scope *Scope, features tf.Output) (activations tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Relu6", - Input: []tf.Input{ - features, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RaggedRangeAttr is an optional argument to RaggedRange. -type RaggedRangeAttr func(optionalAttr) - -// RaggedRangeTsplits sets the optional Tsplits attribute to value. -// If not specified, defaults to DT_INT64 -func RaggedRangeTsplits(value tf.DataType) RaggedRangeAttr { - return func(m optionalAttr) { - m["Tsplits"] = value - } -} - -// Returns a `RaggedTensor` containing the specified sequences of numbers. -// -// -// Returns a `RaggedTensor` `result` composed from `rt_dense_values` and -// `rt_nested_splits`, such that -// `result[i] = range(starts[i], limits[i], deltas[i])`. -// -// ```python -// (rt_nested_splits, rt_dense_values) = ragged_range( -// starts=[2, 5, 8], limits=[3, 5, 12], deltas=1) -// result = tf.ragged.from_row_splits(rt_dense_values, rt_nested_splits) -// print(result) -// -// ``` -// -// The input tensors `starts`, `limits`, and `deltas` may be scalars or vectors. -// The vector inputs must all have the same size. Scalar inputs are broadcast -// to match the size of the vector inputs. -// -// Arguments: -// starts: The starts of each range. -// limits: The limits of each range. -// deltas: The deltas of each range. -// -// Returns: -// rt_nested_splits: The `row_splits` for the returned `RaggedTensor`. -// rt_dense_values: The `flat_values` for the returned `RaggedTensor`. -func RaggedRange(scope *Scope, starts tf.Output, limits tf.Output, deltas tf.Output, optional ...RaggedRangeAttr) (rt_nested_splits tf.Output, rt_dense_values tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RaggedRange", - Input: []tf.Input{ - starts, limits, deltas, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Split a `SparseTensor` into `num_split` tensors along one dimension. -// -// If the `shape[split_dim]` is not an integer multiple of `num_split`. Slices -// `[0 : shape[split_dim] % num_split]` gets one extra dimension. -// For example, if `split_dim = 1` and `num_split = 2` and the input is -// -// input_tensor = shape = [2, 7] -// [ a d e ] -// [b c ] -// -// Graphically the output tensors are: -// -// output_tensor[0] = shape = [2, 4] -// [ a ] -// [b c ] -// -// output_tensor[1] = shape = [2, 3] -// [ d e ] -// [ ] -// -// Arguments: -// split_dim: 0-D. The dimension along which to split. Must be in the range -// `[0, rank(shape))`. -// indices: 2-D tensor represents the indices of the sparse tensor. -// values: 1-D tensor represents the values of the sparse tensor. -// shape: 1-D. tensor represents the shape of the sparse tensor. -// output indices: A list of 1-D tensors represents the indices of the output -// sparse tensors. -// num_split: The number of ways to split. -// -// Returns: -// output_indices -// output_values: A list of 1-D tensors represents the values of the output sparse -// tensors. 
-// output_shape: A list of 1-D tensors represents the shape of the output sparse -// tensors. -func SparseSplit(scope *Scope, split_dim tf.Output, indices tf.Output, values tf.Output, shape tf.Output, num_split int64) (output_indices []tf.Output, output_values []tf.Output, output_shape []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_split": num_split} - opspec := tf.OpSpec{ - Type: "SparseSplit", - Input: []tf.Input{ - split_dim, indices, values, shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if output_indices, idx, err = makeOutputList(op, idx, "output_indices"); err != nil { - scope.UpdateErr("SparseSplit", err) - return - } - if output_values, idx, err = makeOutputList(op, idx, "output_values"); err != nil { - scope.UpdateErr("SparseSplit", err) - return - } - if output_shape, idx, err = makeOutputList(op, idx, "output_shape"); err != nil { - scope.UpdateErr("SparseSplit", err) - return - } - return output_indices, output_values, output_shape -} - -// Produce a string tensor that encodes the state of a Reader. -// -// Not all Readers support being serialized, so this can produce an -// Unimplemented error. -// -// Arguments: -// reader_handle: Handle to a Reader. -func ReaderSerializeStateV2(scope *Scope, reader_handle tf.Output) (state tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReaderSerializeStateV2", - Input: []tf.Input{ - reader_handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns up to `num_records` (key, value) pairs produced by a Reader. -// -// Will dequeue from the input queue if necessary (e.g. when the -// Reader needs to start reading from a new file since it has finished -// with the previous file). -// It may return less than `num_records` even before the last batch. -// -// Arguments: -// reader_handle: Handle to a `Reader`. -// queue_handle: Handle to a `Queue`, with string work items. -// num_records: number of records to read from `Reader`. -// -// Returns: -// keys: A 1-D tensor. -// values: A 1-D tensor. -func ReaderReadUpToV2(scope *Scope, reader_handle tf.Output, queue_handle tf.Output, num_records tf.Output) (keys tf.Output, values tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReaderReadUpToV2", - Input: []tf.Input{ - reader_handle, queue_handle, num_records, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// QueueDequeueV2Attr is an optional argument to QueueDequeueV2. -type QueueDequeueV2Attr func(optionalAttr) - -// QueueDequeueV2TimeoutMs sets the optional timeout_ms attribute to value. -// -// value: If the queue is empty, this operation will block for up to -// timeout_ms milliseconds. -// Note: This option is not supported yet. -// If not specified, defaults to -1 -func QueueDequeueV2TimeoutMs(value int64) QueueDequeueV2Attr { - return func(m optionalAttr) { - m["timeout_ms"] = value - } -} - -// Dequeues a tuple of one or more tensors from the given queue. -// -// This operation has k outputs, where k is the number of components -// in the tuples stored in the given queue, and output i is the ith -// component of the dequeued tuple. -// -// N.B. If the queue is empty, this operation will block until an element -// has been dequeued (or 'timeout_ms' elapses, if specified). -// -// Arguments: -// handle: The handle to a queue. 
-// component_types: The type of each component in a tuple. -// -// Returns One or more tensors that were dequeued as a tuple. -func QueueDequeueV2(scope *Scope, handle tf.Output, component_types []tf.DataType, optional ...QueueDequeueV2Attr) (components []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"component_types": component_types} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QueueDequeueV2", - Input: []tf.Input{ - handle, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if components, idx, err = makeOutputList(op, idx, "components"); err != nil { - scope.UpdateErr("QueueDequeueV2", err) - return - } - return components -} - -// Return a slice from 'input'. -// -// The output tensor is a tensor with dimensions described by 'size' -// whose values are extracted from 'input' starting at the offsets in -// 'begin'. -// -// *Requirements*: -// 0 <= begin[i] <= begin[i] + size[i] <= Di for i in [0, n) -// -// Arguments: -// -// begin: begin[i] specifies the offset into the 'i'th dimension of -// 'input' to slice from. -// size: size[i] specifies the number of elements of the 'i'th dimension -// of 'input' to slice. If size[i] is -1, all remaining elements in dimension -// i are included in the slice (i.e. this is equivalent to setting -// size[i] = input.dim_size(i) - begin[i]). -func Slice(scope *Scope, input tf.Output, begin tf.Output, size tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Slice", - Input: []tf.Input{ - input, begin, size, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// TFRecordReaderV2Attr is an optional argument to TFRecordReaderV2. -type TFRecordReaderV2Attr func(optionalAttr) - -// TFRecordReaderV2Container sets the optional container attribute to value. -// -// value: If non-empty, this reader is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func TFRecordReaderV2Container(value string) TFRecordReaderV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// TFRecordReaderV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this reader is named in the given bucket -// with this shared_name. Otherwise, the node name is used instead. -// If not specified, defaults to "" -func TFRecordReaderV2SharedName(value string) TFRecordReaderV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// TFRecordReaderV2CompressionType sets the optional compression_type attribute to value. -// If not specified, defaults to "" -func TFRecordReaderV2CompressionType(value string) TFRecordReaderV2Attr { - return func(m optionalAttr) { - m["compression_type"] = value - } -} - -// A Reader that outputs the records from a TensorFlow Records file. -// -// Returns The handle to reference the Reader. -func TFRecordReaderV2(scope *Scope, optional ...TFRecordReaderV2Attr) (reader_handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TFRecordReaderV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ParseExampleDatasetAttr is an optional argument to ParseExampleDataset. 
-type ParseExampleDatasetAttr func(optionalAttr) - -// ParseExampleDatasetSloppy sets the optional sloppy attribute to value. -// If not specified, defaults to false -func ParseExampleDatasetSloppy(value bool) ParseExampleDatasetAttr { - return func(m optionalAttr) { - m["sloppy"] = value - } -} - -// ParseExampleDatasetRaggedKeys sets the optional ragged_keys attribute to value. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseExampleDatasetRaggedKeys(value []string) ParseExampleDatasetAttr { - return func(m optionalAttr) { - m["ragged_keys"] = value - } -} - -// ParseExampleDatasetRaggedValueTypes sets the optional ragged_value_types attribute to value. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseExampleDatasetRaggedValueTypes(value []tf.DataType) ParseExampleDatasetAttr { - return func(m optionalAttr) { - m["ragged_value_types"] = value - } -} - -// ParseExampleDatasetRaggedSplitTypes sets the optional ragged_split_types attribute to value. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseExampleDatasetRaggedSplitTypes(value []tf.DataType) ParseExampleDatasetAttr { - return func(m optionalAttr) { - m["ragged_split_types"] = value - } -} - -// Transforms `input_dataset` containing `Example` protos as vectors of DT_STRING into a dataset of `Tensor` or `SparseTensor` objects representing the parsed features. -// -// Arguments: -// -// -// dense_defaults: A dict mapping string keys to `Tensor`s. -// The keys of the dict must match the dense_keys of the feature. -// sparse_keys: A list of string keys in the examples features. -// The results for these keys will be returned as `SparseTensor` objects. -// dense_keys: A list of Ndense string Tensors (scalars). -// The keys expected in the Examples features associated with dense values. -// sparse_types: A list of `DTypes` of the same length as `sparse_keys`. -// Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`), -// and `tf.string` (`BytesList`) are supported. -// dense_shapes: List of tuples with the same length as `dense_keys`. -// The shape of the data for each dense feature referenced by `dense_keys`. -// Required for any input tensors identified by `dense_keys`. Must be -// either fully defined, or may contain an unknown first dimension. -// An unknown first dimension means the feature is treated as having -// a variable number of blocks, and the output shape along this dimension -// is considered unknown at graph build time. Padding is applied for -// minibatch elements smaller than the maximum number of blocks for the -// given feature along this dimension. -// output_types: The type list for the return values. -// output_shapes: The list of shapes being produced. 
-func ParseExampleDataset(scope *Scope, input_dataset tf.Output, num_parallel_calls tf.Output, dense_defaults []tf.Output, sparse_keys []string, dense_keys []string, sparse_types []tf.DataType, dense_shapes []tf.Shape, output_types []tf.DataType, output_shapes []tf.Shape, optional ...ParseExampleDatasetAttr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"sparse_keys": sparse_keys, "dense_keys": dense_keys, "sparse_types": sparse_types, "dense_shapes": dense_shapes, "output_types": output_types, "output_shapes": output_shapes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ParseExampleDataset", - Input: []tf.Input{ - input_dataset, num_parallel_calls, tf.OutputList(dense_defaults), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// IdentityReaderV2Attr is an optional argument to IdentityReaderV2. -type IdentityReaderV2Attr func(optionalAttr) - -// IdentityReaderV2Container sets the optional container attribute to value. -// -// value: If non-empty, this reader is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func IdentityReaderV2Container(value string) IdentityReaderV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// IdentityReaderV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this reader is named in the given bucket -// with this shared_name. Otherwise, the node name is used instead. -// If not specified, defaults to "" -func IdentityReaderV2SharedName(value string) IdentityReaderV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// A Reader that outputs the queued work as both the key and value. -// -// To use, enqueue strings in a Queue. ReaderRead will take the front -// work string and output (work, work). -// -// Returns The handle to reference the Reader. -func IdentityReaderV2(scope *Scope, optional ...IdentityReaderV2Attr) (reader_handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "IdentityReaderV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// FixedLengthRecordReaderV2Attr is an optional argument to FixedLengthRecordReaderV2. -type FixedLengthRecordReaderV2Attr func(optionalAttr) - -// FixedLengthRecordReaderV2HeaderBytes sets the optional header_bytes attribute to value. -// -// value: Number of bytes in the header, defaults to 0. -// If not specified, defaults to 0 -func FixedLengthRecordReaderV2HeaderBytes(value int64) FixedLengthRecordReaderV2Attr { - return func(m optionalAttr) { - m["header_bytes"] = value - } -} - -// FixedLengthRecordReaderV2FooterBytes sets the optional footer_bytes attribute to value. -// -// value: Number of bytes in the footer, defaults to 0. -// If not specified, defaults to 0 -func FixedLengthRecordReaderV2FooterBytes(value int64) FixedLengthRecordReaderV2Attr { - return func(m optionalAttr) { - m["footer_bytes"] = value - } -} - -// FixedLengthRecordReaderV2HopBytes sets the optional hop_bytes attribute to value. -// -// value: Number of bytes to hop before each read. Default of 0 means using -// record_bytes. 
-// If not specified, defaults to 0 -func FixedLengthRecordReaderV2HopBytes(value int64) FixedLengthRecordReaderV2Attr { - return func(m optionalAttr) { - m["hop_bytes"] = value - } -} - -// FixedLengthRecordReaderV2Container sets the optional container attribute to value. -// -// value: If non-empty, this reader is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func FixedLengthRecordReaderV2Container(value string) FixedLengthRecordReaderV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// FixedLengthRecordReaderV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this reader is named in the given bucket -// with this shared_name. Otherwise, the node name is used instead. -// If not specified, defaults to "" -func FixedLengthRecordReaderV2SharedName(value string) FixedLengthRecordReaderV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// FixedLengthRecordReaderV2Encoding sets the optional encoding attribute to value. -// -// value: The type of encoding for the file. Currently ZLIB and GZIP -// are supported. Defaults to none. -// If not specified, defaults to "" -func FixedLengthRecordReaderV2Encoding(value string) FixedLengthRecordReaderV2Attr { - return func(m optionalAttr) { - m["encoding"] = value - } -} - -// A Reader that outputs fixed-length records from a file. -// -// Arguments: -// record_bytes: Number of bytes in the record. -// -// Returns The handle to reference the Reader. -func FixedLengthRecordReaderV2(scope *Scope, record_bytes int64, optional ...FixedLengthRecordReaderV2Attr) (reader_handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"record_bytes": record_bytes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FixedLengthRecordReaderV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Saves the input tensors to disk. -// -// The size of `tensor_names` must match the number of tensors in `data`. `data[i]` -// is written to `filename` with name `tensor_names[i]`. -// -// See also `SaveSlices`. -// -// Arguments: -// filename: Must have a single element. The name of the file to which we write -// the tensor. -// tensor_names: Shape `[N]`. The names of the tensors to be saved. -// data: `N` tensors to save. -// -// Returns the created operation. -func Save(scope *Scope, filename tf.Output, tensor_names tf.Output, data []tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Save", - Input: []tf.Input{ - filename, tensor_names, tf.OutputList(data), - }, - } - return scope.AddOperation(opspec) -} - -// DatasetToGraphV2Attr is an optional argument to DatasetToGraphV2. -type DatasetToGraphV2Attr func(optionalAttr) - -// DatasetToGraphV2ExternalStatePolicy sets the optional external_state_policy attribute to value. -// If not specified, defaults to 0 -func DatasetToGraphV2ExternalStatePolicy(value int64) DatasetToGraphV2Attr { - return func(m optionalAttr) { - m["external_state_policy"] = value - } -} - -// DatasetToGraphV2StripDeviceAssignment sets the optional strip_device_assignment attribute to value. -// If not specified, defaults to false -func DatasetToGraphV2StripDeviceAssignment(value bool) DatasetToGraphV2Attr { - return func(m optionalAttr) { - m["strip_device_assignment"] = value - } -} - -// Returns a serialized GraphDef representing `input_dataset`. 
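A rough usage sketch for the Save wrapper above (same imports and session plumbing as the Slice sketch earlier; the file path and tensor names are placeholders). The returned *tf.Operation is run as a target rather than fetched:

// buildSave wires a Save op that writes two tensors to a single file.
func buildSave(s *op.Scope) *tf.Operation {
	filename := op.Const(s, "/tmp/demo-checkpoint") // placeholder path
	names := op.Const(s, []string{"weights", "bias"})
	weights := op.Const(s, [][]float32{{1, 2}, {3, 4}})
	bias := op.Const(s, []float32{0.5, 0.5})
	// tensor_names[i] labels data[i] in the written file.
	return op.Save(s, filename, names, []tf.Output{weights, bias})
}

An operation built this way is executed with sess.Run(nil, nil, []*tf.Operation{saveOp}).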
-// -// Returns a graph representation for `input_dataset`. -// -// Arguments: -// input_dataset: A variant tensor representing the dataset to return the graph representation for. -// -// Returns The graph representation of the dataset (as serialized GraphDef). -func DatasetToGraphV2(scope *Scope, input_dataset tf.Output, optional ...DatasetToGraphV2Attr) (graph tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DatasetToGraphV2", - Input: []tf.Input{ - input_dataset, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Restores tensors from a V2 checkpoint. -// -// For backward compatibility with the V1 format, this Op currently allows -// restoring from a V1 checkpoint as well: -// - This Op first attempts to find the V2 index file pointed to by "prefix", and -// if found proceed to read it as a V2 checkpoint; -// - Otherwise the V1 read path is invoked. -// Relying on this behavior is not recommended, as the ability to fall back to read -// V1 might be deprecated and eventually removed. -// -// By default, restores the named tensors in full. If the caller wishes to restore -// specific slices of stored tensors, "shape_and_slices" should be non-empty -// strings and correspondingly well-formed. -// -// Callers must ensure all the named tensors are indeed stored in the checkpoint. -// -// Arguments: -// prefix: Must have a single element. The prefix of a V2 checkpoint. -// tensor_names: shape {N}. The names of the tensors to be restored. -// shape_and_slices: shape {N}. The slice specs of the tensors to be restored. -// Empty strings indicate that they are non-partitioned tensors. -// dtypes: shape {N}. The list of expected dtype for the tensors. Must match -// those stored in the checkpoint. -// -// Returns shape {N}. The restored tensors, whose shapes are read from the -// checkpoint directly. -func RestoreV2(scope *Scope, prefix tf.Output, tensor_names tf.Output, shape_and_slices tf.Output, dtypes []tf.DataType) (tensors []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - opspec := tf.OpSpec{ - Type: "RestoreV2", - Input: []tf.Input{ - prefix, tensor_names, shape_and_slices, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if tensors, idx, err = makeOutputList(op, idx, "tensors"); err != nil { - scope.UpdateErr("RestoreV2", err) - return - } - return tensors -} - -// Delete the TensorArray from its resource container. -// -// This enables the user to close and release the resource in the middle -// of a step/run. -// -// Arguments: -// handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad). -// -// Returns the created operation. -func TensorArrayCloseV3(scope *Scope, handle tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorArrayCloseV3", - Input: []tf.Input{ - handle, - }, - } - return scope.AddOperation(opspec) -} - -// Saves tensors in V2 checkpoint format. -// -// By default, saves the named tensors in full. If the caller wishes to save -// specific slices of full tensors, "shape_and_slices" should be non-empty strings -// and correspondingly well-formed. -// -// Arguments: -// prefix: Must have a single element. The prefix of the V2 checkpoint to which we -// write the tensors. -// tensor_names: shape {N}. 
The names of the tensors to be saved. -// shape_and_slices: shape {N}. The slice specs of the tensors to be saved. -// Empty strings indicate that they are non-partitioned tensors. -// tensors: `N` tensors to save. -// -// Returns the created operation. -func SaveV2(scope *Scope, prefix tf.Output, tensor_names tf.Output, shape_and_slices tf.Output, tensors []tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SaveV2", - Input: []tf.Input{ - prefix, tensor_names, shape_and_slices, tf.OutputList(tensors), - }, - } - return scope.AddOperation(opspec) -} - -// SparseCountSparseOutputAttr is an optional argument to SparseCountSparseOutput. -type SparseCountSparseOutputAttr func(optionalAttr) - -// SparseCountSparseOutputMinlength sets the optional minlength attribute to value. -// -// value: Minimum value to count. Can be set to -1 for no minimum. -// If not specified, defaults to -1 -// -// REQUIRES: value >= -1 -func SparseCountSparseOutputMinlength(value int64) SparseCountSparseOutputAttr { - return func(m optionalAttr) { - m["minlength"] = value - } -} - -// SparseCountSparseOutputMaxlength sets the optional maxlength attribute to value. -// -// value: Maximum value to count. Can be set to -1 for no maximum. -// If not specified, defaults to -1 -// -// REQUIRES: value >= -1 -func SparseCountSparseOutputMaxlength(value int64) SparseCountSparseOutputAttr { - return func(m optionalAttr) { - m["maxlength"] = value - } -} - -// Performs sparse-output bin counting for a sparse tensor input. -// -// Counts the number of times each value occurs in the input. -// -// Arguments: -// indices: Tensor containing the indices of the sparse tensor to count. -// values: Tensor containing values of the sparse tensor to count. -// dense_shape: Tensor containing the dense shape of the sparse tensor to count. -// weights: A Tensor of the same shape as indices containing per-index weight values. -// May also be the empty tensor if no weights are used. -// binary_output: Whether to output the number of occurrences of each value or 1. -// -// Returns: -// output_indices: Indices tensor for the resulting sparse tensor object. -// output_values: Values tensor for the resulting sparse tensor object. -// output_dense_shape: Shape tensor for the resulting sparse tensor object. -func SparseCountSparseOutput(scope *Scope, indices tf.Output, values tf.Output, dense_shape tf.Output, weights tf.Output, binary_output bool, optional ...SparseCountSparseOutputAttr) (output_indices tf.Output, output_values tf.Output, output_dense_shape tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"binary_output": binary_output} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SparseCountSparseOutput", - Input: []tf.Input{ - indices, values, dense_shape, weights, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// DebugNumericSummaryV2Attr is an optional argument to DebugNumericSummaryV2. -type DebugNumericSummaryV2Attr func(optionalAttr) - -// DebugNumericSummaryV2OutputDtype sets the optional output_dtype attribute to value. -// -// value: Optional. The type of the output. Can be float32 or float64 (default: float32). 
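A sketch of a V2-checkpoint round trip with the SaveV2 and RestoreV2 wrappers above (the prefix and tensor names are placeholders; empty shape_and_slices entries mean the tensors are not partitioned):

// buildSaveRestoreV2 writes two tensors under a checkpoint prefix and builds
// the ops that read them back by name and dtype.
func buildSaveRestoreV2(s *op.Scope) (save *tf.Operation, restored []tf.Output) {
	prefix := op.Const(s, "/tmp/demo-ckpt-v2") // placeholder prefix
	names := op.Const(s, []string{"w", "b"})
	slices := op.Const(s, []string{"", ""}) // non-partitioned tensors
	w := op.Const(s, [][]float32{{1, 2}, {3, 4}})
	b := op.Const(s, []float32{0.1, 0.2})

	save = op.SaveV2(s, prefix, names, slices, []tf.Output{w, b})
	restored = op.RestoreV2(s, prefix, names, slices,
		[]tf.DataType{tf.Float, tf.Float})
	return save, restored
}

In practice the save target is run in one Session.Run call and the restored outputs are fetched in a second call, so the checkpoint exists before RestoreV2 reads it.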
-// If not specified, defaults to DT_FLOAT -func DebugNumericSummaryV2OutputDtype(value tf.DataType) DebugNumericSummaryV2Attr { - return func(m optionalAttr) { - m["output_dtype"] = value - } -} - -// DebugNumericSummaryV2TensorDebugMode sets the optional tensor_debug_mode attribute to value. -// -// value: Tensor debug mode: the mode in which the input tensor is summarized -// by the op. See the TensorDebugMode enum in -// tensorflow/core/protobuf/debug_event.proto for details. -// -// Supported values: -// 2 (CURT_HEALTH): Output a float32/64 tensor of shape [2]. The 1st -// element is the tensor_id, if provided, and -1 otherwise. The 2nd -// element is a bit which is set to 1 if the input tensor has an -// infinity or nan value, or zero otherwise. -// -// 3 (CONCISE_HEALTH): Output a float32/64 tensor of shape [5]. The 1st -// element is the tensor_id, if provided, and -1 otherwise. The -// remaining four slots are the total number of elements, -infs, -// +infs, and nans in the input tensor respectively. -// -// 4 (FULL_HEALTH): Output a float32/64 tensor of shape [11]. The 1st -// element is the tensor_id, if provided, and -1 otherwise. The 2nd -// element is the device_id, if provided, and -1 otherwise. The 3rd -// element holds the datatype value of the input tensor as according -// to the enumerated type in tensorflow/core/framework/types.proto. -// The remaining elements hold the total number of elements, -infs, -// +infs, nans, negative finite numbers, zeros, and positive finite -// numbers in the input tensor respectively. -// -// 5 (SHAPE): Output a float32/64 tensor of shape [10]. The 1st -// element is the tensor_id, if provided, and -1 otherwise. The 2nd -// element holds the datatype value of the input tensor as according -// to the enumerated type in tensorflow/core/framework/types.proto. -// The 3rd element holds the rank of the tensor. The 4th element holds -// the number of elements within the tensor. Finally the remaining 6 -// elements hold the shape of the tensor. If the rank of the tensor -// is lower than 6, the shape is right padded with zeros. If the rank -// is greater than 6, the head of the shape is truncated. -// -// 6 (FULL_NUMERICS): Output a float32/64 tensor of shape [22]. The 1st -// element is the tensor_id, if provided, and -1 otherwise. The 2nd -// element is the device_id, if provided, and -1 otherwise. The 3rd -// element holds the datatype value of the input tensor as according -// to the enumerated type in tensorflow/core/framework/types.proto. -// The 4th element holds the rank of the tensor. The 5th to 11th -// elements hold the shape of the tensor. If the rank of the tensor -// is lower than 6, the shape is right padded with zeros. If the rank -// is greater than 6, the head of the shape is truncated. The 12th to -// 18th elements hold the number of elements, -infs, +infs, nans, -// denormal floats, negative finite numbers, zeros, and positive -// finite numbers in the input tensor respectively. The final four -// elements hold the min value, max value, mean, and variance of the -// input tensor. -// -// 8 (REDUCE_INF_NAN_THREE_SLOTS): Output a float32/64 tensor of shape -// [3]. The 1st element is -inf if any elements of the input tensor -// is -inf, or zero otherwise. The 2nd element is +inf if any elements -// of the input tensor is +inf, or zero otherwise. The 3rd element is -// nan if any element of the input tensor is nan, or zero otherwise. 
-// If not specified, defaults to -1 -func DebugNumericSummaryV2TensorDebugMode(value int64) DebugNumericSummaryV2Attr { - return func(m optionalAttr) { - m["tensor_debug_mode"] = value - } -} - -// DebugNumericSummaryV2TensorId sets the optional tensor_id attribute to value. -// -// value: Optional. An integer identifier for the tensor being summarized by this op. -// If not specified, defaults to -1 -func DebugNumericSummaryV2TensorId(value int64) DebugNumericSummaryV2Attr { - return func(m optionalAttr) { - m["tensor_id"] = value - } -} - -// Debug Numeric Summary V2 Op. -// -// Computes a numeric summary of the input tensor. The shape of the output -// depends on the tensor_debug_mode attribute. -// This op is used internally by TensorFlow Debugger (tfdbg) v2. -// -// Arguments: -// input: Input tensor, to be summarized by the op. -func DebugNumericSummaryV2(scope *Scope, input tf.Output, optional ...DebugNumericSummaryV2Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DebugNumericSummaryV2", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// DebugNumericSummaryAttr is an optional argument to DebugNumericSummary. -type DebugNumericSummaryAttr func(optionalAttr) - -// DebugNumericSummaryDeviceName sets the optional device_name attribute to value. -// If not specified, defaults to "" -func DebugNumericSummaryDeviceName(value string) DebugNumericSummaryAttr { - return func(m optionalAttr) { - m["device_name"] = value - } -} - -// DebugNumericSummaryTensorName sets the optional tensor_name attribute to value. -// -// value: Name of the input tensor. -// If not specified, defaults to "" -func DebugNumericSummaryTensorName(value string) DebugNumericSummaryAttr { - return func(m optionalAttr) { - m["tensor_name"] = value - } -} - -// DebugNumericSummaryDebugUrls sets the optional debug_urls attribute to value. -// -// value: List of URLs to debug targets, e.g., -// file:///foo/tfdbg_dump, grpc:://localhost:11011. -// If not specified, defaults to <> -func DebugNumericSummaryDebugUrls(value []string) DebugNumericSummaryAttr { - return func(m optionalAttr) { - m["debug_urls"] = value - } -} - -// DebugNumericSummaryLowerBound sets the optional lower_bound attribute to value. -// -// value: (float) The lower bound <= which values will be included in the -// generalized -inf count. Default: -inf. -// If not specified, defaults to -inf -func DebugNumericSummaryLowerBound(value float32) DebugNumericSummaryAttr { - return func(m optionalAttr) { - m["lower_bound"] = value - } -} - -// DebugNumericSummaryUpperBound sets the optional upper_bound attribute to value. -// -// value: (float) The upper bound >= which values will be included in the -// generalized +inf count. Default: +inf. -// If not specified, defaults to inf -func DebugNumericSummaryUpperBound(value float32) DebugNumericSummaryAttr { - return func(m optionalAttr) { - m["upper_bound"] = value - } -} - -// DebugNumericSummaryMuteIfHealthy sets the optional mute_if_healthy attribute to value. -// -// value: (bool) Do not send data to the debug URLs unless at least one -// of elements [2], [3] and [7] (i.e., the nan count and the generalized -inf and -// inf counts) is non-zero. 
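As a small illustration of the tensor_debug_mode values listed above, a sketch that requests CURT_HEALTH (mode 2) for some tensor x; the tensor id is an arbitrary value chosen for the example:

// curtHealthSummary returns a length-2 float32 tensor: [tensor_id, has_inf_or_nan].
func curtHealthSummary(sc *op.Scope, x tf.Output) tf.Output {
	return op.DebugNumericSummaryV2(sc, x,
		op.DebugNumericSummaryV2TensorDebugMode(2), // 2 == CURT_HEALTH
		op.DebugNumericSummaryV2TensorId(7),        // arbitrary id for the example
		op.DebugNumericSummaryV2OutputDtype(tf.Float))
}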
-// If not specified, defaults to false -func DebugNumericSummaryMuteIfHealthy(value bool) DebugNumericSummaryAttr { - return func(m optionalAttr) { - m["mute_if_healthy"] = value - } -} - -// DebugNumericSummaryGatedGrpc sets the optional gated_grpc attribute to value. -// -// value: Whether this op will be gated. If any of the debug_urls of this -// debug node is of the grpc:// scheme, when the value of this attribute is set -// to True, the data will not actually be sent via the grpc stream unless this -// debug op has been enabled at the debug_url. If all of the debug_urls of this -// debug node are of the grpc:// scheme and the debug op is enabled at none of -// them, the output will be an empty Tensor. -// If not specified, defaults to false -func DebugNumericSummaryGatedGrpc(value bool) DebugNumericSummaryAttr { - return func(m optionalAttr) { - m["gated_grpc"] = value - } -} - -// Debug Numeric Summary Op. -// -// Provide a basic summary of numeric value types, range and distribution. -// -// output: A double tensor of shape [14 + nDimensions], where nDimensions is the -// number of dimensions of the tensor's shape. The elements of output are: -// [0]: is initialized (1.0) or not (0.0). -// [1]: total number of elements -// [2]: NaN element count -// [3]: generalized -inf count: elements <= lower_bound. lower_bound is -inf by -// default. -// [4]: negative element count (excluding -inf), if lower_bound is the default -// -inf. Otherwise, this is the count of elements > lower_bound and < 0. -// [5]: zero element count -// [6]: positive element count (excluding +inf), if upper_bound is the default -// +inf. Otherwise, this is the count of elements < upper_bound and > 0. -// [7]: generalized +inf count, elements >= upper_bound. upper_bound is +inf by -// default. -// Output elements [1:8] are all zero, if the tensor is uninitialized. -// [8]: minimum of all non-inf and non-NaN elements. -// If uninitialized or no such element exists: +inf. -// [9]: maximum of all non-inf and non-NaN elements. -// If uninitialized or no such element exists: -inf. -// [10]: mean of all non-inf and non-NaN elements. -// If uninitialized or no such element exists: NaN. -// [11]: variance of all non-inf and non-NaN elements. -// If uninitialized or no such element exists: NaN. -// [12]: Data type of the tensor encoded as an enum integer. See the DataType -// proto for more details. -// [13]: Number of dimensions of the tensor (ndims). -// [14+]: Sizes of the dimensions. -// -// -// Arguments: -// input: Input tensor, non-Reference type. -func DebugNumericSummary(scope *Scope, input tf.Output, optional ...DebugNumericSummaryAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DebugNumericSummary", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Outputs random integers from a uniform distribution. -// -// The generated values are uniform integers in the range `[minval, maxval)`. -// The lower bound `minval` is included in the range, while the upper bound -// `maxval` is excluded. -// -// The random integers are slightly biased unless `maxval - minval` is an exact -// power of two. The bias is small for values of `maxval - minval` significantly -// smaller than the range of the output (either `2^32` or `2^64`). -// -// Arguments: -// resource: The handle of the resource variable that stores the state of the RNG. 
-// algorithm: The RNG algorithm. -// shape: The shape of the output tensor. -// minval: Minimum value (inclusive, scalar). -// maxval: Maximum value (exclusive, scalar). -// -// Returns Random values with specified shape. -func StatefulUniformInt(scope *Scope, resource tf.Output, algorithm tf.Output, shape tf.Output, minval tf.Output, maxval tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "StatefulUniformInt", - Input: []tf.Input{ - resource, algorithm, shape, minval, maxval, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// An Op to exchange data across TPU replicas. -// -// On each replica, the input is split into `split_count` blocks along -// `split_dimension` and send to the other replicas given group_assignment. After -// receiving `split_count` - 1 blocks from other replicas, we concatenate the -// blocks along `concat_dimension` as the output. -// -// For example, suppose there are 2 TPU replicas: -// replica 0 receives input: `[[A, B]]` -// replica 1 receives input: `[[C, D]]` -// -// group_assignment=`[[0, 1]]` -// concat_dimension=0 -// split_dimension=1 -// split_count=2 -// -// replica 0's output: `[[A], [C]]` -// replica 1's output: `[[B], [D]]` -// -// Arguments: -// input: The local input to the sum. -// group_assignment: An int32 tensor with shape -// [num_groups, num_replicas_per_group]. `group_assignment[i]` represents the -// replica ids in the ith subgroup. -// concat_dimension: The dimension number to concatenate. -// split_dimension: The dimension number to split. -// split_count: The number of splits, this number must equal to the sub-group -// size(group_assignment.get_shape()[1]) -// -// Returns The exchanged result. -func AllToAll(scope *Scope, input tf.Output, group_assignment tf.Output, concat_dimension int64, split_dimension int64, split_count int64) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"concat_dimension": concat_dimension, "split_dimension": split_dimension, "split_count": split_count} - opspec := tf.OpSpec{ - Type: "AllToAll", - Input: []tf.Input{ - input, group_assignment, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// TridiagonalSolveAttr is an optional argument to TridiagonalSolve. -type TridiagonalSolveAttr func(optionalAttr) - -// TridiagonalSolvePartialPivoting sets the optional partial_pivoting attribute to value. -// -// value: Whether to apply partial pivoting. Partial pivoting makes the procedure more -// stable, but slower. -// If not specified, defaults to true -func TridiagonalSolvePartialPivoting(value bool) TridiagonalSolveAttr { - return func(m optionalAttr) { - m["partial_pivoting"] = value - } -} - -// Solves tridiagonal systems of equations. -// -// Solves tridiagonal systems of equations. -// Supports batch dimensions and multiple right-hand sides per each left-hand -// side. -// On CPU, solution is computed via Gaussian elimination with or without partial -// pivoting, depending on `partial_pivoting` attribute. On GPU, Nvidia's cuSPARSE -// library is used: https://docs.nvidia.com/cuda/cusparse/index.html#gtsv -// Partial pivoting is not yet supported by XLA backends. -// -// Arguments: -// diagonals: Tensor of shape `[..., 3, M]` whose innermost 2 dimensions represent the -// tridiagonal matrices with three rows being the superdiagonal, diagonals, and -// subdiagonals, in order. 
The last element of the superdiagonal and the first -// element of the subdiagonal is ignored. -// rhs: Tensor of shape `[..., M, K]`, representing K right-hand sides per each -// left-hand side. -// -// Returns Tensor of shape `[..., M, K]` containing the solutions -func TridiagonalSolve(scope *Scope, diagonals tf.Output, rhs tf.Output, optional ...TridiagonalSolveAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TridiagonalSolve", - Input: []tf.Input{ - diagonals, rhs, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes gradients for SparseSegmentMean. -// -// Returns tensor "output" with same shape as grad, except for dimension 0 whose -// value is output_dim0. -// -// Arguments: -// grad: gradient propagated to the SparseSegmentMean op. -// indices: indices passed to the corresponding SparseSegmentMean op. -// segment_ids: segment_ids passed to the corresponding SparseSegmentMean op. -// output_dim0: dimension 0 of "data" passed to SparseSegmentMean op. -func SparseSegmentMeanGrad(scope *Scope, grad tf.Output, indices tf.Output, segment_ids tf.Output, output_dim0 tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSegmentMeanGrad", - Input: []tf.Input{ - grad, indices, segment_ids, output_dim0, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SvdAttr is an optional argument to Svd. -type SvdAttr func(optionalAttr) - -// SvdComputeUv sets the optional compute_uv attribute to value. -// -// value: If true, left and right singular vectors will be -// computed and returned in `u` and `v`, respectively. -// If false, `u` and `v` are not set and should never referenced. -// If not specified, defaults to true -func SvdComputeUv(value bool) SvdAttr { - return func(m optionalAttr) { - m["compute_uv"] = value - } -} - -// SvdFullMatrices sets the optional full_matrices attribute to value. -// -// value: If true, compute full-sized `u` and `v`. If false -// (the default), compute only the leading `P` singular vectors. -// Ignored if `compute_uv` is `False`. -// If not specified, defaults to false -func SvdFullMatrices(value bool) SvdAttr { - return func(m optionalAttr) { - m["full_matrices"] = value - } -} - -// Computes the singular value decompositions of one or more matrices. -// -// Computes the SVD of each inner matrix in `input` such that -// `input[..., :, :] = u[..., :, :] * diag(s[..., :, :]) * transpose(v[..., :, :])` -// -// ```python -// # a is a tensor containing a batch of matrices. -// # s is a tensor of singular values for each matrix. -// # u is the tensor containing the left singular vectors for each matrix. -// # v is the tensor containing the right singular vectors for each matrix. -// s, u, v = svd(a) -// s, _, _ = svd(a, compute_uv=False) -// ``` -// -// Arguments: -// input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions -// form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`. -// -// Returns: -// s: Singular values. Shape is `[..., P]`. -// u: Left singular vectors. If `full_matrices` is `False` then shape is -// `[..., M, P]`; if `full_matrices` is `True` then shape is -// `[..., M, M]`. Undefined if `compute_uv` is `False`. -// v: Left singular vectors. If `full_matrices` is `False` then shape is -// `[..., N, P]`. If `full_matrices` is `True` then shape is `[..., N, N]`. 
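A sketch of the diagonals packing described above for a single 3x3 tridiagonal system (rows are the superdiagonal, main diagonal, and subdiagonal; the ignored corner entries are set to zero here, and the values are illustrative):

// tridiagSolveSketch solves one 3x3 tridiagonal system with a single right-hand side.
func tridiagSolveSketch(sc *op.Scope) tf.Output {
	diagonals := op.Const(sc, [][]float32{
		{1, 1, 0}, // superdiagonal; last element is ignored
		{2, 2, 2}, // main diagonal
		{0, 1, 1}, // subdiagonal; first element is ignored
	})
	rhs := op.Const(sc, [][]float32{{3}, {4}, {5}}) // shape [3, 1]
	return op.TridiagonalSolve(sc, diagonals, rhs,
		op.TridiagonalSolvePartialPivoting(true))
}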
-// Undefined if `compute_uv` is false. -func Svd(scope *Scope, input tf.Output, optional ...SvdAttr) (s tf.Output, u tf.Output, v tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Svd", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// QrAttr is an optional argument to Qr. -type QrAttr func(optionalAttr) - -// QrFullMatrices sets the optional full_matrices attribute to value. -// -// value: If true, compute full-sized `q` and `r`. If false -// (the default), compute only the leading `P` columns of `q`. -// If not specified, defaults to false -func QrFullMatrices(value bool) QrAttr { - return func(m optionalAttr) { - m["full_matrices"] = value - } -} - -// Computes the QR decompositions of one or more matrices. -// -// Computes the QR decomposition of each inner matrix in `tensor` such that -// `tensor[..., :, :] = q[..., :, :] * r[..., :,:])` -// -// ```python -// # a is a tensor. -// # q is a tensor of orthonormal matrices. -// # r is a tensor of upper triangular matrices. -// q, r = qr(a) -// q_full, r_full = qr(a, full_matrices=True) -// ``` -// -// Arguments: -// input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions -// form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`. -// -// Returns: -// q: Orthonormal basis for range of `a`. If `full_matrices` is `False` then -// shape is `[..., M, P]`; if `full_matrices` is `True` then shape is -// `[..., M, M]`. -// r: Triangular factor. If `full_matrices` is `False` then shape is -// `[..., P, N]`. If `full_matrices` is `True` then shape is `[..., M, N]`. -func Qr(scope *Scope, input tf.Output, optional ...QrAttr) (q tf.Output, r tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Qr", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// MatrixTriangularSolveAttr is an optional argument to MatrixTriangularSolve. -type MatrixTriangularSolveAttr func(optionalAttr) - -// MatrixTriangularSolveLower sets the optional lower attribute to value. -// -// value: Boolean indicating whether the innermost matrices in `matrix` are -// lower or upper triangular. -// If not specified, defaults to true -func MatrixTriangularSolveLower(value bool) MatrixTriangularSolveAttr { - return func(m optionalAttr) { - m["lower"] = value - } -} - -// MatrixTriangularSolveAdjoint sets the optional adjoint attribute to value. -// -// value: Boolean indicating whether to solve with `matrix` or its (block-wise) -// adjoint. -// -// @compatibility(numpy) -// Equivalent to scipy.linalg.solve_triangular -// @end_compatibility -// If not specified, defaults to false -func MatrixTriangularSolveAdjoint(value bool) MatrixTriangularSolveAttr { - return func(m optionalAttr) { - m["adjoint"] = value - } -} - -// Solves systems of linear equations with upper or lower triangular matrices by backsubstitution. -// -// -// `matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions form -// square matrices. If `lower` is `True` then the strictly upper triangular part -// of each inner-most matrix is assumed to be zero and not accessed. 
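To make the Svd and Qr wrappers above concrete, a sketch that builds thin factorizations of the same small matrix (full_matrices=false keeps only the leading P columns; the matrix values are arbitrary):

// thinFactorizations builds a thin SVD and a thin QR of a 3x2 example matrix.
func thinFactorizations(sc *op.Scope) (sv, u, v, q, r tf.Output) {
	a := op.Const(sc, [][]float32{{1, 0}, {0, 2}, {0, 0}})
	sv, u, v = op.Svd(sc, a, op.SvdComputeUv(true), op.SvdFullMatrices(false))
	q, r = op.Qr(sc, a, op.QrFullMatrices(false))
	return sv, u, v, q, r
}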
-// If `lower` is False then the strictly lower triangular part of each inner-most
-// matrix is assumed to be zero and not accessed.
-// `rhs` is a tensor of shape `[..., M, N]`.
-//
-// The output is a tensor of shape `[..., M, N]`. If `adjoint` is
-// `False` then the innermost matrices in `output` satisfy matrix equations
-// `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`.
-// If `adjoint` is `True` then the innermost matrices in
-// `output` satisfy matrix equations
-// `adjoint(matrix[..., i, k]) * output[..., k, j] = rhs[..., i, j]`.
-//
-// Note, the batch shapes for the inputs only need to broadcast.
-//
-// Example:
-// ```python
-//
-// a = tf.constant([[3, 0, 0, 0],
-//                  [2, 1, 0, 0],
-//                  [1, 0, 1, 0],
-//                  [1, 1, 1, 1]], dtype=tf.float32)
-//
-// b = tf.constant([[4],
-//                  [2],
-//                  [4],
-//                  [2]], dtype=tf.float32)
-//
-// x = tf.linalg.triangular_solve(a, b, lower=True)
-// x
-// # <tf.Tensor: shape=(4, 1), dtype=float32, numpy=
-// # array([[ 1.3333334 ],
-// #        [-0.66666675],
-// #        [ 2.6666665 ],
-// #        [-1.3333331 ]], dtype=float32)>
-//
-// # in python3 one can use `a@x`
-// tf.matmul(a, x)
-// # <tf.Tensor: shape=(4, 1), dtype=float32, numpy=
-// # array([[4.],
-// #        [2.],
-// #        [4.],
-// #        [2.]], dtype=float32)>
-// ```
-//
-// Arguments:
-// matrix: Shape is `[..., M, M]`.
-// rhs: Shape is `[..., M, K]`.
-//
-// Returns Shape is `[..., M, K]`.
-func MatrixTriangularSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...MatrixTriangularSolveAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "MatrixTriangularSolve",
-		Input: []tf.Input{
-			matrix, rhs,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// SelfAdjointEigV2Attr is an optional argument to SelfAdjointEigV2.
-type SelfAdjointEigV2Attr func(optionalAttr)
-
-// SelfAdjointEigV2ComputeV sets the optional compute_v attribute to value.
-//
-// value: If `True` then eigenvectors will be computed and returned in `v`.
-// Otherwise, only the eigenvalues will be computed.
-// If not specified, defaults to true
-func SelfAdjointEigV2ComputeV(value bool) SelfAdjointEigV2Attr {
-	return func(m optionalAttr) {
-		m["compute_v"] = value
-	}
-}
-
-// Computes the eigen decomposition of one or more square self-adjoint matrices.
-//
-// Computes the eigenvalues and (optionally) eigenvectors of each inner matrix in
-// `input` such that `input[..., :, :] = v[..., :, :] * diag(e[..., :])`. The eigenvalues
-// are sorted in non-decreasing order.
-//
-// ```python
-// # a is a tensor.
-// # e is a tensor of eigenvalues.
-// # v is a tensor of eigenvectors.
-// e, v = self_adjoint_eig(a)
-// e = self_adjoint_eig(a, compute_v=False)
-// ```
-//
-// Arguments:
-// input: `Tensor` input of shape `[N, N]`.
-//
-// Returns:
-// e: Eigenvalues. Shape is `[N]`.
-// v: Eigenvectors. Shape is `[N, N]`.
-func SelfAdjointEigV2(scope *Scope, input tf.Output, optional ...SelfAdjointEigV2Attr) (e tf.Output, v tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "SelfAdjointEigV2",
-		Input: []tf.Input{
-			input,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-
-// Computes the Eigen Decomposition of a batch of square self-adjoint matrices.
-//
-// DEPRECATED at GraphDef version 11: Use SelfAdjointEigV2 instead.
-//
-// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions
-// form square matrices, with the same constraints as the single matrix
-// SelfAdjointEig.
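The same system as the Python example in the MatrixTriangularSolve comment above, expressed with the Go wrapper (adjoint is left at its default of false, so the op solves a @ x = b directly):

// lowerTriangularSolve mirrors the tf.linalg.triangular_solve example above.
func lowerTriangularSolve(sc *op.Scope) tf.Output {
	a := op.Const(sc, [][]float32{
		{3, 0, 0, 0},
		{2, 1, 0, 0},
		{1, 0, 1, 0},
		{1, 1, 1, 1},
	})
	b := op.Const(sc, [][]float32{{4}, {2}, {4}, {2}})
	return op.MatrixTriangularSolve(sc, a, b,
		op.MatrixTriangularSolveLower(true))
}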
-// -// The result is a [..., M+1, M] matrix with [..., 0,:] containing the -// eigenvalues, and subsequent [...,1:, :] containing the eigenvectors. The eigenvalues -// are sorted in non-decreasing order. -// -// Arguments: -// input: Shape is `[..., M, M]`. -// -// Returns Shape is `[..., M+1, M]`. -func SelfAdjointEig(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SelfAdjointEig", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that emits the key-value pairs in one or more LMDB files. -// -// The Lightning Memory-Mapped Database Manager, or LMDB, is an embedded binary -// key-value database. This dataset can read the contents of LMDB database files, -// the names of which generally have the `.mdb` suffix. -// -// Each output element consists of a key-value pair represented as a pair of -// scalar string `Tensor`s, where the first `Tensor` contains the key and the -// second `Tensor` contains the value. -// -// LMDB uses different file formats on big- and little-endian machines. -// `LMDBDataset` can only read files in the format of the host machine. -// -// Arguments: -// filenames: A scalar or a vector containing the name(s) of the binary file(s) to be -// read. -// -// -func LMDBDataset(scope *Scope, filenames tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "LMDBDataset", - Input: []tf.Input{ - filenames, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MatrixInverseAttr is an optional argument to MatrixInverse. -type MatrixInverseAttr func(optionalAttr) - -// MatrixInverseAdjoint sets the optional adjoint attribute to value. -// If not specified, defaults to false -func MatrixInverseAdjoint(value bool) MatrixInverseAttr { - return func(m optionalAttr) { - m["adjoint"] = value - } -} - -// Computes the inverse of one or more square invertible matrices or their -// -// adjoints (conjugate transposes). -// -// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions -// form square matrices. The output is a tensor of the same shape as the input -// containing the inverse for all input submatrices `[..., :, :]`. -// -// The op uses LU decomposition with partial pivoting to compute the inverses. -// -// If a matrix is not invertible there is no guarantee what the op does. It -// may detect the condition and raise an exception or it may simply return a -// garbage result. -// -// Arguments: -// input: Shape is `[..., M, M]`. -// -// Returns Shape is `[..., M, M]`. -// -// @compatibility(numpy) -// Equivalent to np.linalg.inv -// @end_compatibility -func MatrixInverse(scope *Scope, input tf.Output, optional ...MatrixInverseAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MatrixInverse", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the sign and the log of the absolute value of the determinant of -// -// one or more square matrices. -// -// The input is a tensor of shape `[N, M, M]` whose inner-most 2 dimensions -// form square matrices. 
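A one-line sketch for the MatrixInverse wrapper above; the 2x2 values are arbitrary, chosen so the inverse is easy to verify by hand, and batched inputs work the same way with extra leading dimensions:

// invertMatrixSketch inverts a single 2x2 matrix.
func invertMatrixSketch(sc *op.Scope) tf.Output {
	m := op.Const(sc, [][]float32{{4, 7}, {2, 6}})
	// det = 10, so the result is approximately [[0.6 -0.7] [-0.2 0.4]].
	return op.MatrixInverse(sc, m)
}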
The outputs are two tensors containing the signs and -// absolute values of the log determinants for all N input submatrices -// `[..., :, :]` such that the determinant = sign*exp(log_abs_determinant). -// The log_abs_determinant is computed as det(P)*sum(log(diag(LU))) where LU -// is the LU decomposition of the input and P is the corresponding -// permutation matrix. -// -// Arguments: -// input: Shape is `[N, M, M]`. -// -// Returns: -// sign: The signs of the log determinants of the inputs. Shape is `[N]`. -// log_abs_determinant: The logs of the absolute values of the determinants -// of the N input matrices. Shape is `[N]`. -func LogMatrixDeterminant(scope *Scope, input tf.Output) (sign tf.Output, log_abs_determinant tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "LogMatrixDeterminant", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Computes the determinant of one or more square matrices. -// -// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions -// form square matrices. The output is a tensor containing the determinants -// for all input submatrices `[..., :, :]`. -// -// Arguments: -// input: Shape is `[..., M, M]`. -// -// Returns Shape is `[...]`. -func MatrixDeterminant(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MatrixDeterminant", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResizeBilinearAttr is an optional argument to ResizeBilinear. -type ResizeBilinearAttr func(optionalAttr) - -// ResizeBilinearAlignCorners sets the optional align_corners attribute to value. -// -// value: If true, the centers of the 4 corner pixels of the input and output tensors are -// aligned, preserving the values at the corner pixels. Defaults to false. -// If not specified, defaults to false -func ResizeBilinearAlignCorners(value bool) ResizeBilinearAttr { - return func(m optionalAttr) { - m["align_corners"] = value - } -} - -// ResizeBilinearHalfPixelCenters sets the optional half_pixel_centers attribute to value. -// If not specified, defaults to false -func ResizeBilinearHalfPixelCenters(value bool) ResizeBilinearAttr { - return func(m optionalAttr) { - m["half_pixel_centers"] = value - } -} - -// Resize `images` to `size` using bilinear interpolation. -// -// Input images can be of different types but output images are always float. -// -// Arguments: -// images: 4-D with shape `[batch, height, width, channels]`. -// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The -// new size for the images. -// -// Returns 4-D with shape -// `[batch, new_height, new_width, channels]`. -func ResizeBilinear(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeBilinearAttr) (resized_images tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResizeBilinear", - Input: []tf.Input{ - images, size, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a TensorList by indexing into a Tensor. -// -// Each member of the TensorList corresponds to one row of the input tensor, -// specified by the given index (see `tf.gather`). -// -// tensor: The input tensor. -// indices: The indices used to index into the list. 
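A short sketch for the ResizeBilinear wrapper above, using a toy 2x2 single-channel image; half_pixel_centers is enabled purely as an example of passing an optional attribute:

// upsampleSketch resizes a 1x2x2x1 NHWC image to 4x4; the output is float32
// regardless of the input type.
func upsampleSketch(sc *op.Scope) tf.Output {
	img := op.Const(sc, [][][][]float32{{{{1}, {2}}, {{3}, {4}}}}) // shape [1, 2, 2, 1]
	newSize := op.Const(sc, []int32{4, 4})                         // new_height, new_width
	return op.ResizeBilinear(sc, img, newSize,
		op.ResizeBilinearHalfPixelCenters(true))
}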
-// element_shape: The shape of the elements in the list (can be less specified than -// the shape of the tensor). -// num_elements: The size of the output list. Must be large enough to accommodate -// the largest index in indices. If -1, the list is just large enough to include -// the largest index in indices. -// output_handle: The TensorList. -func TensorListScatterV2(scope *Scope, tensor tf.Output, indices tf.Output, element_shape tf.Output, num_elements tf.Output) (output_handle tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorListScatterV2", - Input: []tf.Input{ - tensor, indices, element_shape, num_elements, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a TensorList by indexing into a Tensor. -// -// Each member of the TensorList corresponds to one row of the input tensor, -// specified by the given index (see `tf.gather`). -// -// tensor: The input tensor. -// indices: The indices used to index into the list. -// element_shape: The shape of the elements in the list (can be less specified than -// the shape of the tensor). -// output_handle: The TensorList. -func TensorListScatter(scope *Scope, tensor tf.Output, indices tf.Output, element_shape tf.Output) (output_handle tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorListScatter", - Input: []tf.Input{ - tensor, indices, element_shape, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the gradient of the sigmoid of `x` wrt its input. -// -// Specifically, `grad = dy * y * (1 - y)`, where `y = sigmoid(x)`, and -// `dy` is the corresponding input gradient. -func SigmoidGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SigmoidGrad", - Input: []tf.Input{ - y, dy, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a Tensor by indexing into the TensorList. -// -// Each row in the produced Tensor corresponds to the element in the TensorList -// specified by the given index (see `tf.gather`). -// -// input_handle: The input tensor list. -// indices: The indices used to index into the list. -// values: The tensor. -func TensorListGather(scope *Scope, input_handle tf.Output, indices tf.Output, element_shape tf.Output, element_dtype tf.DataType) (values tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"element_dtype": element_dtype} - opspec := tf.OpSpec{ - Type: "TensorListGather", - Input: []tf.Input{ - input_handle, indices, element_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// The shape of the elements of the given list, as a tensor. -// -// input_handle: the list -// element_shape: the shape of elements of the list -func TensorListElementShape(scope *Scope, input_handle tf.Output, shape_type tf.DataType) (element_shape tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"shape_type": shape_type} - opspec := tf.OpSpec{ - Type: "TensorListElementShape", - Input: []tf.Input{ - input_handle, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ExperimentalThreadPoolHandleAttr is an optional argument to ExperimentalThreadPoolHandle. -type ExperimentalThreadPoolHandleAttr func(optionalAttr) - -// ExperimentalThreadPoolHandleMaxIntraOpParallelism sets the optional max_intra_op_parallelism attribute to value. 
-// -// value: The maximum degree of parallelism to use within operations that execute on this -// threadpool. -// If not specified, defaults to 1 -func ExperimentalThreadPoolHandleMaxIntraOpParallelism(value int64) ExperimentalThreadPoolHandleAttr { - return func(m optionalAttr) { - m["max_intra_op_parallelism"] = value - } -} - -// ExperimentalThreadPoolHandleContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func ExperimentalThreadPoolHandleContainer(value string) ExperimentalThreadPoolHandleAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// ExperimentalThreadPoolHandleSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func ExperimentalThreadPoolHandleSharedName(value string) ExperimentalThreadPoolHandleAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Creates a dataset that uses a custom thread pool to compute `input_dataset`. -// -// Arguments: -// num_threads: The number of threads in the thread pool. -// display_name: A human-readable name for the threads that may be visible in some -// visualizations. -// threadpool. -// -// Returns A resource that can be consumed by one or more ExperimentalThreadPoolDataset -// ops. -func ExperimentalThreadPoolHandle(scope *Scope, num_threads int64, display_name string, optional ...ExperimentalThreadPoolHandleAttr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_threads": num_threads, "display_name": display_name} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ExperimentalThreadPoolHandle", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a TensorList which, when stacked, has the value of `tensor`. -// -// Each tensor in the result list corresponds to one row of the input tensor. -// -// tensor: The input tensor. -// output_handle: The list. -func TensorListFromTensor(scope *Scope, tensor tf.Output, element_shape tf.Output) (output_handle tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorListFromTensor", - Input: []tf.Input{ - tensor, element_shape, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// TensorListStackAttr is an optional argument to TensorListStack. -type TensorListStackAttr func(optionalAttr) - -// TensorListStackNumElements sets the optional num_elements attribute to value. -// If not specified, defaults to -1 -func TensorListStackNumElements(value int64) TensorListStackAttr { - return func(m optionalAttr) { - m["num_elements"] = value - } -} - -// Stacks all tensors in the list. -// -// Requires that all tensors have the same shape. -// -// input_handle: the input list -// tensor: the gathered result -// num_elements: optional. If not -1, the number of elements in the list. -// -func TensorListStack(scope *Scope, input_handle tf.Output, element_shape tf.Output, element_dtype tf.DataType, optional ...TensorListStackAttr) (tensor tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"element_dtype": element_dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TensorListStack", - Input: []tf.Input{ - input_handle, element_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the number of tensors in the input tensor list. 
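To make the TensorList wrappers above concrete, a sketch that round-trips a small tensor through a list; element_shape here is the per-row shape [2], and the values are illustrative:

// tensorListRoundTrip splits a [3, 2] tensor into a TensorList and stacks it back.
func tensorListRoundTrip(sc *op.Scope) (stacked, length tf.Output) {
	t := op.Const(sc, [][]float32{{1, 2}, {3, 4}, {5, 6}})
	elemShape := op.Const(sc, []int32{2}) // shape of each list element
	list := op.TensorListFromTensor(sc, t, elemShape)
	stacked = op.TensorListStack(sc, list, elemShape, tf.Float,
		op.TensorListStackNumElements(3))
	length = op.TensorListLength(sc, list) // evaluates to 3
	return stacked, length
}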
-// -// input_handle: the input list -// length: the number of tensors in the list -func TensorListLength(scope *Scope, input_handle tf.Output) (length tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorListLength", - Input: []tf.Input{ - input_handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Merges summaries. -// -// This op creates a -// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) -// protocol buffer that contains the union of all the values in the input -// summaries. -// -// When the Op is run, it reports an `InvalidArgument` error if multiple values -// in the summaries to merge use the same tag. -// -// Arguments: -// inputs: Can be of any shape. Each must contain serialized `Summary` protocol -// buffers. -// -// Returns Scalar. Serialized `Summary` protocol buffer. -func MergeSummary(scope *Scope, inputs []tf.Output) (summary tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MergeSummary", - Input: []tf.Input{ - tf.OutputList(inputs), - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// AvgPoolAttr is an optional argument to AvgPool. -type AvgPoolAttr func(optionalAttr) - -// AvgPoolDataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func AvgPoolDataFormat(value string) AvgPoolAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Performs average pooling on the input. -// -// Each entry in `output` is the mean of the corresponding size `ksize` -// window in `value`. -// -// Arguments: -// value: 4-D with shape `[batch, height, width, channels]`. -// ksize: The size of the sliding window for each dimension of `value`. -// strides: The stride of the sliding window for each dimension of `value`. -// padding: The type of padding algorithm to use. -// -// Returns The average pooled output tensor. -func AvgPool(scope *Scope, value tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPoolAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "AvgPool", - Input: []tf.Input{ - value, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// AudioSummaryV2Attr is an optional argument to AudioSummaryV2. -type AudioSummaryV2Attr func(optionalAttr) - -// AudioSummaryV2MaxOutputs sets the optional max_outputs attribute to value. -// -// value: Max number of batch elements to generate audio for. -// If not specified, defaults to 3 -// -// REQUIRES: value >= 1 -func AudioSummaryV2MaxOutputs(value int64) AudioSummaryV2Attr { - return func(m optionalAttr) { - m["max_outputs"] = value - } -} - -// Outputs a `Summary` protocol buffer with audio. -// -// The summary has up to `max_outputs` summary values containing audio. The -// audio is built from `tensor` which must be 3-D with shape `[batch_size, -// frames, channels]` or 2-D with shape `[batch_size, frames]`. 
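As a sketch of how the ksize, strides, and padding arguments of the AvgPool wrapper above are passed in Go (the 2x2 window with stride 2 is just an example configuration):

// avgPool2x2 applies 2x2 average pooling with stride 2 to an NHWC input.
func avgPool2x2(sc *op.Scope, value tf.Output) tf.Output {
	return op.AvgPool(sc, value,
		[]int64{1, 2, 2, 1}, // ksize per dimension of value
		[]int64{1, 2, 2, 1}, // strides per dimension of value
		"VALID",
		op.AvgPoolDataFormat("NHWC"))
}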
The values are -// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. -// -// The `tag` argument is a scalar `Tensor` of type `string`. It is used to -// build the `tag` of the summary values: -// -// * If `max_outputs` is 1, the summary value tag is '*tag*/audio'. -// * If `max_outputs` is greater than 1, the summary value tags are -// generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. -// -// Arguments: -// tag: Scalar. Used to build the `tag` attribute of the summary values. -// tensor: 2-D of shape `[batch_size, frames]`. -// sample_rate: The sample rate of the signal in hertz. -// -// Returns Scalar. Serialized `Summary` protocol buffer. -func AudioSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate tf.Output, optional ...AudioSummaryV2Attr) (summary tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "AudioSummaryV2", - Input: []tf.Input{ - tag, tensor, sample_rate, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Outputs a `Summary` protocol buffer with a histogram. -// -// The generated -// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) -// has one summary value containing a histogram for `values`. -// -// This op reports an `InvalidArgument` error if any value is not finite. -// -// Arguments: -// tag: Scalar. Tag to use for the `Summary.Value`. -// values: Any shape. Values to use to build the histogram. -// -// Returns Scalar. Serialized `Summary` protocol buffer. -func HistogramSummary(scope *Scope, tag tf.Output, values tf.Output) (summary tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "HistogramSummary", - Input: []tf.Input{ - tag, values, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// StringLengthAttr is an optional argument to StringLength. -type StringLengthAttr func(optionalAttr) - -// StringLengthUnit sets the optional unit attribute to value. -// -// value: The unit that is counted to compute string length. One of: `"BYTE"` (for -// the number of bytes in each string) or `"UTF8_CHAR"` (for the number of UTF-8 -// encoded Unicode code points in each string). Results are undefined -// if `unit=UTF8_CHAR` and the `input` strings do not contain structurally -// valid UTF-8. -// If not specified, defaults to "BYTE" -func StringLengthUnit(value string) StringLengthAttr { - return func(m optionalAttr) { - m["unit"] = value - } -} - -// String lengths of `input`. -// -// Computes the length of each string given in the input tensor. -// -// >>> strings = tf.constant(['Hello','TensorFlow', '\U0001F642']) -// >>> tf.strings.length(strings).numpy() # default counts bytes -// array([ 5, 10, 4], dtype=int32) -// >>> tf.strings.length(strings, unit="UTF8_CHAR").numpy() -// array([ 5, 10, 1], dtype=int32) -// -// -// Arguments: -// input: The strings for which to compute the length for each element. -// -// Returns Integer tensor that has the same shape as `input`. The output contains the -// element-wise string lengths of `input`. 
-func StringLength(scope *Scope, input tf.Output, optional ...StringLengthAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StringLength", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// TensorSummaryAttr is an optional argument to TensorSummary. -type TensorSummaryAttr func(optionalAttr) - -// TensorSummaryDescription sets the optional description attribute to value. -// -// value: A json-encoded SummaryDescription proto. -// If not specified, defaults to "" -func TensorSummaryDescription(value string) TensorSummaryAttr { - return func(m optionalAttr) { - m["description"] = value - } -} - -// TensorSummaryLabels sets the optional labels attribute to value. -// -// value: An unused list of strings. -// If not specified, defaults to <> -func TensorSummaryLabels(value []string) TensorSummaryAttr { - return func(m optionalAttr) { - m["labels"] = value - } -} - -// TensorSummaryDisplayName sets the optional display_name attribute to value. -// -// value: An unused string. -// If not specified, defaults to "" -func TensorSummaryDisplayName(value string) TensorSummaryAttr { - return func(m optionalAttr) { - m["display_name"] = value - } -} - -// Outputs a `Summary` protocol buffer with a tensor. -// -// This op is being phased out in favor of TensorSummaryV2, which lets callers pass -// a tag as well as a serialized SummaryMetadata proto string that contains -// plugin-specific data. We will keep this op to maintain backwards compatibility. -// -// Arguments: -// tensor: A tensor to serialize. -func TensorSummary(scope *Scope, tensor tf.Output, optional ...TensorSummaryAttr) (summary tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TensorSummary", - Input: []tf.Input{ - tensor, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Scatters tensor at indices in an input list. -// -// Each member of the TensorList corresponds to one row of the input tensor, -// specified by the given index (see `tf.gather`). -// -// input_handle: The list to scatter into. -// tensor: The input tensor. -// indices: The indices used to index into the list. -// output_handle: The TensorList. -func TensorListScatterIntoExistingList(scope *Scope, input_handle tf.Output, tensor tf.Output, indices tf.Output) (output_handle tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorListScatterIntoExistingList", - Input: []tf.Input{ - input_handle, tensor, indices, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Outputs a `Summary` protocol buffer with a tensor and per-plugin data. -// -// Arguments: -// tag: A string attached to this summary. Used for organization in TensorBoard. -// tensor: A tensor to serialize. -// serialized_summary_metadata: A serialized SummaryMetadata proto. Contains plugin -// data. -func TensorSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, serialized_summary_metadata tf.Output) (summary tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorSummaryV2", - Input: []tf.Input{ - tag, tensor, serialized_summary_metadata, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the gradient for the sqrt of `x` wrt its input. 
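A minimal sketch of driving the StringLength wrapper above from client code, switching the unit to UTF-8 code points through the optional attribute; the import paths and helper name are assumptions, not part of this file:

```go
package example

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// buildStringLengths measures each element of a string tensor in UTF-8 code
// points instead of the default byte count.
func buildStringLengths(s *op.Scope, strs tf.Output) tf.Output {
	return op.StringLength(s, strs, op.StringLengthUnit("UTF8_CHAR"))
}
```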
-// -// Specifically, `grad = dy * 0.5 / y`, where `y = sqrt(x)`, and `dy` -// is the corresponding input gradient. -func SqrtGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SqrtGrad", - Input: []tf.Input{ - y, dy, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MutableHashTableOfTensorsV2Attr is an optional argument to MutableHashTableOfTensorsV2. -type MutableHashTableOfTensorsV2Attr func(optionalAttr) - -// MutableHashTableOfTensorsV2Container sets the optional container attribute to value. -// -// value: If non-empty, this table is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func MutableHashTableOfTensorsV2Container(value string) MutableHashTableOfTensorsV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// MutableHashTableOfTensorsV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this table is shared under the given name across -// multiple sessions. -// If not specified, defaults to "" -func MutableHashTableOfTensorsV2SharedName(value string) MutableHashTableOfTensorsV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// MutableHashTableOfTensorsV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. -// If not specified, defaults to false -func MutableHashTableOfTensorsV2UseNodeNameSharing(value bool) MutableHashTableOfTensorsV2Attr { - return func(m optionalAttr) { - m["use_node_name_sharing"] = value - } -} - -// MutableHashTableOfTensorsV2ValueShape sets the optional value_shape attribute to value. -// If not specified, defaults to <> -func MutableHashTableOfTensorsV2ValueShape(value tf.Shape) MutableHashTableOfTensorsV2Attr { - return func(m optionalAttr) { - m["value_shape"] = value - } -} - -// Creates an empty hash table. -// -// This op creates a mutable hash table, specifying the type of its keys and -// values. Each value must be a vector. Data can be inserted into the table using -// the insert operations. It does not support the initialization operation. -// -// Arguments: -// key_dtype: Type of the table keys. -// value_dtype: Type of the table values. -// -// Returns Handle to a table. -func MutableHashTableOfTensorsV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...MutableHashTableOfTensorsV2Attr) (table_handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MutableHashTableOfTensorsV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the grayscale dilation of 4-D `input` and 3-D `filter` tensors. -// -// The `input` tensor has shape `[batch, in_height, in_width, depth]` and the -// `filter` tensor has shape `[filter_height, filter_width, depth]`, i.e., each -// input channel is processed independently of the others with its own structuring -// function. The `output` tensor has shape -// `[batch, out_height, out_width, depth]`. The spatial dimensions of the output -// tensor depend on the `padding` algorithm. We currently only support the default -// "NHWC" `data_format`. 
-// -// In detail, the grayscale morphological 2-D dilation is the max-sum correlation -// (for consistency with `conv2d`, we use unmirrored filters): -// -// output[b, y, x, c] = -// max_{dy, dx} input[b, -// strides[1] * y + rates[1] * dy, -// strides[2] * x + rates[2] * dx, -// c] + -// filter[dy, dx, c] -// -// Max-pooling is a special case when the filter has size equal to the pooling -// kernel size and contains all zeros. -// -// Note on duality: The dilation of `input` by the `filter` is equal to the -// negation of the erosion of `-input` by the reflected `filter`. -// -// Arguments: -// input: 4-D with shape `[batch, in_height, in_width, depth]`. -// filter: 3-D with shape `[filter_height, filter_width, depth]`. -// strides: The stride of the sliding window for each dimension of the input -// tensor. Must be: `[1, stride_height, stride_width, 1]`. -// rates: The input stride for atrous morphological dilation. Must be: -// `[1, rate_height, rate_width, 1]`. -// padding: The type of padding algorithm to use. -// -// Returns 4-D with shape `[batch, out_height, out_width, depth]`. -func Dilation2D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, rates []int64, padding string) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides, "rates": rates, "padding": padding} - opspec := tf.OpSpec{ - Type: "Dilation2D", - Input: []tf.Input{ - input, filter, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes softplus: `log(exp(features) + 1)`. -func Softplus(scope *Scope, features tf.Output) (activations tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Softplus", - Input: []tf.Input{ - features, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MutableHashTableV2Attr is an optional argument to MutableHashTableV2. -type MutableHashTableV2Attr func(optionalAttr) - -// MutableHashTableV2Container sets the optional container attribute to value. -// -// value: If non-empty, this table is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func MutableHashTableV2Container(value string) MutableHashTableV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// MutableHashTableV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this table is shared under the given name across -// multiple sessions. -// If not specified, defaults to "" -func MutableHashTableV2SharedName(value string) MutableHashTableV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// MutableHashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. -// -// value: If true and shared_name is empty, the table is shared -// using the node name. -// If not specified, defaults to false -func MutableHashTableV2UseNodeNameSharing(value bool) MutableHashTableV2Attr { - return func(m optionalAttr) { - m["use_node_name_sharing"] = value - } -} - -// Creates an empty hash table. -// -// This op creates a mutable hash table, specifying the type of its keys and -// values. Each value must be a scalar. Data can be inserted into the table using -// the insert operations. It does not support the initialization operation. -// -// Arguments: -// key_dtype: Type of the table keys. -// value_dtype: Type of the table values. -// -// Returns Handle to a table. 
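For the Dilation2D wrapper above, a sketch of a plain (unit-stride, unit-rate) dilation over NHWC input, assuming the standard Go binding import paths; the helper name is illustrative:

```go
package example

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// buildDilation applies a grayscale morphological dilation with unit stride
// and unit rate over NHWC input.
func buildDilation(s *op.Scope, input, filter tf.Output) tf.Output {
	return op.Dilation2D(s, input, filter,
		[]int64{1, 1, 1, 1}, // strides: [1, stride_height, stride_width, 1]
		[]int64{1, 1, 1, 1}, // rates:   [1, rate_height, rate_width, 1]
		"SAME")
}
```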
-func MutableHashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...MutableHashTableV2Attr) (table_handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MutableHashTableV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Calculates the prior from the training data (the bias) and fills in the first node with the logits' prior. Returns a boolean indicating whether to continue centering. -// -// Arguments: -// tree_ensemble_handle: Handle to the tree ensemble. -// mean_gradients: A tensor with shape=[logits_dimension] with mean of gradients for a first node. -// mean_hessians: A tensor with shape=[logits_dimension] mean of hessians for a first node. -// l1: l1 regularization factor on leaf weights, per instance based. -// l2: l2 regularization factor on leaf weights, per instance based. -// -// Returns Bool, whether to continue bias centering. -func BoostedTreesCenterBias(scope *Scope, tree_ensemble_handle tf.Output, mean_gradients tf.Output, mean_hessians tf.Output, l1 tf.Output, l2 tf.Output) (continue_centering tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BoostedTreesCenterBias", - Input: []tf.Input{ - tree_ensemble_handle, mean_gradients, mean_hessians, l1, l2, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// HashTableV2Attr is an optional argument to HashTableV2. -type HashTableV2Attr func(optionalAttr) - -// HashTableV2Container sets the optional container attribute to value. -// -// value: If non-empty, this table is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func HashTableV2Container(value string) HashTableV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// HashTableV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this table is shared under the given name across -// multiple sessions. -// If not specified, defaults to "" -func HashTableV2SharedName(value string) HashTableV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// HashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. -// -// value: If true and shared_name is empty, the table is shared -// using the node name. -// If not specified, defaults to false -func HashTableV2UseNodeNameSharing(value bool) HashTableV2Attr { - return func(m optionalAttr) { - m["use_node_name_sharing"] = value - } -} - -// Creates a non-initialized hash table. -// -// This op creates a hash table, specifying the type of its keys and values. -// Before using the table you will have to initialize it. After initialization the -// table will be immutable. -// -// Arguments: -// key_dtype: Type of the table keys. -// value_dtype: Type of the table values. -// -// Returns Handle to a table. 
-func HashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...HashTableV2Attr) (table_handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "HashTableV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Check if the input matches the regex pattern. -// -// The input is a string tensor of any shape. The pattern is a scalar -// string tensor which is applied to every element of the input tensor. -// The boolean values (True or False) of the output tensor indicate -// if the input matches the regex pattern provided. -// -// The pattern follows the re2 syntax (https://github.com/google/re2/wiki/Syntax) -// -// Examples: -// -// >>> tf.strings.regex_full_match(["TF lib", "lib TF"], ".*lib$") -// -// >>> tf.strings.regex_full_match(["TF lib", "lib TF"], ".*TF$") -// -// -// Arguments: -// input: A string tensor of the text to be processed. -// pattern: A scalar string tensor containing the regular expression to match the input. -// -// Returns A bool tensor with the same shape as `input`. -func RegexFullMatch(scope *Scope, input tf.Output, pattern tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "RegexFullMatch", - Input: []tf.Input{ - input, pattern, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MatrixDiagV3Attr is an optional argument to MatrixDiagV3. -type MatrixDiagV3Attr func(optionalAttr) - -// MatrixDiagV3Align sets the optional align attribute to value. -// -// value: Some diagonals are shorter than `max_diag_len` and need to be padded. `align` is -// a string specifying how superdiagonals and subdiagonals should be aligned, -// respectively. There are four possible alignments: "RIGHT_LEFT" (default), -// "LEFT_RIGHT", "LEFT_LEFT", and "RIGHT_RIGHT". "RIGHT_LEFT" aligns superdiagonals -// to the right (left-pads the row) and subdiagonals to the left (right-pads the -// row). It is the packing format LAPACK uses. cuSPARSE uses "LEFT_RIGHT", which is -// the opposite alignment. -// If not specified, defaults to "RIGHT_LEFT" -func MatrixDiagV3Align(value string) MatrixDiagV3Attr { - return func(m optionalAttr) { - m["align"] = value - } -} - -// Returns a batched diagonal tensor with given batched diagonal values. -// -// Returns a tensor with the contents in `diagonal` as `k[0]`-th to `k[1]`-th -// diagonals of a matrix, with everything else padded with `padding`. `num_rows` -// and `num_cols` specify the dimension of the innermost matrix of the output. If -// both are not specified, the op assumes the innermost matrix is square and infers -// its size from `k` and the innermost dimension of `diagonal`. If only one of them -// is specified, the op assumes the unspecified value is the smallest possible -// based on other criteria. -// -// Let `diagonal` have `r` dimensions `[I, J, ..., L, M, N]`. The output tensor has -// rank `r+1` with shape `[I, J, ..., L, M, num_rows, num_cols]` when only one -// diagonal is given (`k` is an integer or `k[0] == k[1]`). Otherwise, it has rank -// `r` with shape `[I, J, ..., L, num_rows, num_cols]`. -// -// The second innermost dimension of `diagonal` has double meaning. 
-// When `k` is scalar or `k[0] == k[1]`, `M` is part of the batch size -// [I, J, ..., M], and the output tensor is: -// -// ``` -// output[i, j, ..., l, m, n] -// = diagonal[i, j, ..., l, n-max(d_upper, 0)] ; if n - m == d_upper -// padding_value ; otherwise -// ``` -// -// Otherwise, `M` is treated as the number of diagonals for the matrix in the -// same batch (`M = k[1]-k[0]+1`), and the output tensor is: -// -// ``` -// output[i, j, ..., l, m, n] -// = diagonal[i, j, ..., l, diag_index, index_in_diag] ; if k[0] <= d <= k[1] -// padding_value ; otherwise -// ``` -// where `d = n - m`, `diag_index = [k] - d`, and -// `index_in_diag = n - max(d, 0) + offset`. -// -// `offset` is zero except when the alignment of the diagonal is to the right. -// ``` -// offset = max_diag_len - diag_len(d) ; if (`align` in {RIGHT_LEFT, RIGHT_RIGHT} -// and `d >= 0`) or -// (`align` in {LEFT_RIGHT, RIGHT_RIGHT} -// and `d <= 0`) -// 0 ; otherwise -// ``` -// where `diag_len(d) = min(cols - max(d, 0), rows + min(d, 0))`. -// -// For example: -// -// ``` -// # The main diagonal. -// diagonal = np.array([[1, 2, 3, 4], # Input shape: (2, 4) -// [5, 6, 7, 8]]) -// tf.matrix_diag(diagonal) ==> [[[1, 0, 0, 0], # Output shape: (2, 4, 4) -// [0, 2, 0, 0], -// [0, 0, 3, 0], -// [0, 0, 0, 4]], -// [[5, 0, 0, 0], -// [0, 6, 0, 0], -// [0, 0, 7, 0], -// [0, 0, 0, 8]]] -// -// # A superdiagonal (per batch). -// diagonal = np.array([[1, 2, 3], # Input shape: (2, 3) -// [4, 5, 6]]) -// tf.matrix_diag(diagonal, k = 1) -// ==> [[[0, 1, 0, 0], # Output shape: (2, 4, 4) -// [0, 0, 2, 0], -// [0, 0, 0, 3], -// [0, 0, 0, 0]], -// [[0, 4, 0, 0], -// [0, 0, 5, 0], -// [0, 0, 0, 6], -// [0, 0, 0, 0]]] -// -// # A tridiagonal band (per batch). -// diagonals = np.array([[[0, 8, 9], # Input shape: (2, 2, 3) -// [1, 2, 3], -// [4, 5, 0]], -// [[0, 2, 3], -// [6, 7, 9], -// [9, 1, 0]]]) -// tf.matrix_diag(diagonals, k = (-1, 1)) -// ==> [[[1, 8, 0], # Output shape: (2, 3, 3) -// [4, 2, 9], -// [0, 5, 3]], -// [[6, 2, 0], -// [9, 7, 3], -// [0, 1, 9]]] -// -// # LEFT_RIGHT alignment. -// diagonals = np.array([[[8, 9, 0], # Input shape: (2, 2, 3) -// [1, 2, 3], -// [0, 4, 5]], -// [[2, 3, 0], -// [6, 7, 9], -// [0, 9, 1]]]) -// tf.matrix_diag(diagonals, k = (-1, 1), align="LEFT_RIGHT") -// ==> [[[1, 8, 0], # Output shape: (2, 3, 3) -// [4, 2, 9], -// [0, 5, 3]], -// [[6, 2, 0], -// [9, 7, 3], -// [0, 1, 9]]] -// -// # Rectangular matrix. -// diagonal = np.array([1, 2]) # Input shape: (2) -// tf.matrix_diag(diagonal, k = -1, num_rows = 3, num_cols = 4) -// ==> [[0, 0, 0, 0], # Output shape: (3, 4) -// [1, 0, 0, 0], -// [0, 2, 0, 0]] -// -// # Rectangular matrix with inferred num_cols and padding_value = 9. -// tf.matrix_diag(diagonal, k = -1, num_rows = 3, padding_value = 9) -// ==> [[9, 9], # Output shape: (3, 2) -// [1, 9], -// [9, 2]] -// -// ``` -// -// Arguments: -// diagonal: Rank `r`, where `r >= 1` -// k: Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main -// diagonal, and negative value means subdiagonals. `k` can be a single integer -// (for a single diagonal) or a pair of integers specifying the low and high ends -// of a matrix band. `k[0]` must not be larger than `k[1]`. -// num_rows: The number of rows of the output matrix. If it is not provided, the op assumes -// the output matrix is a square matrix and infers the matrix size from k and the -// innermost dimension of `diagonal`. -// num_cols: The number of columns of the output matrix. 
If it is not provided, the op -// assumes the output matrix is a square matrix and infers the matrix size from -// k and the innermost dimension of `diagonal`. -// padding_value: The number to fill the area outside the specified diagonal band with. -// Default is 0. -// -// Returns Has rank `r+1` when `k` is an integer or `k[0] == k[1]`, rank `r` otherwise. -func MatrixDiagV3(scope *Scope, diagonal tf.Output, k tf.Output, num_rows tf.Output, num_cols tf.Output, padding_value tf.Output, optional ...MatrixDiagV3Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MatrixDiagV3", - Input: []tf.Input{ - diagonal, k, num_rows, num_cols, padding_value, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Greedily selects a subset of bounding boxes in descending order of score, -// -// pruning away boxes that have high overlaps -// with previously selected boxes. Bounding boxes with score less than -// `score_threshold` are removed. N-by-n overlap values are supplied as square matrix, -// which allows for defining a custom overlap criterium (eg. intersection over union, -// intersection over area, etc.). -// -// The output of this operation is a set of integers indexing into the input -// collection of bounding boxes representing the selected boxes. The bounding -// box coordinates corresponding to the selected indices can then be obtained -// using the `tf.gather operation`. For example: -// -// selected_indices = tf.image.non_max_suppression_with_overlaps( -// overlaps, scores, max_output_size, overlap_threshold, score_threshold) -// selected_boxes = tf.gather(boxes, selected_indices) -// -// Arguments: -// overlaps: A 2-D float tensor of shape `[num_boxes, num_boxes]` representing -// the n-by-n box overlap values. -// scores: A 1-D float tensor of shape `[num_boxes]` representing a single -// score corresponding to each box (each row of boxes). -// max_output_size: A scalar integer tensor representing the maximum number of -// boxes to be selected by non max suppression. -// overlap_threshold: A 0-D float tensor representing the threshold for deciding whether -// boxes overlap too. -// score_threshold: A 0-D float tensor representing the threshold for deciding when to remove -// boxes based on score. -// -// Returns A 1-D integer tensor of shape `[M]` representing the selected -// indices from the boxes tensor, where `M <= max_output_size`. -func NonMaxSuppressionWithOverlaps(scope *Scope, overlaps tf.Output, scores tf.Output, max_output_size tf.Output, overlap_threshold tf.Output, score_threshold tf.Output) (selected_indices tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "NonMaxSuppressionWithOverlaps", - Input: []tf.Input{ - overlaps, scores, max_output_size, overlap_threshold, score_threshold, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Outputs all keys and values in the table. -// -// Arguments: -// table_handle: Handle to the table. -// -// -// -// Returns: -// keys: Vector of all keys present in the table. -// values: Tensor of all values in the table. Indexed in parallel with `keys`. 
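A sketch that wires up the MatrixDiagV3 wrapper above to reproduce the "rectangular matrix" example from its documentation, assuming the standard Go binding import paths; the helper name is illustrative:

```go
package example

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// buildSubdiagonal places the diagonal [1, 2] on the k = -1 subdiagonal of a
// 3x4 matrix with padding value 0, matching the "rectangular matrix" example
// in the MatrixDiagV3 documentation.
func buildSubdiagonal(s *op.Scope) tf.Output {
	diagonal := op.Const(s, []int32{1, 2})
	k := op.Const(s, int32(-1))
	numRows := op.Const(s, int32(3))
	numCols := op.Const(s, int32(4))
	padding := op.Const(s, int32(0))
	return op.MatrixDiagV3(s, diagonal, k, numRows, numCols, padding)
}
```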
-func LookupTableExportV2(scope *Scope, table_handle tf.Output, Tkeys tf.DataType, Tvalues tf.DataType) (keys tf.Output, values tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"Tkeys": Tkeys, "Tvalues": Tvalues} - opspec := tf.OpSpec{ - Type: "LookupTableExportV2", - Input: []tf.Input{ - table_handle, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// RetrieveTPUEmbeddingAdagradParametersAttr is an optional argument to RetrieveTPUEmbeddingAdagradParameters. -type RetrieveTPUEmbeddingAdagradParametersAttr func(optionalAttr) - -// RetrieveTPUEmbeddingAdagradParametersTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func RetrieveTPUEmbeddingAdagradParametersTableId(value int64) RetrieveTPUEmbeddingAdagradParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// RetrieveTPUEmbeddingAdagradParametersTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingAdagradParametersTableName(value string) RetrieveTPUEmbeddingAdagradParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// RetrieveTPUEmbeddingAdagradParametersConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingAdagradParametersConfig(value string) RetrieveTPUEmbeddingAdagradParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Retrieve Adagrad embedding parameters. -// -// An op that retrieves optimization parameters from embedding to host -// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up -// the correct embedding table configuration. For example, this op is -// used to retrieve updated parameters before saving a checkpoint. -// -// Returns: -// parameters: Parameter parameters updated by the Adagrad optimization algorithm. -// accumulators: Parameter accumulators updated by the Adagrad optimization algorithm. -func RetrieveTPUEmbeddingAdagradParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingAdagradParametersAttr) (parameters tf.Output, accumulators tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RetrieveTPUEmbeddingAdagradParameters", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Computes the sum along sparse segments of a tensor divided by the sqrt of N. -// -// N is the size of the segment being reduced. -// -// Like `SparseSegmentSqrtN`, but allows missing ids in `segment_ids`. If an id is -// missing, the `output` tensor at that position will be zeroed. -// -// Read -// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) -// for an explanation of segments. -// -// Arguments: -// -// indices: A 1-D tensor. Has same rank as `segment_ids`. -// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. -// num_segments: Should equal the number of distinct segment IDs. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. 
-func SparseSegmentSqrtNWithNumSegments(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSegmentSqrtNWithNumSegments", - Input: []tf.Input{ - data, indices, segment_ids, num_segments, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the Cholesky decomposition of one or more square matrices. -// -// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions -// form square matrices. -// -// The input has to be symmetric and positive definite. Only the lower-triangular -// part of the input will be used for this operation. The upper-triangular part -// will not be read. -// -// The output is a tensor of the same shape as the input -// containing the Cholesky decompositions for all input submatrices `[..., :, :]`. -// -// **Note**: The gradient computation on GPU is faster for large matrices but -// not for large batch dimensions when the submatrices are small. In this -// case it might be faster to use the CPU. -// -// Arguments: -// input: Shape is `[..., M, M]`. -// -// Returns Shape is `[..., M, M]`. -func Cholesky(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Cholesky", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Splits a tensor into a list. -// -// list[i] corresponds to lengths[i] tensors from the input tensor. -// The tensor must have rank at least 1 and contain exactly sum(lengths) elements. -// -// tensor: The input tensor. -// element_shape: A shape compatible with that of elements in the tensor. -// lengths: Vector of sizes of the 0th dimension of tensors in the list. -// output_handle: The list. -func TensorListSplit(scope *Scope, tensor tf.Output, element_shape tf.Output, lengths tf.Output) (output_handle tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorListSplit", - Input: []tf.Input{ - tensor, element_shape, lengths, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Check if the input matches the regex pattern. -// -// The input is a string tensor of any shape. The pattern is the -// regular expression to be matched with every element of the input tensor. -// The boolean values (True or False) of the output tensor indicate -// if the input matches the regex pattern provided. -// -// The pattern follows the re2 syntax (https://github.com/google/re2/wiki/Syntax) -// -// Arguments: -// input: A string tensor of the text to be processed. -// pattern: The regular expression to match the input. -// -// Returns A bool tensor with the same shape as `input`. -func StaticRegexFullMatch(scope *Scope, input tf.Output, pattern string) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"pattern": pattern} - opspec := tf.OpSpec{ - Type: "StaticRegexFullMatch", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ParseSingleSequenceExampleAttr is an optional argument to ParseSingleSequenceExample. -type ParseSingleSequenceExampleAttr func(optionalAttr) - -// ParseSingleSequenceExampleContextSparseTypes sets the optional context_sparse_types attribute to value. -// -// value: A list of Ncontext_sparse types; the data types of data in -// each context Feature given in context_sparse_keys. 
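A sketch of the StaticRegexFullMatch wrapper above, which takes its pattern as a graph-construction-time attribute rather than a tensor; the import paths and helper name are assumptions:

```go
package example

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// buildRegexMatch checks every element of a string tensor against a pattern
// that is fixed at graph-construction time.
func buildRegexMatch(s *op.Scope, text tf.Output) tf.Output {
	return op.StaticRegexFullMatch(s, text, ".*lib$")
}
```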
-// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList), -// DT_INT64 (Int64List), and DT_STRING (BytesList). -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSingleSequenceExampleContextSparseTypes(value []tf.DataType) ParseSingleSequenceExampleAttr { - return func(m optionalAttr) { - m["context_sparse_types"] = value - } -} - -// ParseSingleSequenceExampleFeatureListDenseTypes sets the optional feature_list_dense_types attribute to value. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSingleSequenceExampleFeatureListDenseTypes(value []tf.DataType) ParseSingleSequenceExampleAttr { - return func(m optionalAttr) { - m["feature_list_dense_types"] = value - } -} - -// ParseSingleSequenceExampleContextDenseShapes sets the optional context_dense_shapes attribute to value. -// -// value: A list of Ncontext_dense shapes; the shapes of data in -// each context Feature given in context_dense_keys. -// The number of elements in the Feature corresponding to context_dense_key[j] -// must always equal context_dense_shapes[j].NumEntries(). -// The shape of context_dense_values[j] will match context_dense_shapes[j]. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSingleSequenceExampleContextDenseShapes(value []tf.Shape) ParseSingleSequenceExampleAttr { - return func(m optionalAttr) { - m["context_dense_shapes"] = value - } -} - -// ParseSingleSequenceExampleFeatureListSparseTypes sets the optional feature_list_sparse_types attribute to value. -// -// value: A list of Nfeature_list_sparse types; the data types -// of data in each FeatureList given in feature_list_sparse_keys. -// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList), -// DT_INT64 (Int64List), and DT_STRING (BytesList). -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSingleSequenceExampleFeatureListSparseTypes(value []tf.DataType) ParseSingleSequenceExampleAttr { - return func(m optionalAttr) { - m["feature_list_sparse_types"] = value - } -} - -// ParseSingleSequenceExampleFeatureListDenseShapes sets the optional feature_list_dense_shapes attribute to value. -// -// value: A list of Nfeature_list_dense shapes; the shapes of -// data in each FeatureList given in feature_list_dense_keys. -// The shape of each Feature in the FeatureList corresponding to -// feature_list_dense_key[j] must always equal -// feature_list_dense_shapes[j].NumEntries(). -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSingleSequenceExampleFeatureListDenseShapes(value []tf.Shape) ParseSingleSequenceExampleAttr { - return func(m optionalAttr) { - m["feature_list_dense_shapes"] = value - } -} - -// Transforms a scalar brain.SequenceExample proto (as strings) into typed tensors. -// -// Arguments: -// serialized: A scalar containing a binary serialized SequenceExample proto. -// feature_list_dense_missing_assumed_empty: A vector listing the -// FeatureList keys which may be missing from the SequenceExample. If the -// associated FeatureList is missing, it is treated as empty. By default, -// any FeatureList not listed in this vector must exist in the SequenceExample. -// context_sparse_keys: A list of Ncontext_sparse string Tensors (scalars). -// The keys expected in the Examples' features associated with context_sparse -// values. -// context_dense_keys: A list of Ncontext_dense string Tensors (scalars). 
-// The keys expected in the SequenceExamples' context features associated with -// dense values. -// feature_list_sparse_keys: A list of Nfeature_list_sparse string Tensors -// (scalars). The keys expected in the FeatureLists associated with sparse -// values. -// feature_list_dense_keys: A list of Nfeature_list_dense string Tensors (scalars). -// The keys expected in the SequenceExamples' feature_lists associated -// with lists of dense values. -// context_dense_defaults: A list of Ncontext_dense Tensors (some may be empty). -// context_dense_defaults[j] provides default values -// when the SequenceExample's context map lacks context_dense_key[j]. -// If an empty Tensor is provided for context_dense_defaults[j], -// then the Feature context_dense_keys[j] is required. -// The input type is inferred from context_dense_defaults[j], even when it's -// empty. If context_dense_defaults[j] is not empty, its shape must match -// context_dense_shapes[j]. -// debug_name: A scalar containing the name of the serialized proto. -// May contain, for example, table key (descriptive) name for the -// corresponding serialized proto. This is purely useful for debugging -// purposes, and the presence of values here has no effect on the output. -// May also be an empty scalar if no name is available. -func ParseSingleSequenceExample(scope *Scope, serialized tf.Output, feature_list_dense_missing_assumed_empty tf.Output, context_sparse_keys []tf.Output, context_dense_keys []tf.Output, feature_list_sparse_keys []tf.Output, feature_list_dense_keys []tf.Output, context_dense_defaults []tf.Output, debug_name tf.Output, optional ...ParseSingleSequenceExampleAttr) (context_sparse_indices []tf.Output, context_sparse_values []tf.Output, context_sparse_shapes []tf.Output, context_dense_values []tf.Output, feature_list_sparse_indices []tf.Output, feature_list_sparse_values []tf.Output, feature_list_sparse_shapes []tf.Output, feature_list_dense_values []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ParseSingleSequenceExample", - Input: []tf.Input{ - serialized, feature_list_dense_missing_assumed_empty, tf.OutputList(context_sparse_keys), tf.OutputList(context_dense_keys), tf.OutputList(feature_list_sparse_keys), tf.OutputList(feature_list_dense_keys), tf.OutputList(context_dense_defaults), debug_name, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if context_sparse_indices, idx, err = makeOutputList(op, idx, "context_sparse_indices"); err != nil { - scope.UpdateErr("ParseSingleSequenceExample", err) - return - } - if context_sparse_values, idx, err = makeOutputList(op, idx, "context_sparse_values"); err != nil { - scope.UpdateErr("ParseSingleSequenceExample", err) - return - } - if context_sparse_shapes, idx, err = makeOutputList(op, idx, "context_sparse_shapes"); err != nil { - scope.UpdateErr("ParseSingleSequenceExample", err) - return - } - if context_dense_values, idx, err = makeOutputList(op, idx, "context_dense_values"); err != nil { - scope.UpdateErr("ParseSingleSequenceExample", err) - return - } - if feature_list_sparse_indices, idx, err = makeOutputList(op, idx, "feature_list_sparse_indices"); err != nil { - scope.UpdateErr("ParseSingleSequenceExample", err) - return - } - if feature_list_sparse_values, idx, err = makeOutputList(op, idx, "feature_list_sparse_values"); err != nil { - 
scope.UpdateErr("ParseSingleSequenceExample", err) - return - } - if feature_list_sparse_shapes, idx, err = makeOutputList(op, idx, "feature_list_sparse_shapes"); err != nil { - scope.UpdateErr("ParseSingleSequenceExample", err) - return - } - if feature_list_dense_values, idx, err = makeOutputList(op, idx, "feature_list_dense_values"); err != nil { - scope.UpdateErr("ParseSingleSequenceExample", err) - return - } - return context_sparse_indices, context_sparse_values, context_sparse_shapes, context_dense_values, feature_list_sparse_indices, feature_list_sparse_values, feature_list_sparse_shapes, feature_list_dense_values -} - -// Computes the number of elements in the given table. -// -// Arguments: -// table_handle: Handle to the table. -// -// Returns Scalar that contains number of elements in the table. -func LookupTableSizeV2(scope *Scope, table_handle tf.Output) (size tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "LookupTableSizeV2", - Input: []tf.Input{ - table_handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes inverse hyperbolic sine of x element-wise. -// -// Given an input tensor, this function computes inverse hyperbolic sine -// for every element in the tensor. Both input and output has a range of -// `[-inf, inf]`. -// -// ```python -// x = tf.constant([-float("inf"), -2, -0.5, 1, 1.2, 200, 10000, float("inf")]) -// tf.math.asinh(x) ==> [-inf -1.4436355 -0.4812118 0.8813736 1.0159732 5.991471 9.903487 inf] -// ``` -func Asinh(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Asinh", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Looks up keys in a table, outputs the corresponding values. -// -// The tensor `keys` must of the same type as the keys of the table. -// The output `values` is of the type of the table values. -// -// The scalar `default_value` is the value output for keys not present in the -// table. It must also be of the same type as the table values. -// -// Arguments: -// table_handle: Handle to the table. -// keys: Any shape. Keys to look up. -// -// -// Returns Same shape as `keys`. Values found in the table, or `default_values` -// for missing keys. -func LookupTableFindV2(scope *Scope, table_handle tf.Output, keys tf.Output, default_value tf.Output) (values tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "LookupTableFindV2", - Input: []tf.Input{ - table_handle, keys, default_value, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MaxPoolGradAttr is an optional argument to MaxPoolGrad. -type MaxPoolGradAttr func(optionalAttr) - -// MaxPoolGradDataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func MaxPoolGradDataFormat(value string) MaxPoolGradAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Computes gradients of the maxpooling function. -// -// Arguments: -// orig_input: The original input tensor. -// orig_output: The original output tensor. -// grad: 4-D. Gradients w.r.t. the output of `max_pool`. 
-// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. -// -// Returns Gradients w.r.t. the input to `max_pool`. -func MaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MaxPoolGrad", - Input: []tf.Input{ - orig_input, orig_output, grad, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Rolls the elements of a tensor along an axis. -// -// The elements are shifted positively (towards larger indices) by the offset of -// `shift` along the dimension of `axis`. Negative `shift` values will shift -// elements in the opposite direction. Elements that roll passed the last position -// will wrap around to the first and vice versa. Multiple shifts along multiple -// axes may be specified. -// -// For example: -// -// ``` -// # 't' is [0, 1, 2, 3, 4] -// roll(t, shift=2, axis=0) ==> [3, 4, 0, 1, 2] -// -// # shifting along multiple dimensions -// # 't' is [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]] -// roll(t, shift=[1, -2], axis=[0, 1]) ==> [[7, 8, 9, 5, 6], [2, 3, 4, 0, 1]] -// -// # shifting along the same axis multiple times -// # 't' is [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]] -// roll(t, shift=[2, -3], axis=[1, 1]) ==> [[1, 2, 3, 4, 0], [6, 7, 8, 9, 5]] -// ``` -// -// Arguments: -// -// shift: Dimension must be 0-D or 1-D. `shift[i]` specifies the number of places by which -// elements are shifted positively (towards larger indices) along the dimension -// specified by `axis[i]`. Negative shifts will roll the elements in the opposite -// direction. -// axis: Dimension must be 0-D or 1-D. `axis[i]` specifies the dimension that the shift -// `shift[i]` should occur. If the same axis is referenced more than once, the -// total shift for that axis will be the sum of all the shifts that belong to that -// axis. -// -// Returns Has the same shape and size as the input. The elements are shifted -// positively (towards larger indices) by the offsets of `shift` along the -// dimensions of `axis`. -func Roll(scope *Scope, input tf.Output, shift tf.Output, axis tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Roll", - Input: []tf.Input{ - input, shift, axis, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RestoreSliceAttr is an optional argument to RestoreSlice. -type RestoreSliceAttr func(optionalAttr) - -// RestoreSlicePreferredShard sets the optional preferred_shard attribute to value. -// -// value: Index of file to open first if multiple files match -// `file_pattern`. See the documentation for `Restore`. -// If not specified, defaults to -1 -func RestoreSlicePreferredShard(value int64) RestoreSliceAttr { - return func(m optionalAttr) { - m["preferred_shard"] = value - } -} - -// Restores a tensor from checkpoint files. -// -// This is like `Restore` except that restored tensor can be listed as filling -// only a slice of a larger tensor. `shape_and_slice` specifies the shape of the -// larger tensor and the slice that the restored tensor covers. 
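An end-to-end sketch of the Roll wrapper above, reproducing the single-axis example from its documentation, assuming the standard `tensorflow/go` and `tensorflow/go/op` packages:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	t := op.Const(s, []int32{0, 1, 2, 3, 4})
	shift := op.Const(s, int32(2))
	axis := op.Const(s, int32(0))
	rolled := op.Roll(s, t, shift, axis)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{rolled}, nil)
	if err != nil {
		panic(err)
	}
	// Expected per the Roll documentation above: [3 4 0 1 2].
	fmt.Println(out[0].Value())
}
```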
-// -// The `shape_and_slice` input has the same format as the -// elements of the `shapes_and_slices` input of the `SaveSlices` op. -// -// Arguments: -// file_pattern: Must have a single element. The pattern of the files from -// which we read the tensor. -// tensor_name: Must have a single element. The name of the tensor to be -// restored. -// shape_and_slice: Scalar. The shapes and slice specifications to use when -// restoring a tensors. -// dt: The type of the tensor to be restored. -// -// Returns The restored tensor. -func RestoreSlice(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, shape_and_slice tf.Output, dt tf.DataType, optional ...RestoreSliceAttr) (tensor tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dt": dt} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RestoreSlice", - Input: []tf.Input{ - file_pattern, tensor_name, shape_and_slice, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// OrderedMapUnstageAttr is an optional argument to OrderedMapUnstage. -type OrderedMapUnstageAttr func(optionalAttr) - -// OrderedMapUnstageCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapUnstageCapacity(value int64) OrderedMapUnstageAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// OrderedMapUnstageMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapUnstageMemoryLimit(value int64) OrderedMapUnstageAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// OrderedMapUnstageContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func OrderedMapUnstageContainer(value string) OrderedMapUnstageAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// OrderedMapUnstageSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func OrderedMapUnstageSharedName(value string) OrderedMapUnstageAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op removes and returns the values associated with the key -// -// from the underlying container. If the underlying container -// does not contain this key, the op will block until it does. -func OrderedMapUnstage(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...OrderedMapUnstageAttr) (values []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "OrderedMapUnstage", - Input: []tf.Input{ - key, indices, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if values, idx, err = makeOutputList(op, idx, "values"); err != nil { - scope.UpdateErr("OrderedMapUnstage", err) - return - } - return values -} - -// SobolSampleAttr is an optional argument to SobolSample. -type SobolSampleAttr func(optionalAttr) - -// SobolSampleDtype sets the optional dtype attribute to value. -// -// value: The type of the sample. One of: `float32` or `float64`. -// If not specified, defaults to DT_FLOAT -func SobolSampleDtype(value tf.DataType) SobolSampleAttr { - return func(m optionalAttr) { - m["dtype"] = value - } -} - -// Generates points from the Sobol sequence. 
-// -// Creates a Sobol sequence with `num_results` samples. Each sample has dimension -// `dim`. Skips the first `skip` samples. -// -// Arguments: -// dim: Positive scalar `Tensor` representing each sample's dimension. -// num_results: Positive scalar `Tensor` of dtype int32. The number of Sobol points to return -// in the output. -// skip: Positive scalar `Tensor` of dtype int32. The number of initial points of the -// Sobol sequence to skip. -// -// Returns `Tensor` of samples from Sobol sequence with `shape` [num_results, dim]. -func SobolSample(scope *Scope, dim tf.Output, num_results tf.Output, skip tf.Output, optional ...SobolSampleAttr) (samples tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SobolSample", - Input: []tf.Input{ - dim, num_results, skip, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// QuantizedReluAttr is an optional argument to QuantizedRelu. -type QuantizedReluAttr func(optionalAttr) - -// QuantizedReluOutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_QUINT8 -func QuantizedReluOutType(value tf.DataType) QuantizedReluAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// Computes Quantized Rectified Linear: `max(features, 0)` -// -// Arguments: -// -// min_features: The float value that the lowest quantized value represents. -// max_features: The float value that the highest quantized value represents. -// -// Returns: -// activations: Has the same output shape as "features". -// min_activations: The float value that the lowest quantized value represents. -// max_activations: The float value that the highest quantized value represents. -func QuantizedRelu(scope *Scope, features tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedReluAttr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizedRelu", - Input: []tf.Input{ - features, min_features, max_features, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Returns the next representable value of `x1` in the direction of `x2`, element-wise. -// -// This operation returns the same result as the C++ std::nextafter function. -// -// It can also return a subnormal number. -// -// @compatibility(cpp) -// Equivalent to C++ std::nextafter function. -// @end_compatibility -func NextAfter(scope *Scope, x1 tf.Output, x2 tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "NextAfter", - Input: []tf.Input{ - x1, x2, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Bucketizes 'input' based on 'boundaries'. -// -// For example, if the inputs are -// boundaries = [0, 10, 100] -// input = [[-5, 10000] -// [150, 10] -// [5, 100]] -// -// then the output will be -// output = [[0, 3] -// [3, 2] -// [1, 3]] -// -// Arguments: -// input: Any shape of Tensor contains with int or float type. -// boundaries: A sorted list of floats gives the boundary of the buckets. -// -// Returns Same shape with 'input', each value of input replaced with bucket index. -// -// @compatibility(numpy) -// Equivalent to np.digitize. 
-// @end_compatibility -func Bucketize(scope *Scope, input tf.Output, boundaries []float32) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"boundaries": boundaries} - opspec := tf.OpSpec{ - Type: "Bucketize", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the log of the absolute value of `Gamma(x)` element-wise. -// -// For positive numbers, this function computes log((input - 1)!) for every element in the tensor. -// `lgamma(5) = log((5-1)!) = log(4!) = log(24) = 3.1780539` -// -// Example: -// -// ```python -// x = tf.constant([0, 0.5, 1, 4.5, -4, -5.6]) -// tf.math.lgamma(x) ==> [inf, 0.5723649, 0., 2.4537368, inf, -4.6477685] -// ``` -func Lgamma(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Lgamma", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Reads the value of a variable. -// -// The tensor returned by this operation is immutable. -// -// The value returned by this operation is guaranteed to be influenced by all the -// writes on which this operation depends directly or indirectly, and to not be -// influenced by any of the writes which depend directly or indirectly on this -// operation. -// -// Arguments: -// resource: handle to the resource in which to store the variable. -// dtype: the dtype of the value. -func ReadVariableOp(scope *Scope, resource tf.Output, dtype tf.DataType) (value tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - opspec := tf.OpSpec{ - Type: "ReadVariableOp", - Input: []tf.Input{ - resource, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes a range that covers the actual values present in a quantized tensor. -// -// Given a quantized tensor described by `(input, input_min, input_max)`, outputs a -// range that covers the actual values present in that tensor. This op is typically -// used to produce the `requested_output_min` and `requested_output_max` for -// `Requantize`. -// -// Arguments: -// -// input_min: The float value that the minimum quantized input value represents. -// input_max: The float value that the maximum quantized input value represents. -// -// Returns: -// output_min: The computed min output. -// output_max: the computed max output. -func RequantizationRange(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output) (output_min tf.Output, output_max tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "RequantizationRange", - Input: []tf.Input{ - input, input_min, input_max, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Compare values of `input` to `threshold` and pack resulting bits into a `uint8`. -// -// Each comparison returns a boolean `true` (if `input_value > threshold`) -// or and `false` otherwise. -// -// This operation is useful for Locality-Sensitive-Hashing (LSH) and other -// algorithms that use hashing approximations of cosine and `L2` distances; -// codes can be generated from an input via: -// -// ```python -// codebook_size = 50 -// codebook_bits = codebook_size * 32 -// codebook = tf.get_variable('codebook', [x.shape[-1].value, codebook_bits], -// dtype=x.dtype, -// initializer=tf.orthogonal_initializer()) -// codes = compare_and_threshold(tf.matmul(x, codebook), threshold=0.) 
-// codes = tf.bitcast(codes, tf.int32) # go from uint8 to int32 -// # now codes has shape x.shape[:-1] + [codebook_size] -// ``` -// -// **NOTE**: Currently, the innermost dimension of the tensor must be divisible -// by 8. -// -// Given an `input` shaped `[s0, s1, ..., s_n]`, the output is -// a `uint8` tensor shaped `[s0, s1, ..., s_n / 8]`. -// -// Arguments: -// input: Values to compare against `threshold` and bitpack. -// threshold: Threshold to compare against. -// -// Returns The bitpacked comparisons. -func CompareAndBitpack(scope *Scope, input tf.Output, threshold tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "CompareAndBitpack", - Input: []tf.Input{ - input, threshold, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Tensor contraction according to Einstein summation convention. -// -// Implements generalized Tensor contraction and reduction. Each input Tensor must -// have a corresponding input subscript appearing in the comma-separated left-hand -// side of the equation. The right-hand side of the equation consists of the -// output subscript. The input subscripts and the output subscript should consist -// of zero or more named axis labels and at most one ellipsis (`...`). -// -// The named axis labels may be any single character other than those having -// special meaning, namely `,.->`. The behavior of this Op is undefined if it -// receives an ill-formatted equation; since the validation is done at -// graph-building time, we omit format validation checks at runtime. -// -// Note: This Op is *not* intended to be called by the user; instead users should -// call `tf.einsum` directly. It is a hidden Op used by `tf.einsum`. -// -// Operations are applied to the input(s) according to the following rules: -// -// (a) Generalized Diagonals: For input dimensions corresponding to axis labels -// appearing more than once in the same input subscript, we take the -// generalized (`k`-dimensional) diagonal. -// For example, in the equation `iii->i` with input shape `[3, 3, 3]`, the -// generalized diagonal would consist of `3` elements at indices `(0, 0, 0)`, -// `(1, 1, 1)` and `(2, 2, 2)` to create a Tensor of shape `[3]`. -// -// (b) Reduction: Axes corresponding to labels appearing only in one input -// subscript but not in the output subscript are summed over prior to Tensor -// contraction. -// For example, in the equation `ab,bc->b`, the axis labels `a` and `c` are -// the reduction axis labels. -// -// (c) Batch Dimensions: Axes corresponding to labels appearing in each of the -// input subscripts and also in the output subscript make up the batch -// dimensions in Tensor contraction. Unnamed axis labels corresponding to -// ellipsis (`...`) also correspond to batch dimensions. -// For example, for the equation denoting batch matrix multiplication, -// `bij,bjk->bik`, the axis label `b` corresponds to a batch dimension. -// -// (d) Contraction: In case of binary einsum, axes corresponding to labels -// appearing in two different inputs (and not in the output) are contracted -// against each other. -// Considering the batch matrix multiplication equation again -// (`bij,bjk->bik`), the contracted axis label is `j`. -// -// (e) Expand Diagonal: If the output subscripts contain repeated (explicit) axis -// labels, the opposite operation of (a) is applied. 
For example, in the -// equation `i->iii`, and input shape `[3]`, the output of shape `[3, 3, 3]` -// are all zeros, except for the (generalized) diagonal which is populated -// with values from the input. -// Note: This operation is not supported by `np.einsum` or `tf.einsum`; it is -// provided to enable computing the symbolic gradient of `tf.einsum`. -// -// The output subscripts must contain only labels appearing in at least one of the -// input subscripts. Furthermore, all dimensions mapping to the same axis label -// must be equal. -// -// Any of the input and output subscripts may contain at most a single ellipsis -// (`...`). These ellipsis are mapped against dimensions not corresponding to any -// named axis label. If two inputs contain ellipsis, then they are broadcasted -// according to standard NumPy broadcasting -// [rules](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html). -// -// The broadcasted dimensions are placed in the corresponding location of the -// ellipsis in the output subscript. If the broadcasted dimensions are non-empty -// and the output subscripts do not contain ellipsis, then an InvalidArgument error -// is raised. -// -// @compatibility(numpy) -// Similar to [`numpy.einsum`](https://docs.scipy.org/doc/numpy/reference/generated/numpy.einsum.html). -// -// Comparison with `numpy.einsum`: -// -// * This Op only supports unary and binary forms of `numpy.einsum`. -// * This Op does not support implicit form. (i.e. equations without `->`). -// * This Op also supports repeated indices in the output subscript, which is not -// supported by `numpy.einsum`. -// @end_compatibility -// -// -// Arguments: -// inputs: List of 1 or 2 Tensors. -// equation: String describing the Einstein Summation operation; in the format of np.einsum. -// -// Returns Output Tensor with shape depending upon `equation`. -func Einsum(scope *Scope, inputs []tf.Output, equation string) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"equation": equation} - opspec := tf.OpSpec{ - Type: "Einsum", - Input: []tf.Input{ - tf.OutputList(inputs), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Convert the quantized 'input' tensor into a lower-precision 'output', using the -// -// actual distribution of the values to maximize the usage of the lower bit depth -// and adjusting the output min and max ranges accordingly. -// -// [input_min, input_max] are scalar floats that specify the range for the float -// interpretation of the 'input' data. For example, if input_min is -1.0f and -// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0 -// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f. -// -// This operator tries to squeeze as much precision as possible into an output with -// a lower bit depth by calculating the actual min and max values found in the -// data. For example, maybe that quint16 input has no values lower than 16,384 and -// none higher than 49,152. That means only half the range is actually needed, all -// the float interpretations are between -0.5f and 0.5f, so if we want to compress -// the data into a quint8 output, we can use that range rather than the theoretical -// -1.0f to 1.0f that is suggested by the input min and max. 
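A small sketch of the batch-matrix-multiplication case of `Einsum` (equation `bij,bjk->bik`), under the same import-path assumptions as the earlier example:

```go
package main

import (
	"fmt"
	"log"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	a := op.Const(s, [][][]float32{{{1, 2, 3}, {4, 5, 6}}})   // shape [1, 2, 3]
	b := op.Const(s, [][][]float32{{{1, 0}, {0, 1}, {1, 1}}}) // shape [1, 3, 2]
	prod := op.Einsum(s, []tf.Output{a, b}, "bij,bjk->bik")   // shape [1, 2, 2]

	graph, err := s.Finalize()
	if err != nil {
		log.Fatal(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{prod}, nil)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(out[0].Value()) // [[[4 5] [10 11]]]
}
```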
-// -// In practice, this is most useful for taking output from operations like -// QuantizedMatMul that can produce higher bit-depth outputs than their inputs and -// may have large potential output ranges, but in practice have a distribution of -// input values that only uses a small fraction of the possible range. By feeding -// that output into this operator, we can reduce it from 32 bits down to 8 with -// minimal loss of accuracy. -// -// Arguments: -// -// input_min: The float value that the minimum quantized input value represents. -// input_max: The float value that the maximum quantized input value represents. -// out_type: The type of the output. Should be a lower bit depth than Tinput. -// -// Returns: -// output -// output_min: The float value that the minimum quantized output value represents. -// output_max: The float value that the maximum quantized output value represents. -func QuantizeDownAndShrinkRange(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"out_type": out_type} - opspec := tf.OpSpec{ - Type: "QuantizeDownAndShrinkRange", - Input: []tf.Input{ - input, input_min, input_max, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Converts each string in the input Tensor to its hash mod by a number of buckets. -// -// The hash function is deterministic on the content of the string within the -// process. -// -// Note that the hash function may change from time to time. -// This functionality will be deprecated and it's recommended to use -// `tf.string_to_hash_bucket_fast()` or `tf.string_to_hash_bucket_strong()`. -// -// Arguments: -// -// num_buckets: The number of buckets. -// -// Returns A Tensor of the same shape as the input `string_tensor`. -func StringToHashBucket(scope *Scope, string_tensor tf.Output, num_buckets int64) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_buckets": num_buckets} - opspec := tf.OpSpec{ - Type: "StringToHashBucket", - Input: []tf.Input{ - string_tensor, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes softsign: `features / (abs(features) + 1)`. -func Softsign(scope *Scope, features tf.Output) (activations tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Softsign", - Input: []tf.Input{ - features, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// QuantizedAddAttr is an optional argument to QuantizedAdd. -type QuantizedAddAttr func(optionalAttr) - -// QuantizedAddToutput sets the optional Toutput attribute to value. -// If not specified, defaults to DT_QINT32 -func QuantizedAddToutput(value tf.DataType) QuantizedAddAttr { - return func(m optionalAttr) { - m["Toutput"] = value - } -} - -// Returns x + y element-wise, working on quantized buffers. -// -// Arguments: -// -// -// min_x: The float value that the lowest quantized `x` value represents. -// max_x: The float value that the highest quantized `x` value represents. -// min_y: The float value that the lowest quantized `y` value represents. -// max_y: The float value that the highest quantized `y` value represents. -// -// Returns: -// z -// min_z: The float value that the lowest quantized output value represents. 
-// max_z: The float value that the highest quantized output value represents. -// -// *NOTE*: `QuantizedAdd` supports limited forms of broadcasting. More about -// broadcasting [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func QuantizedAdd(scope *Scope, x tf.Output, y tf.Output, min_x tf.Output, max_x tf.Output, min_y tf.Output, max_y tf.Output, optional ...QuantizedAddAttr) (z tf.Output, min_z tf.Output, max_z tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizedAdd", - Input: []tf.Input{ - x, y, min_x, max_x, min_y, max_y, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// ShuffleAndRepeatDatasetAttr is an optional argument to ShuffleAndRepeatDataset. -type ShuffleAndRepeatDatasetAttr func(optionalAttr) - -// ShuffleAndRepeatDatasetReshuffleEachIteration sets the optional reshuffle_each_iteration attribute to value. -// If not specified, defaults to true -func ShuffleAndRepeatDatasetReshuffleEachIteration(value bool) ShuffleAndRepeatDatasetAttr { - return func(m optionalAttr) { - m["reshuffle_each_iteration"] = value - } -} - -// Creates a dataset that shuffles and repeats elements from `input_dataset` -// -// pseudorandomly. -// -// Arguments: -// -// buffer_size: The number of output elements to buffer in an iterator over -// this dataset. Compare with the `min_after_dequeue` attr when creating a -// `RandomShuffleQueue`. -// seed: A scalar seed for the random number generator. If either `seed` or -// `seed2` is set to be non-zero, the random number generator is seeded -// by the given seed. Otherwise, a random seed is used. -// seed2: A second scalar seed to avoid seed collision. -// count: A scalar representing the number of times the underlying dataset -// should be repeated. The default is `-1`, which results in infinite repetition. -// -// -func ShuffleAndRepeatDataset(scope *Scope, input_dataset tf.Output, buffer_size tf.Output, seed tf.Output, seed2 tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape, optional ...ShuffleAndRepeatDatasetAttr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ShuffleAndRepeatDataset", - Input: []tf.Input{ - input_dataset, buffer_size, seed, seed2, count, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// QuantizedMulAttr is an optional argument to QuantizedMul. -type QuantizedMulAttr func(optionalAttr) - -// QuantizedMulToutput sets the optional Toutput attribute to value. -// If not specified, defaults to DT_QINT32 -func QuantizedMulToutput(value tf.DataType) QuantizedMulAttr { - return func(m optionalAttr) { - m["Toutput"] = value - } -} - -// Returns x * y element-wise, working on quantized buffers. -// -// Arguments: -// -// -// min_x: The float value that the lowest quantized `x` value represents. -// max_x: The float value that the highest quantized `x` value represents. -// min_y: The float value that the lowest quantized `y` value represents. -// max_y: The float value that the highest quantized `y` value represents. -// -// Returns: -// z -// min_z: The float value that the lowest quantized output value represents. 
-// max_z: The float value that the highest quantized output value represents.
-//
-// *NOTE*: `QuantizedMul` supports limited forms of broadcasting. More about
-// broadcasting [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func QuantizedMul(scope *Scope, x tf.Output, y tf.Output, min_x tf.Output, max_x tf.Output, min_y tf.Output, max_y tf.Output, optional ...QuantizedMulAttr) (z tf.Output, min_z tf.Output, max_z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "QuantizedMul",
-		Input: []tf.Input{
-			x, y, min_x, max_x, min_y, max_y,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// CumulativeLogsumexpAttr is an optional argument to CumulativeLogsumexp.
-type CumulativeLogsumexpAttr func(optionalAttr)
-
-// CumulativeLogsumexpExclusive sets the optional exclusive attribute to value.
-//
-// value: If `True`, perform exclusive cumulative log-sum-exp.
-// If not specified, defaults to false
-func CumulativeLogsumexpExclusive(value bool) CumulativeLogsumexpAttr {
-	return func(m optionalAttr) {
-		m["exclusive"] = value
-	}
-}
-
-// CumulativeLogsumexpReverse sets the optional reverse attribute to value.
-//
-// value: A `bool` (default: False).
-// If not specified, defaults to false
-func CumulativeLogsumexpReverse(value bool) CumulativeLogsumexpAttr {
-	return func(m optionalAttr) {
-		m["reverse"] = value
-	}
-}
-
-// Compute the cumulative log-sum-exp of the tensor `x` along `axis`.
-//
-// By default, this op performs an inclusive cumulative log-sum-exp,
-// which means that the first
-// element of the input is identical to the first element of the output:
-// ```python
-// tf.math.cumulative_logsumexp([a, b, c]) # => [a, log(exp(a) + exp(b)), log(exp(a) + exp(b) + exp(c))]
-// ```
-//
-// By setting the `exclusive` kwarg to `True`, an exclusive cumulative log-sum-exp is
-// performed instead:
-// ```python
-// tf.cumulative_logsumexp([a, b, c], exclusive=True) # => [-inf, a, log(exp(a) + exp(b))]
-// ```
-// Note that the neutral element of the log-sum-exp operation is `-inf`,
-// however, for performance reasons, the minimal value representable by the
-// floating point type is used instead.
-//
-// By setting the `reverse` kwarg to `True`, the cumulative log-sum-exp is performed in the
-// opposite direction.
-//
-// Arguments:
-// x: A `Tensor`. Must be one of the following types: `float16`, `float32`, `float64`.
-// axis: A `Tensor` of type `int32` (default: 0). Must be in the range
-// `[-rank(x), rank(x))`.
-func CumulativeLogsumexp(scope *Scope, x tf.Output, axis tf.Output, optional ...CumulativeLogsumexpAttr) (out tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "CumulativeLogsumexp",
-		Input: []tf.Input{
-			x, axis,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// ResourceApplyGradientDescentAttr is an optional argument to ResourceApplyGradientDescent.
-type ResourceApplyGradientDescentAttr func(optionalAttr)
-
-// ResourceApplyGradientDescentUseLocking sets the optional use_locking attribute to value.
-//
-// value: If `True`, the subtraction will be protected by a lock;
-// otherwise the behavior is undefined, but may exhibit less contention.
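To make the log-sum-exp semantics concrete, here is a small Go sketch of `CumulativeLogsumexp` (same assumptions as the earlier examples; the exact floating-point output will vary slightly):

```go
package main

import (
	"fmt"
	"log"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	x := op.Const(s, []float32{1, 2, 3})
	axis := op.Const(s, int32(0))
	// Inclusive by default; op.CumulativeLogsumexpExclusive(true) selects the
	// exclusive variant documented above.
	y := op.CumulativeLogsumexp(s, x, axis)

	graph, err := s.Finalize()
	if err != nil {
		log.Fatal(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{y}, nil)
	if err != nil {
		log.Fatal(err)
	}
	// y[i] = log(exp(x[0]) + ... + exp(x[i])), roughly [1 2.31 3.41] here.
	fmt.Println(out[0].Value())
}
```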
-// If not specified, defaults to false -func ResourceApplyGradientDescentUseLocking(value bool) ResourceApplyGradientDescentAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' by subtracting 'alpha' * 'delta' from it. -// -// Arguments: -// var_: Should be from a Variable(). -// alpha: Scaling factor. Must be a scalar. -// delta: The change. -// -// Returns the created operation. -func ResourceApplyGradientDescent(scope *Scope, var_ tf.Output, alpha tf.Output, delta tf.Output, optional ...ResourceApplyGradientDescentAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyGradientDescent", - Input: []tf.Input{ - var_, alpha, delta, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Computes the matrix logarithm of one or more square matrices: -// -// -// \\(log(exp(A)) = A\\) -// -// This op is only defined for complex matrices. If A is positive-definite and -// real, then casting to a complex matrix, taking the logarithm and casting back -// to a real matrix will give the correct result. -// -// This function computes the matrix logarithm using the Schur-Parlett algorithm. -// Details of the algorithm can be found in Section 11.6.2 of: -// Nicholas J. Higham, Functions of Matrices: Theory and Computation, SIAM 2008. -// ISBN 978-0-898716-46-7. -// -// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions -// form square matrices. The output is a tensor of the same shape as the input -// containing the exponential for all input submatrices `[..., :, :]`. -// -// Arguments: -// input: Shape is `[..., M, M]`. -// -// Returns Shape is `[..., M, M]`. -// -// @compatibility(scipy) -// Equivalent to scipy.linalg.logm -// @end_compatibility -func MatrixLogarithm(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MatrixLogarithm", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SparseBincountAttr is an optional argument to SparseBincount. -type SparseBincountAttr func(optionalAttr) - -// SparseBincountBinaryOutput sets the optional binary_output attribute to value. -// -// value: bool; Whether the kernel should count the appearance or number of occurrences. -// If not specified, defaults to false -func SparseBincountBinaryOutput(value bool) SparseBincountAttr { - return func(m optionalAttr) { - m["binary_output"] = value - } -} - -// Counts the number of occurrences of each value in an integer array. -// -// Outputs a vector with length `size` and the same dtype as `weights`. If -// `weights` are empty, then index `i` stores the number of times the value `i` is -// counted in `arr`. If `weights` are non-empty, then index `i` stores the sum of -// the value in `weights` at each index where the corresponding value in `arr` is -// `i`. -// -// Values in `arr` outside of the range [0, size) are ignored. -// -// Arguments: -// indices: 2D int64 `Tensor`. -// values: 1D int `Tensor`. -// dense_shape: 1D int64 `Tensor`. -// size: non-negative int scalar `Tensor`. -// weights: is an int32, int64, float32, or float64 `Tensor` with the same -// shape as `input`, or a length-0 `Tensor`, in which case it acts as all weights -// equal to 1. -// -// Returns 1D `Tensor` with length equal to `size` or 2D `Tensor` with [batch_size, `size`]. 
-// The counts or summed weights for each value in the range [0, size). -func SparseBincount(scope *Scope, indices tf.Output, values tf.Output, dense_shape tf.Output, size tf.Output, weights tf.Output, optional ...SparseBincountAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SparseBincount", - Input: []tf.Input{ - indices, values, dense_shape, size, weights, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Calculate product with tridiagonal matrix. -// -// Calculates product of two matrices, where left matrix is a tridiagonal matrix. -// -// Arguments: -// superdiag: Tensor of shape `[..., 1, M]`, representing superdiagonals of -// tri-diagonal matrices to the left of multiplication. Last element is ignored. -// maindiag: Tensor of shape `[..., 1, M]`, representing main diagonals of tri-diagonal -// matrices to the left of multiplication. -// subdiag: Tensor of shape `[..., 1, M]`, representing subdiagonals of tri-diagonal -// matrices to the left of multiplication. First element is ignored. -// rhs: Tensor of shape `[..., M, N]`, representing MxN matrices to the right of -// multiplication. -// -// Returns Tensor of shape `[..., M, N]` containing the product. -func TridiagonalMatMul(scope *Scope, superdiag tf.Output, maindiag tf.Output, subdiag tf.Output, rhs tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TridiagonalMatMul", - Input: []tf.Input{ - superdiag, maindiag, subdiag, rhs, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes scaled exponential linear: `scale * alpha * (exp(features) - 1)` -// -// if < 0, `scale * features` otherwise. -// -// To be used together with -// `initializer = tf.variance_scaling_initializer(factor=1.0, mode='FAN_IN')`. -// For correct dropout, use `tf.contrib.nn.alpha_dropout`. -// -// See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) -func Selu(scope *Scope, features tf.Output) (activations tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Selu", - Input: []tf.Input{ - features, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// DenseBincountAttr is an optional argument to DenseBincount. -type DenseBincountAttr func(optionalAttr) - -// DenseBincountBinaryOutput sets the optional binary_output attribute to value. -// -// value: bool; Whether the kernel should count the appearance or number of occurrences. -// If not specified, defaults to false -func DenseBincountBinaryOutput(value bool) DenseBincountAttr { - return func(m optionalAttr) { - m["binary_output"] = value - } -} - -// Counts the number of occurrences of each value in an integer array. -// -// Outputs a vector with length `size` and the same dtype as `weights`. If -// `weights` are empty, then index `i` stores the number of times the value `i` is -// counted in `arr`. If `weights` are non-empty, then index `i` stores the sum of -// the value in `weights` at each index where the corresponding value in `arr` is -// `i`. -// -// Values in `arr` outside of the range [0, size) are ignored. -// -// Arguments: -// input: 1D or 2D int `Tensor`. -// size: non-negative int scalar `Tensor`. -// weights: is an int32, int64, float32, or float64 `Tensor` with the same -// shape as `arr`, or a length-0 `Tensor`, in which case it acts as all weights -// equal to 1. 
-// -// Returns 1D `Tensor` with length equal to `size` or 2D `Tensor` with [batch_size, `size`]. -// The counts or summed weights for each value in the range [0, size). -func DenseBincount(scope *Scope, input tf.Output, size tf.Output, weights tf.Output, optional ...DenseBincountAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DenseBincount", - Input: []tf.Input{ - input, size, weights, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the complex conjugate of a complex number. -// -// Given a tensor `input` of complex numbers, this operation returns a tensor of -// complex numbers that are the complex conjugate of each element in `input`. The -// complex numbers in `input` must be of the form \\(a + bj\\), where *a* is the -// real part and *b* is the imaginary part. -// -// The complex conjugate returned by this operation is of the form \\(a - bj\\). -// -// For example: -// -// ``` -// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] -// tf.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j] -// ``` -func Conj(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Conj", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ImagAttr is an optional argument to Imag. -type ImagAttr func(optionalAttr) - -// ImagTout sets the optional Tout attribute to value. -// If not specified, defaults to DT_FLOAT -func ImagTout(value tf.DataType) ImagAttr { - return func(m optionalAttr) { - m["Tout"] = value - } -} - -// Returns the imaginary part of a complex number. -// -// Given a tensor `input` of complex numbers, this operation returns a tensor of -// type `float` that is the imaginary part of each element in `input`. All -// elements in `input` must be complex numbers of the form \\(a + bj\\), where *a* -// is the real part and *b* is the imaginary part returned by this operation. -// -// For example: -// -// ``` -// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] -// tf.imag(input) ==> [4.75, 5.75] -// ``` -func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Imag", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RealAttr is an optional argument to Real. -type RealAttr func(optionalAttr) - -// RealTout sets the optional Tout attribute to value. -// If not specified, defaults to DT_FLOAT -func RealTout(value tf.DataType) RealAttr { - return func(m optionalAttr) { - m["Tout"] = value - } -} - -// Returns the real part of a complex number. -// -// Given a tensor `input` of complex numbers, this operation returns a tensor of -// type `float` that is the real part of each element in `input`. All elements in -// `input` must be complex numbers of the form \\(a + bj\\), where *a* is the real -// part returned by this operation and *b* is the imaginary part. 
-// -// For example: -// -// ``` -// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] -// tf.real(input) ==> [-2.25, 3.25] -// ``` -func Real(scope *Scope, input tf.Output, optional ...RealAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Real", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// DequantizeAttr is an optional argument to Dequantize. -type DequantizeAttr func(optionalAttr) - -// DequantizeMode sets the optional mode attribute to value. -// If not specified, defaults to "MIN_COMBINED" -func DequantizeMode(value string) DequantizeAttr { - return func(m optionalAttr) { - m["mode"] = value - } -} - -// DequantizeNarrowRange sets the optional narrow_range attribute to value. -// If not specified, defaults to false -func DequantizeNarrowRange(value bool) DequantizeAttr { - return func(m optionalAttr) { - m["narrow_range"] = value - } -} - -// DequantizeAxis sets the optional axis attribute to value. -// If not specified, defaults to -1 -func DequantizeAxis(value int64) DequantizeAttr { - return func(m optionalAttr) { - m["axis"] = value - } -} - -// DequantizeDtype sets the optional dtype attribute to value. -// -// value: Type of the output tensor. Currently Dequantize supports float and bfloat16. -// If 'dtype' is 'bfloat16', it only supports 'MIN_COMBINED' mode. -// If not specified, defaults to DT_FLOAT -func DequantizeDtype(value tf.DataType) DequantizeAttr { - return func(m optionalAttr) { - m["dtype"] = value - } -} - -// Dequantize the 'input' tensor into a float or bfloat16 Tensor. -// -// [min_range, max_range] are scalar floats that specify the range for -// the output. The 'mode' attribute controls exactly which calculations are -// used to convert the float values to their quantized equivalents. -// -// In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: -// -// ``` -// if T == qint8: in[i] += (range(T) + 1)/ 2.0 -// out[i] = min_range + (in[i]* (max_range - min_range) / range(T)) -// ``` -// here `range(T) = numeric_limits::max() - numeric_limits::min()` -// -// *MIN_COMBINED Mode Example* -// -// If the input comes from a QuantizedRelu6, the output type is -// quint8 (range of 0-255) but the possible range of QuantizedRelu6 is -// 0-6. The min_range and max_range values are therefore 0.0 and 6.0. -// Dequantize on quint8 will take each value, cast to float, and multiply -// by 6 / 255. -// Note that if quantizedtype is qint8, the operation will additionally add -// each value by 128 prior to casting. -// -// If the mode is 'MIN_FIRST', then this approach is used: -// -// ```c++ -// num_discrete_values = 1 << (# of bits in T) -// range_adjust = num_discrete_values / (num_discrete_values - 1) -// range = (range_max - range_min) * range_adjust -// range_scale = range / num_discrete_values -// const double offset_input = static_cast(input) - lowest_quantized; -// result = range_min + ((input - numeric_limits::min()) * range_scale) -// ``` -// -// If the mode is `SCALED`, dequantization is performed by multiplying each -// input value by a scaling_factor. (Thus an input of 0 always maps to 0.0). 
-// -// The scaling_factor is determined from `min_range`, `max_range`, and -// `narrow_range` in a way that is compatible with `QuantizeAndDequantize{V2|V3}` -// and `QuantizeV2`, using the following algorithm: -// -// ```c++ -// -// const int min_expected_T = std::numeric_limits::min() + -// (narrow_range ? 1 : 0); -// const int max_expected_T = std::numeric_limits::max(); -// const float max_expected_T = std::numeric_limits::max(); -// -// const float scale_factor = -// (std::numeric_limits::min() == 0) ? (max_range / max_expected_T) -// : std::max(min_range / min_expected_T, -// max_range / max_expected_T); -// ``` -// -// Arguments: -// -// min_range: The minimum scalar value possibly produced for the input. -// max_range: The maximum scalar value possibly produced for the input. -func Dequantize(scope *Scope, input tf.Output, min_range tf.Output, max_range tf.Output, optional ...DequantizeAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Dequantize", - Input: []tf.Input{ - input, min_range, max_range, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ComplexAttr is an optional argument to Complex. -type ComplexAttr func(optionalAttr) - -// ComplexTout sets the optional Tout attribute to value. -// If not specified, defaults to DT_COMPLEX64 -func ComplexTout(value tf.DataType) ComplexAttr { - return func(m optionalAttr) { - m["Tout"] = value - } -} - -// Converts two real numbers to a complex number. -// -// Given a tensor `real` representing the real part of a complex number, and a -// tensor `imag` representing the imaginary part of a complex number, this -// operation returns complex numbers elementwise of the form \\(a + bj\\), where -// *a* represents the `real` part and *b* represents the `imag` part. -// -// The input tensors `real` and `imag` must have the same shape. -// -// For example: -// -// ``` -// # tensor 'real' is [2.25, 3.25] -// # tensor `imag` is [4.75, 5.75] -// tf.complex(real, imag) ==> [[2.25 + 4.75j], [3.25 + 5.75j]] -// ``` -func Complex(scope *Scope, real tf.Output, imag tf.Output, optional ...ComplexAttr) (out tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Complex", - Input: []tf.Input{ - real, imag, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// CudnnRNNCanonicalToParamsV2Attr is an optional argument to CudnnRNNCanonicalToParamsV2. -type CudnnRNNCanonicalToParamsV2Attr func(optionalAttr) - -// CudnnRNNCanonicalToParamsV2RnnMode sets the optional rnn_mode attribute to value. -// If not specified, defaults to "lstm" -func CudnnRNNCanonicalToParamsV2RnnMode(value string) CudnnRNNCanonicalToParamsV2Attr { - return func(m optionalAttr) { - m["rnn_mode"] = value - } -} - -// CudnnRNNCanonicalToParamsV2InputMode sets the optional input_mode attribute to value. -// If not specified, defaults to "linear_input" -func CudnnRNNCanonicalToParamsV2InputMode(value string) CudnnRNNCanonicalToParamsV2Attr { - return func(m optionalAttr) { - m["input_mode"] = value - } -} - -// CudnnRNNCanonicalToParamsV2Direction sets the optional direction attribute to value. 
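A short sketch combining the `Complex`, `Real` and `Imag` wrappers above, mirroring the values used in their doc comments (same assumptions as the earlier examples):

```go
package main

import (
	"fmt"
	"log"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	re := op.Const(s, []float32{2.25, 3.25})
	im := op.Const(s, []float32{4.75, 5.75})
	z := op.Complex(s, re, im) // complex64 tensor [2.25+4.75j, 3.25+5.75j]
	zr := op.Real(s, z)        // [2.25 3.25]
	zi := op.Imag(s, z)        // [4.75 5.75]

	graph, err := s.Finalize()
	if err != nil {
		log.Fatal(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{zr, zi}, nil)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(out[0].Value(), out[1].Value())
}
```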
-// If not specified, defaults to "unidirectional" -func CudnnRNNCanonicalToParamsV2Direction(value string) CudnnRNNCanonicalToParamsV2Attr { - return func(m optionalAttr) { - m["direction"] = value - } -} - -// CudnnRNNCanonicalToParamsV2Dropout sets the optional dropout attribute to value. -// If not specified, defaults to 0 -func CudnnRNNCanonicalToParamsV2Dropout(value float32) CudnnRNNCanonicalToParamsV2Attr { - return func(m optionalAttr) { - m["dropout"] = value - } -} - -// CudnnRNNCanonicalToParamsV2Seed sets the optional seed attribute to value. -// If not specified, defaults to 0 -func CudnnRNNCanonicalToParamsV2Seed(value int64) CudnnRNNCanonicalToParamsV2Attr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// CudnnRNNCanonicalToParamsV2Seed2 sets the optional seed2 attribute to value. -// If not specified, defaults to 0 -func CudnnRNNCanonicalToParamsV2Seed2(value int64) CudnnRNNCanonicalToParamsV2Attr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// CudnnRNNCanonicalToParamsV2NumProj sets the optional num_proj attribute to value. -// If not specified, defaults to 0 -func CudnnRNNCanonicalToParamsV2NumProj(value int64) CudnnRNNCanonicalToParamsV2Attr { - return func(m optionalAttr) { - m["num_proj"] = value - } -} - -// Converts CudnnRNN params from canonical form to usable form. It supports the projection in LSTM. -// -// Writes a set of weights into the opaque params buffer so they can be used in -// upcoming training or inferences. -// -// Note that the params buffer may not be compatible across different GPUs. So any -// save and restoration should be converted to and from the canonical weights and -// biases. -// -// num_layers: Specifies the number of layers in the RNN model. -// num_units: Specifies the size of the hidden state. -// input_size: Specifies the size of the input state. -// weights: the canonical form of weights that can be used for saving -// and restoration. They are more likely to be compatible across different -// generations. -// biases: the canonical form of biases that can be used for saving -// and restoration. They are more likely to be compatible across different -// generations. -// num_params_weights: number of weight parameter matrix for all layers. -// num_params_biases: number of bias parameter vector for all layers. -// rnn_mode: Indicates the type of the RNN model. -// input_mode: Indicate whether there is a linear projection between the input and -// The actual computation before the first layer. 'skip_input' is only allowed -// when input_size == num_units; 'auto_select' implies 'skip_input' when -// input_size == num_units; otherwise, it implies 'linear_input'. -// direction: Indicates whether a bidirectional model will be used. -// dir = (direction == bidirectional) ? 2 : 1 -// dropout: dropout probability. When set to 0., dropout is disabled. -// seed: the 1st part of a seed to initialize dropout. -// seed2: the 2nd part of a seed to initialize dropout. -// num_proj: The output dimensionality for the projection matrices. If None or 0, -// no projection is performed. 
-func CudnnRNNCanonicalToParamsV2(scope *Scope, num_layers tf.Output, num_units tf.Output, input_size tf.Output, weights []tf.Output, biases []tf.Output, optional ...CudnnRNNCanonicalToParamsV2Attr) (params tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CudnnRNNCanonicalToParamsV2", - Input: []tf.Input{ - num_layers, num_units, input_size, tf.OutputList(weights), tf.OutputList(biases), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a sequence of numbers. -// -// This operation creates a sequence of numbers that begins at `start` and -// extends by increments of `delta` up to but not including `limit`. -// -// For example: -// -// ``` -// # 'start' is 3 -// # 'limit' is 18 -// # 'delta' is 3 -// tf.range(start, limit, delta) ==> [3, 6, 9, 12, 15] -// ``` -// -// Arguments: -// start: 0-D (scalar). First entry in the sequence. -// limit: 0-D (scalar). Upper limit of sequence, exclusive. -// delta: 0-D (scalar). Optional. Default is 1. Number that increments `start`. -// -// Returns 1-D. -func Range(scope *Scope, start tf.Output, limit tf.Output, delta tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Range", - Input: []tf.Input{ - start, limit, delta, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// AnyAttr is an optional argument to Any. -type AnyAttr func(optionalAttr) - -// AnyKeepDims sets the optional keep_dims attribute to value. -// -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func AnyKeepDims(value bool) AnyAttr { - return func(m optionalAttr) { - m["keep_dims"] = value - } -} - -// Computes the "logical or" of elements across dimensions of a tensor. -// -// Reduces `input` along the dimensions given in `axis`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `axis`. If `keep_dims` is true, the reduced dimensions are -// retained with length 1. -// -// Arguments: -// input: The tensor to reduce. -// axis: The dimensions to reduce. Must be in the range -// `[-rank(input), rank(input))`. -// -// Returns The reduced tensor. -func Any(scope *Scope, input tf.Output, axis tf.Output, optional ...AnyAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Any", - Input: []tf.Input{ - input, axis, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the gradient of morphological 2-D dilation with respect to the input. -// -// Arguments: -// input: 4-D with shape `[batch, in_height, in_width, depth]`. -// filter: 3-D with shape `[filter_height, filter_width, depth]`. -// out_backprop: 4-D with shape `[batch, out_height, out_width, depth]`. -// strides: 1-D of length 4. The stride of the sliding window for each dimension of -// the input tensor. Must be: `[1, stride_height, stride_width, 1]`. -// rates: 1-D of length 4. The input stride for atrous morphological dilation. -// Must be: `[1, rate_height, rate_width, 1]`. -// padding: The type of padding algorithm to use. -// -// Returns 4-D with shape `[batch, in_height, in_width, depth]`. 
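A minimal sketch of the `Range` wrapper above, reproducing the example from its doc comment (same assumptions as before):

```go
package main

import (
	"fmt"
	"log"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	r := op.Range(s, op.Const(s, int32(3)), op.Const(s, int32(18)), op.Const(s, int32(3)))

	graph, err := s.Finalize()
	if err != nil {
		log.Fatal(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{r}, nil)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(out[0].Value()) // [3 6 9 12 15]
}
```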
-func Dilation2DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, rates []int64, padding string) (in_backprop tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides, "rates": rates, "padding": padding} - opspec := tf.OpSpec{ - Type: "Dilation2DBackpropInput", - Input: []tf.Input{ - input, filter, out_backprop, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// AllAttr is an optional argument to All. -type AllAttr func(optionalAttr) - -// AllKeepDims sets the optional keep_dims attribute to value. -// -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func AllKeepDims(value bool) AllAttr { - return func(m optionalAttr) { - m["keep_dims"] = value - } -} - -// Computes the "logical and" of elements across dimensions of a tensor. -// -// Reduces `input` along the dimensions given in `axis`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `axis`. If `keep_dims` is true, the reduced dimensions are -// retained with length 1. -// -// Arguments: -// input: The tensor to reduce. -// axis: The dimensions to reduce. Must be in the range -// `[-rank(input), rank(input))`. -// -// Returns The reduced tensor. -func All(scope *Scope, input tf.Output, axis tf.Output, optional ...AllAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "All", - Input: []tf.Input{ - input, axis, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes gradients for SparseSegmentSqrtN. -// -// Returns tensor "output" with same shape as grad, except for dimension 0 whose -// value is output_dim0. -// -// Arguments: -// grad: gradient propagated to the SparseSegmentSqrtN op. -// indices: indices passed to the corresponding SparseSegmentSqrtN op. -// segment_ids: segment_ids passed to the corresponding SparseSegmentSqrtN op. -// output_dim0: dimension 0 of "data" passed to SparseSegmentSqrtN op. -func SparseSegmentSqrtNGrad(scope *Scope, grad tf.Output, indices tf.Output, segment_ids tf.Output, output_dim0 tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSegmentSqrtNGrad", - Input: []tf.Input{ - grad, indices, segment_ids, output_dim0, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the mean along sparse segments of a tensor. -// -// See `tf.sparse.segment_sum` for usage examples. -// -// Like `SegmentMean`, but `segment_ids` can have rank less than `data`'s first -// dimension, selecting a subset of dimension 0, specified by `indices`. -// -// Arguments: -// -// indices: A 1-D tensor. Has same rank as `segment_ids`. -// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SparseSegmentMean(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSegmentMean", - Input: []tf.Input{ - data, indices, segment_ids, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the sum along sparse segments of a tensor. 
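A small sketch of the boolean reduction `All` defined above; `Any` is used the same way (same assumptions as the earlier examples):

```go
package main

import (
	"fmt"
	"log"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	x := op.Const(s, [][]bool{{true, false, true}, {true, true, true}})
	axis := op.Const(s, int32(1))
	// Reduce each row; op.AllKeepDims(true) would keep the reduced axis with size 1.
	rows := op.All(s, x, axis)

	graph, err := s.Finalize()
	if err != nil {
		log.Fatal(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{rows}, nil)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(out[0].Value()) // [false true]
}
```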
-// -// Like `SparseSegmentSum`, but allows missing ids in `segment_ids`. If an id is -// missing, the `output` tensor at that position will be zeroed. -// -// Read -// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/sparse#Segmentation) -// for an explanation of segments. -// -// For example: -// -// ```python -// c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]]) -// -// tf.sparse_segment_sum_with_num_segments( -// c, tf.constant([0, 1]), tf.constant([0, 0]), num_segments=3) -// # => [[0 0 0 0] -// # [0 0 0 0] -// # [0 0 0 0]] -// -// tf.sparse_segment_sum_with_num_segments(c, -// tf.constant([0, 1]), -// tf.constant([0, 2], -// num_segments=4)) -// # => [[ 1 2 3 4] -// # [ 0 0 0 0] -// # [-1 -2 -3 -4] -// # [ 0 0 0 0]] -// ``` -// -// Arguments: -// -// indices: A 1-D tensor. Has same rank as `segment_ids`. -// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. -// num_segments: Should equal the number of distinct segment IDs. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `num_segments`. -func SparseSegmentSumWithNumSegments(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSegmentSumWithNumSegments", - Input: []tf.Input{ - data, indices, segment_ids, num_segments, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the sum along sparse segments of a tensor. -// -// Read -// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) -// for an explanation of segments. -// -// Like `SegmentSum`, but `segment_ids` can have rank less than `data`'s first -// dimension, selecting a subset of dimension 0, specified by `indices`. -// -// For example: -// -// ```python -// c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]]) -// -// # Select two rows, one segment. -// tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 0])) -// # => [[0 0 0 0]] -// -// # Select two rows, two segment. -// tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 1])) -// # => [[ 1 2 3 4] -// # [-1 -2 -3 -4]] -// -// # Select all rows, two segments. -// tf.sparse_segment_sum(c, tf.constant([0, 1, 2]), tf.constant([0, 0, 1])) -// # => [[0 0 0 0] -// # [5 6 7 8]] -// -// # Which is equivalent to: -// tf.segment_sum(c, tf.constant([0, 0, 1])) -// ``` -// -// Arguments: -// -// indices: A 1-D tensor. Has same rank as `segment_ids`. -// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SparseSegmentSum(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSegmentSum", - Input: []tf.Input{ - data, indices, segment_ids, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the sum along segments of a tensor. -// -// Read -// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) -// for an explanation of segments. -// -// Computes a tensor such that -// \\(output[i] = \sum_{j...} data[j...]\\) where the sum is over tuples `j...` such -// that `segment_ids[j...] == i`. Unlike `SegmentSum`, `segment_ids` -// need not be sorted and need not cover all values in the full -// range of valid values. 
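The first `SparseSegmentSum` Python example above, translated to a rough Go sketch (same assumptions as the earlier examples):

```go
package main

import (
	"fmt"
	"log"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	c := op.Const(s, [][]int32{{1, 2, 3, 4}, {-1, -2, -3, -4}, {5, 6, 7, 8}})
	indices := op.Const(s, []int32{0, 1})    // select rows 0 and 1
	segmentIDs := op.Const(s, []int32{0, 0}) // both rows go into segment 0
	sum := op.SparseSegmentSum(s, c, indices, segmentIDs)

	graph, err := s.Finalize()
	if err != nil {
		log.Fatal(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{sum}, nil)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(out[0].Value()) // [[0 0 0 0]]
}
```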
-// -// If the sum is empty for a given segment ID `i`, `output[i] = 0`. -// If the given segment ID `i` is negative, the value is dropped and will not be -// added to the sum of the segment. -// -// `num_segments` should equal the number of distinct segment IDs. -// -//
-// -// ``` python -// c = tf.constant([[1,2,3,4], [5,6,7,8], [4,3,2,1]]) -// tf.unsorted_segment_sum(c, tf.constant([0, 1, 0]), num_segments=2) -// # ==> [[ 5, 5, 5, 5], -// # [5, 6, 7, 8]] -// ``` -// -// -// Arguments: -// -// segment_ids: A tensor whose shape is a prefix of `data.shape`. -// -// -// Returns Has same shape as data, except for the first `segment_ids.rank` -// dimensions, which are replaced with a single dimension which has size -// `num_segments`. -func UnsortedSegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "UnsortedSegmentSum", - Input: []tf.Input{ - data, segment_ids, num_segments, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes arctangent of `y/x` element-wise, respecting signs of the arguments. -// -// This is the angle \( \theta \in [-\pi, \pi] \) such that -// \[ x = r \cos(\theta) \] -// and -// \[ y = r \sin(\theta) \] -// where \(r = \sqrt(x^2 + y^2) \). -func Atan2(scope *Scope, y tf.Output, x tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Atan2", - Input: []tf.Input{ - y, x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the product along segments of a tensor. -// -// Read -// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) -// for an explanation of segments. -// -// Computes a tensor such that -// \\(output_i = \prod_j data_j\\) where the product is over `j` such -// that `segment_ids[j] == i`. -// -// If the product is empty for a given segment ID `i`, `output[i] = 1`. -// -//
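The `UnsortedSegmentSum` example above, as a rough Go sketch (same assumptions as the earlier examples):

```go
package main

import (
	"fmt"
	"log"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	data := op.Const(s, [][]int32{{1, 2, 3, 4}, {5, 6, 7, 8}, {4, 3, 2, 1}})
	segmentIDs := op.Const(s, []int32{0, 1, 0}) // rows 0 and 2 share segment 0
	numSegments := op.Const(s, int32(2))
	sum := op.UnsortedSegmentSum(s, data, segmentIDs, numSegments)

	graph, err := s.Finalize()
	if err != nil {
		log.Fatal(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{sum}, nil)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(out[0].Value()) // [[5 5 5 5] [5 6 7 8]]
}
```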
-// -// For example: -// -// ``` -// c = tf.constant([[1,2,3,4], [4, 3, 2, 1], [5,6,7,8]]) -// tf.segment_prod(c, tf.constant([0, 0, 1])) -// # ==> [[4, 6, 6, 4], -// # [5, 6, 7, 8]] -// ``` -// -// -// Arguments: -// -// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s -// first dimension. Values should be sorted and can be repeated. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SegmentProd(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SegmentProd", - Input: []tf.Input{ - data, segment_ids, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceApplyAdamWithAmsgradAttr is an optional argument to ResourceApplyAdamWithAmsgrad. -type ResourceApplyAdamWithAmsgradAttr func(optionalAttr) - -// ResourceApplyAdamWithAmsgradUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var, m, and v tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyAdamWithAmsgradUseLocking(value bool) ResourceApplyAdamWithAmsgradAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' according to the Adam algorithm. -// -// $$\text{lr}_t := \mathrm{learning_rate} * \sqrt{1 - \beta_2^t} / (1 - \beta_1^t)$$ -// $$m_t := \beta_1 * m_{t-1} + (1 - \beta_1) * g$$ -// $$v_t := \beta_2 * v_{t-1} + (1 - \beta_2) * g * g$$ -// $$\hat{v}_t := max{\hat{v}_{t-1}, v_t}$$ -// $$\text{variable} := \text{variable} - \text{lr}_t * m_t / (\sqrt{\hat{v}_t} + \epsilon)$$ -// -// Arguments: -// var_: Should be from a Variable(). -// m: Should be from a Variable(). -// v: Should be from a Variable(). -// vhat: Should be from a Variable(). -// beta1_power: Must be a scalar. -// beta2_power: Must be a scalar. -// lr: Scaling factor. Must be a scalar. -// beta1: Momentum factor. Must be a scalar. -// beta2: Momentum factor. Must be a scalar. -// epsilon: Ridge term. Must be a scalar. -// grad: The gradient. -// -// Returns the created operation. -func ResourceApplyAdamWithAmsgrad(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output, vhat tf.Output, beta1_power tf.Output, beta2_power tf.Output, lr tf.Output, beta1 tf.Output, beta2 tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdamWithAmsgradAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyAdamWithAmsgrad", - Input: []tf.Input{ - var_, m, v, vhat, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Computes the sum along segments of a tensor. -// -// Read -// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) -// for an explanation of segments. -// -// Computes a tensor such that -// \\(output_i = \sum_j data_j\\) where sum is over `j` such -// that `segment_ids[j] == i`. -// -// If the sum is empty for a given segment ID `i`, `output[i] = 0`. -// -//
-// -// For example: -// -// ``` -// c = tf.constant([[1,2,3,4], [4, 3, 2, 1], [5,6,7,8]]) -// tf.segment_sum(c, tf.constant([0, 0, 1])) -// # ==> [[5, 5, 5, 5], -// # [5, 6, 7, 8]] -// ``` -// -// -// Arguments: -// -// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s -// first dimension. Values should be sorted and can be repeated. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SegmentSum", - Input: []tf.Input{ - data, segment_ids, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ArgMinAttr is an optional argument to ArgMin. -type ArgMinAttr func(optionalAttr) - -// ArgMinOutputType sets the optional output_type attribute to value. -// If not specified, defaults to DT_INT64 -func ArgMinOutputType(value tf.DataType) ArgMinAttr { - return func(m optionalAttr) { - m["output_type"] = value - } -} - -// Returns the index with the smallest value across dimensions of a tensor. -// -// Note that in case of ties the identity of the return value is not guaranteed. -// -// Usage: -// ```python -// import tensorflow as tf -// a = [1, 10, 26.9, 2.8, 166.32, 62.3] -// b = tf.math.argmin(input = a) -// c = tf.keras.backend.eval(b) -// # c = 0 -// # here a[0] = 1 which is the smallest element of a across axis 0 -// ``` -// -// Arguments: -// -// dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`. -// Describes which dimension of the input Tensor to reduce across. For vectors, -// use dimension = 0. -func ArgMin(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgMinAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ArgMin", - Input: []tf.Input{ - input, dimension, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the reverse mode backpropagated gradient of the Cholesky algorithm. -// -// For an explanation see "Differentiation of the Cholesky algorithm" by -// Iain Murray http://arxiv.org/abs/1602.07527. -// -// Arguments: -// l: Output of batch Cholesky algorithm l = cholesky(A). Shape is `[..., M, M]`. -// Algorithm depends only on lower triangular part of the innermost matrices of -// this tensor. -// grad: df/dl where f is some scalar function. Shape is `[..., M, M]`. -// Algorithm depends only on lower triangular part of the innermost matrices of -// this tensor. -// -// Returns Symmetrized version of df/dA . Shape is `[..., M, M]` -func CholeskyGrad(scope *Scope, l tf.Output, grad tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "CholeskyGrad", - Input: []tf.Input{ - l, grad, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Reshapes a tensor. -// -// Given `tensor`, this operation returns a tensor that has the same values -// as `tensor` with shape `shape`. -// -// If one component of 1-D tensor `shape` is the special value -1, the size of that -// dimension is computed so that the total size remains constant. In particular, a -// `shape` of `[-1]` flattens into 1-D. At most one component of `shape` may be -// unknown. 
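A brief sketch of `ArgMin`, mirroring the Python usage shown in its doc comment (same assumptions as the earlier examples):

```go
package main

import (
	"fmt"
	"log"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	a := op.Const(s, []float32{1, 10, 26.9, 2.8, 166.32, 62.3})
	// The default output type is int64; op.ArgMinOutputType(tf.Int32) would request int32.
	idx := op.ArgMin(s, a, op.Const(s, int32(0)))

	graph, err := s.Finalize()
	if err != nil {
		log.Fatal(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{idx}, nil)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(out[0].Value()) // 0, since a[0] = 1 is the smallest element
}
```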
-// -// The `shape` must be 1-D and the operation returns a tensor with shape -// `shape` filled with the values of `tensor`. In this case, the number of elements -// implied by `shape` must be the same as the number of elements in `tensor`. -// -// It is an error if `shape` is not 1-D. -// -// For example: -// -// ``` -// # tensor 't' is [1, 2, 3, 4, 5, 6, 7, 8, 9] -// # tensor 't' has shape [9] -// reshape(t, [3, 3]) ==> [[1, 2, 3], -// [4, 5, 6], -// [7, 8, 9]] -// -// # tensor 't' is [[[1, 1], [2, 2]], -// # [[3, 3], [4, 4]]] -// # tensor 't' has shape [2, 2, 2] -// reshape(t, [2, 4]) ==> [[1, 1, 2, 2], -// [3, 3, 4, 4]] -// -// # tensor 't' is [[[1, 1, 1], -// # [2, 2, 2]], -// # [[3, 3, 3], -// # [4, 4, 4]], -// # [[5, 5, 5], -// # [6, 6, 6]]] -// # tensor 't' has shape [3, 2, 3] -// # pass '[-1]' to flatten 't' -// reshape(t, [-1]) ==> [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6] -// -// # -1 can also be used to infer the shape -// -// # -1 is inferred to be 9: -// reshape(t, [2, -1]) ==> [[1, 1, 1, 2, 2, 2, 3, 3, 3], -// [4, 4, 4, 5, 5, 5, 6, 6, 6]] -// # -1 is inferred to be 2: -// reshape(t, [-1, 9]) ==> [[1, 1, 1, 2, 2, 2, 3, 3, 3], -// [4, 4, 4, 5, 5, 5, 6, 6, 6]] -// # -1 is inferred to be 3: -// reshape(t, [ 2, -1, 3]) ==> [[[1, 1, 1], -// [2, 2, 2], -// [3, 3, 3]], -// [[4, 4, 4], -// [5, 5, 5], -// [6, 6, 6]]] -// -// # tensor 't' is [7] -// # shape `[]` reshapes to a scalar -// reshape(t, []) ==> 7 -// ``` -// -// Arguments: -// -// shape: Defines the shape of the output tensor. -func Reshape(scope *Scope, tensor tf.Output, shape tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Reshape", - Input: []tf.Input{ - tensor, shape, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SnapshotDatasetAttr is an optional argument to SnapshotDataset. -type SnapshotDatasetAttr func(optionalAttr) - -// SnapshotDatasetCompression sets the optional compression attribute to value. -// If not specified, defaults to "" -func SnapshotDatasetCompression(value string) SnapshotDatasetAttr { - return func(m optionalAttr) { - m["compression"] = value - } -} - -// SnapshotDatasetReaderPathPrefix sets the optional reader_path_prefix attribute to value. -// If not specified, defaults to "" -func SnapshotDatasetReaderPathPrefix(value string) SnapshotDatasetAttr { - return func(m optionalAttr) { - m["reader_path_prefix"] = value - } -} - -// SnapshotDatasetWriterPathPrefix sets the optional writer_path_prefix attribute to value. -// If not specified, defaults to "" -func SnapshotDatasetWriterPathPrefix(value string) SnapshotDatasetAttr { - return func(m optionalAttr) { - m["writer_path_prefix"] = value - } -} - -// SnapshotDatasetShardSizeBytes sets the optional shard_size_bytes attribute to value. -// If not specified, defaults to 10737418240 -func SnapshotDatasetShardSizeBytes(value int64) SnapshotDatasetAttr { - return func(m optionalAttr) { - m["shard_size_bytes"] = value - } -} - -// SnapshotDatasetPendingSnapshotExpirySeconds sets the optional pending_snapshot_expiry_seconds attribute to value. -// If not specified, defaults to 86400 -func SnapshotDatasetPendingSnapshotExpirySeconds(value int64) SnapshotDatasetAttr { - return func(m optionalAttr) { - m["pending_snapshot_expiry_seconds"] = value - } -} - -// SnapshotDatasetNumReaderThreads sets the optional num_reader_threads attribute to value. 
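A rough Go sketch of `Reshape`, using the `[9]` to `[3, 3]` case from the doc comment (same assumptions as the earlier examples):

```go
package main

import (
	"fmt"
	"log"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	t := op.Const(s, []int32{1, 2, 3, 4, 5, 6, 7, 8, 9})
	// A shape component of -1 (for example []int32{-1, 3}) would be inferred.
	m := op.Reshape(s, t, op.Const(s, []int32{3, 3}))

	graph, err := s.Finalize()
	if err != nil {
		log.Fatal(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{m}, nil)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(out[0].Value()) // [[1 2 3] [4 5 6] [7 8 9]]
}
```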
-// If not specified, defaults to 1 -func SnapshotDatasetNumReaderThreads(value int64) SnapshotDatasetAttr { - return func(m optionalAttr) { - m["num_reader_threads"] = value - } -} - -// SnapshotDatasetReaderBufferSize sets the optional reader_buffer_size attribute to value. -// If not specified, defaults to 1 -func SnapshotDatasetReaderBufferSize(value int64) SnapshotDatasetAttr { - return func(m optionalAttr) { - m["reader_buffer_size"] = value - } -} - -// SnapshotDatasetNumWriterThreads sets the optional num_writer_threads attribute to value. -// If not specified, defaults to 1 -func SnapshotDatasetNumWriterThreads(value int64) SnapshotDatasetAttr { - return func(m optionalAttr) { - m["num_writer_threads"] = value - } -} - -// SnapshotDatasetWriterBufferSize sets the optional writer_buffer_size attribute to value. -// If not specified, defaults to 1 -func SnapshotDatasetWriterBufferSize(value int64) SnapshotDatasetAttr { - return func(m optionalAttr) { - m["writer_buffer_size"] = value - } -} - -// SnapshotDatasetShuffleOnRead sets the optional shuffle_on_read attribute to value. -// If not specified, defaults to false -func SnapshotDatasetShuffleOnRead(value bool) SnapshotDatasetAttr { - return func(m optionalAttr) { - m["shuffle_on_read"] = value - } -} - -// SnapshotDatasetSeed sets the optional seed attribute to value. -// If not specified, defaults to 0 -func SnapshotDatasetSeed(value int64) SnapshotDatasetAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// SnapshotDatasetSeed2 sets the optional seed2 attribute to value. -// If not specified, defaults to 0 -func SnapshotDatasetSeed2(value int64) SnapshotDatasetAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// SnapshotDatasetMode sets the optional mode attribute to value. -// If not specified, defaults to "auto" -func SnapshotDatasetMode(value string) SnapshotDatasetAttr { - return func(m optionalAttr) { - m["mode"] = value - } -} - -// SnapshotDatasetSnapshotName sets the optional snapshot_name attribute to value. -// If not specified, defaults to "" -func SnapshotDatasetSnapshotName(value string) SnapshotDatasetAttr { - return func(m optionalAttr) { - m["snapshot_name"] = value - } -} - -// Creates a dataset that will write to / read from a snapshot. -// -// This dataset attempts to determine whether a valid snapshot exists at the -// `snapshot_path`, and reads from the snapshot in lieu of using `input_dataset`. -// If not, it will run the preprocessing pipeline as usual, and write out a -// snapshot of the data processed for future use. -// -// Arguments: -// input_dataset: A variant tensor representing the input dataset. -// path: The path we should write snapshots to / read snapshots from. -// -// -func SnapshotDataset(scope *Scope, input_dataset tf.Output, path tf.Output, output_types []tf.DataType, output_shapes []tf.Shape, optional ...SnapshotDatasetAttr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SnapshotDataset", - Input: []tf.Input{ - input_dataset, path, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ArgMaxAttr is an optional argument to ArgMax. -type ArgMaxAttr func(optionalAttr) - -// ArgMaxOutputType sets the optional output_type attribute to value. 
-// If not specified, defaults to DT_INT64 -func ArgMaxOutputType(value tf.DataType) ArgMaxAttr { - return func(m optionalAttr) { - m["output_type"] = value - } -} - -// Returns the index with the largest value across dimensions of a tensor. -// -// Note that in case of ties the identity of the return value is not guaranteed. -// -// Usage: -// ```python -// import tensorflow as tf -// a = [1, 10, 26.9, 2.8, 166.32, 62.3] -// b = tf.math.argmax(input = a) -// c = tf.keras.backend.eval(b) -// # c = 4 -// # here a[4] = 166.32 which is the largest element of a across axis 0 -// ``` -// -// Arguments: -// -// dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`. -// Describes which dimension of the input Tensor to reduce across. For vectors, -// use dimension = 0. -func ArgMax(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgMaxAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ArgMax", - Input: []tf.Input{ - input, dimension, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResizeBilinearGradAttr is an optional argument to ResizeBilinearGrad. -type ResizeBilinearGradAttr func(optionalAttr) - -// ResizeBilinearGradAlignCorners sets the optional align_corners attribute to value. -// -// value: If true, the centers of the 4 corner pixels of the input and grad tensors are -// aligned. Defaults to false. -// If not specified, defaults to false -func ResizeBilinearGradAlignCorners(value bool) ResizeBilinearGradAttr { - return func(m optionalAttr) { - m["align_corners"] = value - } -} - -// ResizeBilinearGradHalfPixelCenters sets the optional half_pixel_centers attribute to value. -// If not specified, defaults to false -func ResizeBilinearGradHalfPixelCenters(value bool) ResizeBilinearGradAttr { - return func(m optionalAttr) { - m["half_pixel_centers"] = value - } -} - -// Computes the gradient of bilinear interpolation. -// -// Arguments: -// grads: 4-D with shape `[batch, height, width, channels]`. -// original_image: 4-D with shape `[batch, orig_height, orig_width, channels]`, -// The image tensor that was resized. -// -// Returns 4-D with shape `[batch, orig_height, orig_width, channels]`. -// Gradients with respect to the input image. Input image must have been -// float or double. -func ResizeBilinearGrad(scope *Scope, grads tf.Output, original_image tf.Output, optional ...ResizeBilinearGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResizeBilinearGrad", - Input: []tf.Input{ - grads, original_image, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MaxAttr is an optional argument to Max. -type MaxAttr func(optionalAttr) - -// MaxKeepDims sets the optional keep_dims attribute to value. -// -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func MaxKeepDims(value bool) MaxAttr { - return func(m optionalAttr) { - m["keep_dims"] = value - } -} - -// Computes the maximum of elements across dimensions of a tensor. -// -// Reduces `input` along the dimensions given in `axis`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `axis`. If `keep_dims` is true, the reduced dimensions are -// retained with length 1. 
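As a hedged aside (again, not part of the removed file), the `keep_dims` behaviour described above is exercised through the generated optional-attribute helpers such as `MaxKeepDims`. A minimal sketch, assuming the standard TensorFlow Go binding packages and panicking on errors for brevity:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	input := op.Const(s, [][]float32{{1, 5, 3}, {4, 2, 6}})
	axis := op.Const(s, []int32{1})
	// Reduce along axis 1; keep_dims retains the reduced dimension with length 1.
	maxKeep := op.Max(s, input, axis, op.MaxKeepDims(true))

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{maxKeep}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // [[5] [6]], shape [2 1]
}
```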
-// -// Arguments: -// input: The tensor to reduce. -// axis: The dimensions to reduce. Must be in the range -// `[-rank(input), rank(input))`. -// -// Returns The reduced tensor. -func Max(scope *Scope, input tf.Output, axis tf.Output, optional ...MaxAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Max", - Input: []tf.Input{ - input, axis, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Get the value of the tensor specified by its handle. -// -// Arguments: -// handle: The handle for a tensor stored in the session state. -// dtype: The type of the output value. -// -// Returns The tensor for the given handle. -func GetSessionTensor(scope *Scope, handle tf.Output, dtype tf.DataType) (value tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - opspec := tf.OpSpec{ - Type: "GetSessionTensor", - Input: []tf.Input{ - handle, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Outputs a tensor containing the reduction across all input tensors. -// -// Outputs a tensor containing the reduction across all input tensors passed to ops -// within the same `shared_name. -// -// The graph should be constructed so if one op runs with shared_name value `c`, -// then `num_devices` ops will run with shared_name value `c`. Failure to do so -// will cause the graph execution to fail to complete. -// -// input: the input to the reduction -// data: the value of the reduction across all `num_devices` devices. -// reduction: the reduction operation to perform. -// num_devices: The number of devices participating in this reduction. -// shared_name: Identifier that shared between ops of the same reduction. -func NcclAllReduce(scope *Scope, input tf.Output, reduction string, num_devices int64, shared_name string) (data tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"reduction": reduction, "num_devices": num_devices, "shared_name": shared_name} - opspec := tf.OpSpec{ - Type: "NcclAllReduce", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MinAttr is an optional argument to Min. -type MinAttr func(optionalAttr) - -// MinKeepDims sets the optional keep_dims attribute to value. -// -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func MinKeepDims(value bool) MinAttr { - return func(m optionalAttr) { - m["keep_dims"] = value - } -} - -// Computes the minimum of elements across dimensions of a tensor. -// -// Reduces `input` along the dimensions given in `axis`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `axis`. If `keep_dims` is true, the reduced dimensions are -// retained with length 1. -// -// Arguments: -// input: The tensor to reduce. -// axis: The dimensions to reduce. Must be in the range -// `[-rank(input), rank(input))`. -// -// Returns The reduced tensor. 
-func Min(scope *Scope, input tf.Output, axis tf.Output, optional ...MinAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Min", - Input: []tf.Input{ - input, axis, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SampleDistortedBoundingBoxV2Attr is an optional argument to SampleDistortedBoundingBoxV2. -type SampleDistortedBoundingBoxV2Attr func(optionalAttr) - -// SampleDistortedBoundingBoxV2Seed sets the optional seed attribute to value. -// -// value: If either `seed` or `seed2` are set to non-zero, the random number -// generator is seeded by the given `seed`. Otherwise, it is seeded by a random -// seed. -// If not specified, defaults to 0 -func SampleDistortedBoundingBoxV2Seed(value int64) SampleDistortedBoundingBoxV2Attr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// SampleDistortedBoundingBoxV2Seed2 sets the optional seed2 attribute to value. -// -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func SampleDistortedBoundingBoxV2Seed2(value int64) SampleDistortedBoundingBoxV2Attr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// SampleDistortedBoundingBoxV2AspectRatioRange sets the optional aspect_ratio_range attribute to value. -// -// value: The cropped area of the image must have an aspect ratio = -// width / height within this range. -// If not specified, defaults to -func SampleDistortedBoundingBoxV2AspectRatioRange(value []float32) SampleDistortedBoundingBoxV2Attr { - return func(m optionalAttr) { - m["aspect_ratio_range"] = value - } -} - -// SampleDistortedBoundingBoxV2AreaRange sets the optional area_range attribute to value. -// -// value: The cropped area of the image must contain a fraction of the -// supplied image within this range. -// If not specified, defaults to -func SampleDistortedBoundingBoxV2AreaRange(value []float32) SampleDistortedBoundingBoxV2Attr { - return func(m optionalAttr) { - m["area_range"] = value - } -} - -// SampleDistortedBoundingBoxV2MaxAttempts sets the optional max_attempts attribute to value. -// -// value: Number of attempts at generating a cropped region of the image -// of the specified constraints. After `max_attempts` failures, return the entire -// image. -// If not specified, defaults to 100 -func SampleDistortedBoundingBoxV2MaxAttempts(value int64) SampleDistortedBoundingBoxV2Attr { - return func(m optionalAttr) { - m["max_attempts"] = value - } -} - -// SampleDistortedBoundingBoxV2UseImageIfNoBoundingBoxes sets the optional use_image_if_no_bounding_boxes attribute to value. -// -// value: Controls behavior if no bounding boxes supplied. -// If true, assume an implicit bounding box covering the whole input. If false, -// raise an error. -// If not specified, defaults to false -func SampleDistortedBoundingBoxV2UseImageIfNoBoundingBoxes(value bool) SampleDistortedBoundingBoxV2Attr { - return func(m optionalAttr) { - m["use_image_if_no_bounding_boxes"] = value - } -} - -// Generate a single randomly distorted bounding box for an image. -// -// Bounding box annotations are often supplied in addition to ground-truth labels -// in image recognition or object localization tasks. A common technique for -// training such a system is to randomly distort an image while preserving -// its content, i.e. *data augmentation*. This Op outputs a randomly distorted -// localization of an object, i.e. 
bounding box, given an `image_size`, -// `bounding_boxes` and a series of constraints. -// -// The output of this Op is a single bounding box that may be used to crop the -// original image. The output is returned as 3 tensors: `begin`, `size` and -// `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the -// image. The latter may be supplied to `tf.image.draw_bounding_boxes` to visualize -// what the bounding box looks like. -// -// Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. The -// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and -// height of the underlying image. -// -// For example, -// -// ```python -// # Generate a single distorted bounding box. -// begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box( -// tf.shape(image), -// bounding_boxes=bounding_boxes) -// -// # Draw the bounding box in an image summary. -// image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), -// bbox_for_draw) -// tf.summary.image('images_with_box', image_with_box) -// -// # Employ the bounding box to distort the image. -// distorted_image = tf.slice(image, begin, size) -// ``` -// -// Note that if no bounding box information is available, setting -// `use_image_if_no_bounding_boxes = true` will assume there is a single implicit -// bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is -// false and no bounding boxes are supplied, an error is raised. -// -// Arguments: -// image_size: 1-D, containing `[height, width, channels]`. -// bounding_boxes: 3-D with shape `[batch, N, 4]` describing the N bounding boxes -// associated with the image. -// min_object_covered: The cropped area of the image must contain at least this -// fraction of any bounding box supplied. The value of this parameter should be -// non-negative. In the case of 0, the cropped area does not need to overlap -// any of the bounding boxes supplied. -// -// Returns: -// begin: 1-D, containing `[offset_height, offset_width, 0]`. Provide as input to -// `tf.slice`. -// size: 1-D, containing `[target_height, target_width, -1]`. Provide as input to -// `tf.slice`. -// bboxes: 3-D with shape `[1, 1, 4]` containing the distorted bounding box. -// Provide as input to `tf.image.draw_bounding_boxes`. -func SampleDistortedBoundingBoxV2(scope *Scope, image_size tf.Output, bounding_boxes tf.Output, min_object_covered tf.Output, optional ...SampleDistortedBoundingBoxV2Attr) (begin tf.Output, size tf.Output, bboxes tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SampleDistortedBoundingBoxV2", - Input: []tf.Input{ - image_size, bounding_boxes, min_object_covered, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// EigAttr is an optional argument to Eig. -type EigAttr func(optionalAttr) - -// EigComputeV sets the optional compute_v attribute to value. -// -// value: If `True` then eigenvectors will be computed and returned in `v`. -// Otherwise, only the eigenvalues will be computed. -// If not specified, defaults to true -func EigComputeV(value bool) EigAttr { - return func(m optionalAttr) { - m["compute_v"] = value - } -} - -// Computes the eigen decomposition of one or more square matrices. 
-// -// Computes the eigenvalues and (optionally) right eigenvectors of each inner matrix in -// `input` such that `input[..., :, :] = v[..., :, :] * diag(e[..., :])`. The eigenvalues -// are sorted in non-decreasing order. -// -// ```python -// # a is a tensor. -// # e is a tensor of eigenvalues. -// # v is a tensor of eigenvectors. -// e, v = eig(a) -// e = eig(a, compute_v=False) -// ``` -// -// Arguments: -// input: `Tensor` input of shape `[N, N]`. -// -// -// Returns: -// e: Eigenvalues. Shape is `[N]`. -// v: Eigenvectors. Shape is `[N, N]`. -func Eig(scope *Scope, input tf.Output, Tout tf.DataType, optional ...EigAttr) (e tf.Output, v tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"Tout": Tout} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Eig", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// ProdAttr is an optional argument to Prod. -type ProdAttr func(optionalAttr) - -// ProdKeepDims sets the optional keep_dims attribute to value. -// -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func ProdKeepDims(value bool) ProdAttr { - return func(m optionalAttr) { - m["keep_dims"] = value - } -} - -// Computes the product of elements across dimensions of a tensor. -// -// Reduces `input` along the dimensions given in `axis`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `axis`. If `keep_dims` is true, the reduced dimensions are -// retained with length 1. -// -// Arguments: -// input: The tensor to reduce. -// axis: The dimensions to reduce. Must be in the range -// `[-rank(input), rank(input))`. -// -// Returns The reduced tensor. -func Prod(scope *Scope, input tf.Output, axis tf.Output, optional ...ProdAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Prod", - Input: []tf.Input{ - input, axis, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SumAttr is an optional argument to Sum. -type SumAttr func(optionalAttr) - -// SumKeepDims sets the optional keep_dims attribute to value. -// -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func SumKeepDims(value bool) SumAttr { - return func(m optionalAttr) { - m["keep_dims"] = value - } -} - -// Computes the sum of elements across dimensions of a tensor. -// -// Reduces `input` along the dimensions given in `axis`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `axis`. If `keep_dims` is true, the reduced dimensions are -// retained with length 1. -// -// Arguments: -// input: The tensor to reduce. -// axis: The dimensions to reduce. Must be in the range -// `[-rank(input), rank(input))`. -// -// Returns The reduced tensor. -func Sum(scope *Scope, input tf.Output, axis tf.Output, optional ...SumAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Sum", - Input: []tf.Input{ - input, axis, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// BoostedTreesQuantileStreamResourceFlushAttr is an optional argument to BoostedTreesQuantileStreamResourceFlush. 
-type BoostedTreesQuantileStreamResourceFlushAttr func(optionalAttr) - -// BoostedTreesQuantileStreamResourceFlushGenerateQuantiles sets the optional generate_quantiles attribute to value. -// -// value: bool; If True, the output will be the num_quantiles for each stream where the ith -// entry is the ith quantile of the input with an approximation error of epsilon. -// Duplicate values may be present. -// If False, the output will be the points in the histogram that we got which roughly -// translates to 1/epsilon boundaries and without any duplicates. -// Default to False. -// If not specified, defaults to false -func BoostedTreesQuantileStreamResourceFlushGenerateQuantiles(value bool) BoostedTreesQuantileStreamResourceFlushAttr { - return func(m optionalAttr) { - m["generate_quantiles"] = value - } -} - -// Flush the summaries for a quantile stream resource. -// -// An op that flushes the summaries for a quantile stream resource. -// -// Arguments: -// quantile_stream_resource_handle: resource handle referring to a QuantileStreamResource. -// num_buckets: int; approximate number of buckets unless using generate_quantiles. -// -// Returns the created operation. -func BoostedTreesQuantileStreamResourceFlush(scope *Scope, quantile_stream_resource_handle tf.Output, num_buckets tf.Output, optional ...BoostedTreesQuantileStreamResourceFlushAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "BoostedTreesQuantileStreamResourceFlush", - Input: []tf.Input{ - quantile_stream_resource_handle, num_buckets, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// WholeFileReaderV2Attr is an optional argument to WholeFileReaderV2. -type WholeFileReaderV2Attr func(optionalAttr) - -// WholeFileReaderV2Container sets the optional container attribute to value. -// -// value: If non-empty, this reader is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func WholeFileReaderV2Container(value string) WholeFileReaderV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// WholeFileReaderV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this reader is named in the given bucket -// with this shared_name. Otherwise, the node name is used instead. -// If not specified, defaults to "" -func WholeFileReaderV2SharedName(value string) WholeFileReaderV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// A Reader that outputs the entire contents of a file as a value. -// -// To use, enqueue filenames in a Queue. The output of ReaderRead will -// be a filename (key) and the contents of that file (value). -// -// Returns The handle to reference the Reader. -func WholeFileReaderV2(scope *Scope, optional ...WholeFileReaderV2Attr) (reader_handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "WholeFileReaderV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ShapeNAttr is an optional argument to ShapeN. -type ShapeNAttr func(optionalAttr) - -// ShapeNOutType sets the optional out_type attribute to value. 
-// If not specified, defaults to DT_INT32 -func ShapeNOutType(value tf.DataType) ShapeNAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// Returns shape of tensors. -// -// This operation returns N 1-D integer tensors representing shape of `input[i]s`. -func ShapeN(scope *Scope, input []tf.Output, optional ...ShapeNAttr) (output []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ShapeN", - Input: []tf.Input{ - tf.OutputList(input), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if output, idx, err = makeOutputList(op, idx, "output"); err != nil { - scope.UpdateErr("ShapeN", err) - return - } - return output -} - -// ImageSummaryAttr is an optional argument to ImageSummary. -type ImageSummaryAttr func(optionalAttr) - -// ImageSummaryMaxImages sets the optional max_images attribute to value. -// -// value: Max number of batch elements to generate images for. -// If not specified, defaults to 3 -// -// REQUIRES: value >= 1 -func ImageSummaryMaxImages(value int64) ImageSummaryAttr { - return func(m optionalAttr) { - m["max_images"] = value - } -} - -// ImageSummaryBadColor sets the optional bad_color attribute to value. -// -// value: Color to use for pixels with non-finite values. -// If not specified, defaults to > int_val:255 int_val:0 int_val:0 int_val:255 > -func ImageSummaryBadColor(value tf.Tensor) ImageSummaryAttr { - return func(m optionalAttr) { - m["bad_color"] = value - } -} - -// Outputs a `Summary` protocol buffer with images. -// -// The summary has up to `max_images` summary values containing images. The -// images are built from `tensor` which must be 4-D with shape `[batch_size, -// height, width, channels]` and where `channels` can be: -// -// * 1: `tensor` is interpreted as Grayscale. -// * 3: `tensor` is interpreted as RGB. -// * 4: `tensor` is interpreted as RGBA. -// -// The images have the same number of channels as the input tensor. For float -// input, the values are normalized one image at a time to fit in the range -// `[0, 255]`. `uint8` values are unchanged. The op uses two different -// normalization algorithms: -// -// * If the input values are all positive, they are rescaled so the largest one -// is 255. -// -// * If any input value is negative, the values are shifted so input value 0.0 -// is at 127. They are then rescaled so that either the smallest value is 0, -// or the largest one is 255. -// -// The `tag` argument is a scalar `Tensor` of type `string`. It is used to -// build the `tag` of the summary values: -// -// * If `max_images` is 1, the summary value tag is '*tag*/image'. -// * If `max_images` is greater than 1, the summary value tags are -// generated sequentially as '*tag*/image/0', '*tag*/image/1', etc. -// -// The `bad_color` argument is the color to use in the generated images for -// non-finite input values. It is a `uint8` 1-D tensor of length `channels`. -// Each element must be in the range `[0, 255]` (It represents the value of a -// pixel in the output image). Non-finite values in the input tensor are -// replaced by this tensor in the output image. The default value is the color -// red. -// -// Arguments: -// tag: Scalar. Used to build the `tag` attribute of the summary values. -// tensor: 4-D of shape `[batch_size, height, width, channels]` where -// `channels` is 1, 3, or 4. -// -// Returns Scalar. 
Serialized `Summary` protocol buffer. -func ImageSummary(scope *Scope, tag tf.Output, tensor tf.Output, optional ...ImageSummaryAttr) (summary tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ImageSummary", - Input: []tf.Input{ - tag, tensor, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// CollectiveBcastSendAttr is an optional argument to CollectiveBcastSend. -type CollectiveBcastSendAttr func(optionalAttr) - -// CollectiveBcastSendCommunicationHint sets the optional communication_hint attribute to value. -// If not specified, defaults to "auto" -func CollectiveBcastSendCommunicationHint(value string) CollectiveBcastSendAttr { - return func(m optionalAttr) { - m["communication_hint"] = value - } -} - -// CollectiveBcastSendTimeoutSeconds sets the optional timeout_seconds attribute to value. -// If not specified, defaults to 0 -func CollectiveBcastSendTimeoutSeconds(value float32) CollectiveBcastSendAttr { - return func(m optionalAttr) { - m["timeout_seconds"] = value - } -} - -// Broadcasts a tensor value to one or more other devices. -func CollectiveBcastSend(scope *Scope, input tf.Output, group_size int64, group_key int64, instance_key int64, shape tf.Shape, optional ...CollectiveBcastSendAttr) (data tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"group_size": group_size, "group_key": group_key, "instance_key": instance_key, "shape": shape} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CollectiveBcastSend", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// CombinedNonMaxSuppressionAttr is an optional argument to CombinedNonMaxSuppression. -type CombinedNonMaxSuppressionAttr func(optionalAttr) - -// CombinedNonMaxSuppressionPadPerClass sets the optional pad_per_class attribute to value. -// -// value: If false, the output nmsed boxes, scores and classes -// are padded/clipped to `max_total_size`. If true, the -// output nmsed boxes, scores and classes are padded to be of length -// `max_size_per_class`*`num_classes`, unless it exceeds `max_total_size` in -// which case it is clipped to `max_total_size`. Defaults to false. -// If not specified, defaults to false -func CombinedNonMaxSuppressionPadPerClass(value bool) CombinedNonMaxSuppressionAttr { - return func(m optionalAttr) { - m["pad_per_class"] = value - } -} - -// CombinedNonMaxSuppressionClipBoxes sets the optional clip_boxes attribute to value. -// -// value: If true, assume the box coordinates are between [0, 1] and clip the output boxes -// if they fall beyond [0, 1]. If false, do not do clipping and output the box -// coordinates as it is. -// If not specified, defaults to true -func CombinedNonMaxSuppressionClipBoxes(value bool) CombinedNonMaxSuppressionAttr { - return func(m optionalAttr) { - m["clip_boxes"] = value - } -} - -// Greedily selects a subset of bounding boxes in descending order of score, -// -// This operation performs non_max_suppression on the inputs per batch, across -// all classes. -// Prunes away boxes that have high intersection-over-union (IOU) overlap -// with previously selected boxes. 
Bounding boxes are supplied as -// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any -// diagonal pair of box corners and the coordinates can be provided as normalized -// (i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm -// is agnostic to where the origin is in the coordinate system. Also note that -// this algorithm is invariant to orthogonal transformations and translations -// of the coordinate system; thus translating or reflections of the coordinate -// system result in the same boxes being selected by the algorithm. -// The output of this operation is the final boxes, scores and classes tensor -// returned after performing non_max_suppression. -// -// Arguments: -// boxes: A 4-D float tensor of shape `[batch_size, num_boxes, q, 4]`. If `q` is 1 then -// same boxes are used for all classes otherwise, if `q` is equal to number of -// classes, class-specific boxes are used. -// scores: A 3-D float tensor of shape `[batch_size, num_boxes, num_classes]` -// representing a single score corresponding to each box (each row of boxes). -// max_output_size_per_class: A scalar integer tensor representing the maximum number of -// boxes to be selected by non max suppression per class -// max_total_size: A scalar representing maximum number of boxes retained over all classes. -// iou_threshold: A 0-D float tensor representing the threshold for deciding whether -// boxes overlap too much with respect to IOU. -// score_threshold: A 0-D float tensor representing the threshold for deciding when to remove -// boxes based on score. -// -// Returns: -// nmsed_boxes: A [batch_size, max_detections, 4] float32 tensor -// containing the non-max suppressed boxes. -// nmsed_scores: A [batch_size, max_detections] float32 tensor -// containing the scores for the boxes. -// nmsed_classes: A [batch_size, max_detections] float32 tensor -// containing the classes for the boxes. -// valid_detections: A [batch_size] int32 tensor indicating the number of -// valid detections per batch item. Only the top num_detections[i] entries in -// nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the -// entries are zero paddings. -func CombinedNonMaxSuppression(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size_per_class tf.Output, max_total_size tf.Output, iou_threshold tf.Output, score_threshold tf.Output, optional ...CombinedNonMaxSuppressionAttr) (nmsed_boxes tf.Output, nmsed_scores tf.Output, nmsed_classes tf.Output, valid_detections tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CombinedNonMaxSuppression", - Input: []tf.Input{ - boxes, scores, max_output_size_per_class, max_total_size, iou_threshold, score_threshold, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3) -} - -// Returns the truth value of x AND y element-wise. -// -// *NOTE*: `LogicalAnd` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func LogicalAnd(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "LogicalAnd", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ApproximateEqualAttr is an optional argument to ApproximateEqual. 
-type ApproximateEqualAttr func(optionalAttr) - -// ApproximateEqualTolerance sets the optional tolerance attribute to value. -// If not specified, defaults to 1e-05 -func ApproximateEqualTolerance(value float32) ApproximateEqualAttr { - return func(m optionalAttr) { - m["tolerance"] = value - } -} - -// Returns the truth value of abs(x-y) < tolerance element-wise. -func ApproximateEqual(scope *Scope, x tf.Output, y tf.Output, optional ...ApproximateEqualAttr) (z tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ApproximateEqual", - Input: []tf.Input{ - x, y, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// LowerBoundAttr is an optional argument to LowerBound. -type LowerBoundAttr func(optionalAttr) - -// LowerBoundOutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_INT32 -func LowerBoundOutType(value tf.DataType) LowerBoundAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// Applies lower_bound(sorted_search_values, values) along each row. -// -// Each set of rows with the same index in (sorted_inputs, values) is treated -// independently. The resulting row is the equivalent of calling -// `np.searchsorted(sorted_inputs, values, side='left')`. -// -// The result is not a global index to the entire -// `Tensor`, but rather just the index in the last dimension. -// -// A 2-D example: -// sorted_sequence = [[0, 3, 9, 9, 10], -// [1, 2, 3, 4, 5]] -// values = [[2, 4, 9], -// [0, 2, 6]] -// -// result = LowerBound(sorted_sequence, values) -// -// result == [[1, 2, 2], -// [0, 1, 5]] -// -// Arguments: -// sorted_inputs: 2-D Tensor where each row is ordered. -// values: 2-D Tensor with the same numbers of rows as `sorted_search_values`. Contains -// the values that will be searched for in `sorted_search_values`. -// -// Returns A `Tensor` with the same shape as `values`. It contains the first scalar index -// into the last dimension where values can be inserted without changing the -// ordered property. -func LowerBound(scope *Scope, sorted_inputs tf.Output, values tf.Output, optional ...LowerBoundAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LowerBound", - Input: []tf.Input{ - sorted_inputs, values, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the truth value of (x > y) element-wise. -// -// *NOTE*: `Greater` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -// -// Example: -// -// ```python -// x = tf.constant([5, 4, 6]) -// y = tf.constant([5, 2, 5]) -// tf.math.greater(x, y) ==> [False, True, True] -// -// x = tf.constant([5, 4, 6]) -// y = tf.constant([5]) -// tf.math.greater(x, y) ==> [False, False, True] -// ``` -func Greater(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Greater", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Compute the polygamma function \\(\psi^{(n)}(x)\\). -// -// The polygamma function is defined as: -// -// -// \\(\psi^{(a)}(x) = \frac{d^a}{dx^a} \psi(x)\\) -// -// where \\(\psi(x)\\) is the digamma function. 
-// The polygamma function is defined only for non-negative integer orders \\a\\. -func Polygamma(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Polygamma", - Input: []tf.Input{ - a, x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Shuffle dimensions of x according to a permutation. -// -// The output `y` has the same rank as `x`. The shapes of `x` and `y` satisfy: -// `y.shape[i] == x.shape[perm[i]] for i in [0, 1, ..., rank(x) - 1]` -func Transpose(scope *Scope, x tf.Output, perm tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Transpose", - Input: []tf.Input{ - x, perm, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// AssertAttr is an optional argument to Assert. -type AssertAttr func(optionalAttr) - -// AssertSummarize sets the optional summarize attribute to value. -// -// value: Print this many entries of each tensor. -// If not specified, defaults to 3 -func AssertSummarize(value int64) AssertAttr { - return func(m optionalAttr) { - m["summarize"] = value - } -} - -// Asserts that the given condition is true. -// -// If `condition` evaluates to false, print the list of tensors in `data`. -// `summarize` determines how many entries of the tensors to print. -// -// Arguments: -// condition: The condition to evaluate. -// data: The tensors to print out when condition is false. -// -// Returns the created operation. -func Assert(scope *Scope, condition tf.Output, data []tf.Output, optional ...AssertAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Assert", - Input: []tf.Input{ - condition, tf.OutputList(data), - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Computes the gradient of `igamma(a, x)` wrt `a`. -func IgammaGradA(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "IgammaGradA", - Input: []tf.Input{ - a, x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Compute the upper regularized incomplete Gamma function `Q(a, x)`. -// -// The upper regularized incomplete Gamma function is defined as: -// -// \\(Q(a, x) = Gamma(a, x) / Gamma(a) = 1 - P(a, x)\\) -// -// where -// -// \\(Gamma(a, x) = int_{x}^{\infty} t^{a-1} exp(-t) dt\\) -// -// is the upper incomplete Gama function. -// -// Note, above `P(a, x)` (`Igamma`) is the lower regularized complete -// Gamma function. -func Igammac(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Igammac", - Input: []tf.Input{ - a, x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns element-wise remainder of division. This emulates C semantics in that -// -// the result here is consistent with a truncating divide. E.g. `truncate(x / y) * -// y + truncate_mod(x, y) = x`. -// -// *NOTE*: `TruncateMod` supports broadcasting. 
More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func TruncateMod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TruncateMod", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns element-wise remainder of division. This emulates C semantics in that -// -// the result here is consistent with a truncating divide. E.g. -// `tf.truncatediv(x, y) * y + truncate_mod(x, y) = x`. -// -// *NOTE*: `Mod` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Mod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Mod", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// A substitute for `InterleaveDataset` on a fixed list of `N` datasets. -// -// Arguments: -// selector_input_dataset: A dataset of scalar `DT_INT64` elements that determines which of the -// `N` data inputs should produce the next output element. -// data_input_datasets: `N` datasets with the same type that will be interleaved according to -// the values of `selector_input_dataset`. -// -// -func ExperimentalDirectedInterleaveDataset(scope *Scope, selector_input_dataset tf.Output, data_input_datasets []tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "ExperimentalDirectedInterleaveDataset", - Input: []tf.Input{ - selector_input_dataset, tf.OutputList(data_input_datasets), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the min of x and y (i.e. x < y ? x : y) element-wise. -// -// *NOTE*: `Minimum` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Minimum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Minimum", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the max of x and y (i.e. x > y ? x : y) element-wise. -// -// *NOTE*: `Maximum` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Maximum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Maximum", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns 0 if x == 0, and x * log(y) otherwise, elementwise. -func Xlogy(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Xlogy", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Increments variable pointed to by 'resource' until it reaches 'limit'. -// -// Arguments: -// resource: Should be from a scalar `Variable` node. -// limit: If incrementing ref would bring it above limit, instead generates an -// 'OutOfRange' error. -// -// -// Returns A copy of the input before increment. 
If nothing else modifies the -// input, the values produced will all be distinct. -func ResourceCountUpTo(scope *Scope, resource tf.Output, limit int64, T tf.DataType) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"limit": limit, "T": T} - opspec := tf.OpSpec{ - Type: "ResourceCountUpTo", - Input: []tf.Input{ - resource, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// StatefulStandardNormalAttr is an optional argument to StatefulStandardNormal. -type StatefulStandardNormalAttr func(optionalAttr) - -// StatefulStandardNormalDtype sets the optional dtype attribute to value. -// -// value: The type of the output. -// If not specified, defaults to DT_FLOAT -func StatefulStandardNormalDtype(value tf.DataType) StatefulStandardNormalAttr { - return func(m optionalAttr) { - m["dtype"] = value - } -} - -// Outputs random values from a normal distribution. This op is deprecated in favor of op 'StatefulStandardNormalV2' -// -// DEPRECATED at GraphDef version 29: Use StatefulStandardNormalV2 instead -// -// The generated values will have mean 0 and standard deviation 1. -// -// Arguments: -// resource: The handle of the resource variable that stores the state of the RNG. -// shape: The shape of the output tensor. -// -// Returns A tensor of the specified shape filled with random normal values. -func StatefulStandardNormal(scope *Scope, resource tf.Output, shape tf.Output, optional ...StatefulStandardNormalAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StatefulStandardNormal", - Input: []tf.Input{ - resource, shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns x / y element-wise for real types. -// -// If `x` and `y` are reals, this will return the floating-point division. -// -// *NOTE*: `Div` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func RealDiv(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "RealDiv", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns x / y element-wise for integer types. -// -// Truncation designates that negative numbers will round fractional quantities -// toward zero. I.e. -7 / 5 = -1. This matches C semantics but it is different -// than Python semantics. See `FloorDiv` for a division function that matches -// Python Semantics. -// -// *NOTE*: `TruncateDiv` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func TruncateDiv(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TruncateDiv", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns 0 if the denominator is zero. -// -// -// *NOTE*: `DivNoNan` supports broadcasting. 
More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func DivNoNan(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DivNoNan", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Scatter `updates` into an existing tensor according to `indices`. -// -// This operation creates a new tensor by applying sparse `updates` to the passed -// in `tensor`. -// This operation is very similar to `tf.scatter_nd`, except that the updates are -// scattered onto an existing tensor (as opposed to a zero-tensor). If the memory -// for the existing tensor cannot be re-used, a copy is made and updated. -// -// If `indices` contains duplicates, then their updates are accumulated (summed). -// -// **WARNING**: The order in which updates are applied is nondeterministic, so the -// output will be nondeterministic if `indices` contains duplicates -- because -// of some numerical approximation issues, numbers summed in different order -// may yield different results. -// -// `indices` is an integer tensor containing indices into a new tensor of shape -// `shape`. The last dimension of `indices` can be at most the rank of `shape`: -// -// indices.shape[-1] <= shape.rank -// -// The last dimension of `indices` corresponds to indices into elements -// (if `indices.shape[-1] = shape.rank`) or slices -// (if `indices.shape[-1] < shape.rank`) along dimension `indices.shape[-1]` of -// `shape`. `updates` is a tensor with shape -// -// indices.shape[:-1] + shape[indices.shape[-1]:] -// -// The simplest form of scatter is to insert individual elements in a tensor by -// index. For example, say we want to insert 4 scattered elements in a rank-1 -// tensor with 8 elements. -// -//
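Alongside the Python example that follows, here is a comparable sketch using the `TensorScatterUpdate` wrapper defined later in this hunk. It is an illustrative aside rather than part of the removed file, and assumes the standard TensorFlow Go binding packages:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Scatter four updates into a rank-1 tensor of 8 ones, as in the doc example.
	tensor := op.Const(s, []int32{1, 1, 1, 1, 1, 1, 1, 1})
	indices := op.Const(s, [][]int32{{4}, {3}, {1}, {7}})
	updates := op.Const(s, []int32{9, 10, 11, 12})
	scattered := op.TensorScatterUpdate(s, tensor, indices, updates)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{scattered}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // [1 11 1 10 9 1 1 12]
}
```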
-// -// In Python, this scatter operation would look like this: -// -// >>> indices = tf.constant([[4], [3], [1], [7]]) -// >>> updates = tf.constant([9, 10, 11, 12]) -// >>> tensor = tf.ones([8], dtype=tf.int32) -// >>> print(tf.tensor_scatter_nd_update(tensor, indices, updates)) -// tf.Tensor([ 1 11 1 10 9 1 1 12], shape=(8,), dtype=int32) -// -// We can also, insert entire slices of a higher rank tensor all at once. For -// example, if we wanted to insert two slices in the first dimension of a -// rank-3 tensor with two matrices of new values. -// -// In Python, this scatter operation would look like this: -// -// >>> indices = tf.constant([[0], [2]]) -// >>> updates = tf.constant([[[5, 5, 5, 5], [6, 6, 6, 6], -// ... [7, 7, 7, 7], [8, 8, 8, 8]], -// ... [[5, 5, 5, 5], [6, 6, 6, 6], -// ... [7, 7, 7, 7], [8, 8, 8, 8]]]) -// >>> tensor = tf.ones([4, 4, 4], dtype=tf.int32) -// >>> print(tf.tensor_scatter_nd_update(tensor, indices, updates).numpy()) -// [[[5 5 5 5] -// [6 6 6 6] -// [7 7 7 7] -// [8 8 8 8]] -// [[1 1 1 1] -// [1 1 1 1] -// [1 1 1 1] -// [1 1 1 1]] -// [[5 5 5 5] -// [6 6 6 6] -// [7 7 7 7] -// [8 8 8 8]] -// [[1 1 1 1] -// [1 1 1 1] -// [1 1 1 1] -// [1 1 1 1]]] -// -// Note that on CPU, if an out of bound index is found, an error is returned. -// On GPU, if an out of bound index is found, the index is ignored. -// -// Arguments: -// tensor: Tensor to copy/update. -// indices: Index tensor. -// updates: Updates to scatter into output. -// -// Returns A new tensor with the given shape and updates applied according -// to the indices. -func TensorScatterUpdate(scope *Scope, tensor tf.Output, indices tf.Output, updates tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorScatterUpdate", - Input: []tf.Input{ - tensor, indices, updates, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that contains `count` elements from the `input_dataset`. -// -// Arguments: -// -// count: A scalar representing the number of elements from the `input_dataset` -// that should be taken. A value of `-1` indicates that all of `input_dataset` -// is taken. -// -// -func TakeDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "TakeDataset", - Input: []tf.Input{ - input_dataset, count, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the last element of the input list as well as a list with all but that element. -// -// Fails if the list is empty. -// -// input_handle: the input list -// tensor: the withdrawn last element of the list -// element_dtype: the type of elements in the list -// element_shape: the shape of the output tensor -func TensorListPopBack(scope *Scope, input_handle tf.Output, element_shape tf.Output, element_dtype tf.DataType) (output_handle tf.Output, tensor tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"element_dtype": element_dtype} - opspec := tf.OpSpec{ - Type: "TensorListPopBack", - Input: []tf.Input{ - input_handle, element_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// QueueDequeueManyV2Attr is an optional argument to QueueDequeueManyV2. 
-type QueueDequeueManyV2Attr func(optionalAttr) - -// QueueDequeueManyV2TimeoutMs sets the optional timeout_ms attribute to value. -// -// value: If the queue has fewer than n elements, this operation -// will block for up to timeout_ms milliseconds. -// Note: This option is not supported yet. -// If not specified, defaults to -1 -func QueueDequeueManyV2TimeoutMs(value int64) QueueDequeueManyV2Attr { - return func(m optionalAttr) { - m["timeout_ms"] = value - } -} - -// Dequeues `n` tuples of one or more tensors from the given queue. -// -// If the queue is closed and there are fewer than `n` elements, then an -// OutOfRange error is returned. -// -// This operation concatenates queue-element component tensors along the -// 0th dimension to make a single component tensor. All of the components -// in the dequeued tuple will have size `n` in the 0th dimension. -// -// This operation has `k` outputs, where `k` is the number of components in -// the tuples stored in the given queue, and output `i` is the ith -// component of the dequeued tuple. -// -// N.B. If the queue is empty, this operation will block until `n` elements -// have been dequeued (or 'timeout_ms' elapses, if specified). -// -// Arguments: -// handle: The handle to a queue. -// n: The number of tuples to dequeue. -// component_types: The type of each component in a tuple. -// -// Returns One or more tensors that were dequeued as a tuple. -func QueueDequeueManyV2(scope *Scope, handle tf.Output, n tf.Output, component_types []tf.DataType, optional ...QueueDequeueManyV2Attr) (components []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"component_types": component_types} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QueueDequeueManyV2", - Input: []tf.Input{ - handle, n, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if components, idx, err = makeOutputList(op, idx, "components"); err != nil { - scope.UpdateErr("QueueDequeueManyV2", err) - return - } - return components -} - -// Returns x * y element-wise. Returns zero if y is zero, even if x if infinite or NaN. -// -// *NOTE*: `MulNoNan` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func MulNoNan(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MulNoNan", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// AsStringAttr is an optional argument to AsString. -type AsStringAttr func(optionalAttr) - -// AsStringPrecision sets the optional precision attribute to value. -// -// value: The post-decimal precision to use for floating point numbers. -// Only used if precision > -1. -// If not specified, defaults to -1 -func AsStringPrecision(value int64) AsStringAttr { - return func(m optionalAttr) { - m["precision"] = value - } -} - -// AsStringScientific sets the optional scientific attribute to value. -// -// value: Use scientific notation for floating point numbers. -// If not specified, defaults to false -func AsStringScientific(value bool) AsStringAttr { - return func(m optionalAttr) { - m["scientific"] = value - } -} - -// AsStringShortest sets the optional shortest attribute to value. -// -// value: Use shortest representation (either scientific or standard) for -// floating point numbers. 
-// If not specified, defaults to false -func AsStringShortest(value bool) AsStringAttr { - return func(m optionalAttr) { - m["shortest"] = value - } -} - -// AsStringWidth sets the optional width attribute to value. -// -// value: Pad pre-decimal numbers to this width. -// Applies to both floating point and integer numbers. -// Only used if width > -1. -// If not specified, defaults to -1 -func AsStringWidth(value int64) AsStringAttr { - return func(m optionalAttr) { - m["width"] = value - } -} - -// AsStringFill sets the optional fill attribute to value. -// -// value: The value to pad if width > -1. If empty, pads with spaces. -// Another typical value is '0'. String cannot be longer than 1 character. -// If not specified, defaults to "" -func AsStringFill(value string) AsStringAttr { - return func(m optionalAttr) { - m["fill"] = value - } -} - -// Converts each entry in the given tensor to strings. -// -// Supports many numeric types and boolean. -// -// For Unicode, see the -// [https://www.tensorflow.org/tutorials/representation/unicode](Working with Unicode text) -// tutorial. -// -// Examples: -// -// >>> tf.strings.as_string([3, 2]) -// -// >>> tf.strings.as_string([3.1415926, 2.71828], precision=2).numpy() -// array([b'3.14', b'2.72'], dtype=object) -func AsString(scope *Scope, input tf.Output, optional ...AsStringAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "AsString", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Conv3DBackpropFilterV2Attr is an optional argument to Conv3DBackpropFilterV2. -type Conv3DBackpropFilterV2Attr func(optionalAttr) - -// Conv3DBackpropFilterV2DataFormat sets the optional data_format attribute to value. -// -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func Conv3DBackpropFilterV2DataFormat(value string) Conv3DBackpropFilterV2Attr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Conv3DBackpropFilterV2Dilations sets the optional dilations attribute to value. -// -// value: 1-D tensor of length 5. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each -// filter element on that dimension. The dimension order is determined by the -// value of `data_format`, see above for details. Dilations in the batch and -// depth dimensions must be 1. -// If not specified, defaults to -func Conv3DBackpropFilterV2Dilations(value []int64) Conv3DBackpropFilterV2Attr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes the gradients of 3-D convolution with respect to the filter. -// -// Arguments: -// input: Shape `[batch, depth, rows, cols, in_channels]`. -// filter_sizes: An integer vector representing the tensor shape of `filter`, -// where `filter` is a 5-D -// `[filter_depth, filter_height, filter_width, in_channels, out_channels]` -// tensor. -// out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols, -// out_channels]`. -// strides: 1-D tensor of length 5. 
The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. -func Conv3DBackpropFilterV2(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropFilterV2Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Conv3DBackpropFilterV2", - Input: []tf.Input{ - input, filter_sizes, out_backprop, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns x + y element-wise. -// -// *NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func AddV2(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "AddV2", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// UniformCandidateSamplerAttr is an optional argument to UniformCandidateSampler. -type UniformCandidateSamplerAttr func(optionalAttr) - -// UniformCandidateSamplerSeed sets the optional seed attribute to value. -// -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func UniformCandidateSamplerSeed(value int64) UniformCandidateSamplerAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// UniformCandidateSamplerSeed2 sets the optional seed2 attribute to value. -// -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func UniformCandidateSamplerSeed2(value int64) UniformCandidateSamplerAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Generates labels for candidate sampling with a uniform distribution. -// -// See explanations of candidate sampling and the data formats at -// go/candidate-sampling. -// -// For each batch, this op picks a single set of sampled candidate labels. -// -// The advantages of sampling candidates per-batch are simplicity and the -// possibility of efficient dense matrix multiplication. The disadvantage is that -// the sampled candidates must be chosen independently of the context and of the -// true labels. -// -// Arguments: -// true_classes: A batch_size * num_true matrix, in which each row contains the -// IDs of the num_true target_classes in the corresponding original label. -// num_true: Number of true labels per context. -// num_sampled: Number of candidates to randomly sample. -// unique: If unique is true, we sample with rejection, so that all sampled -// candidates in a batch are unique. This requires some approximation to -// estimate the post-rejection sampling probabilities. -// range_max: The sampler will sample integers from the interval [0, range_max). -// -// Returns: -// sampled_candidates: A vector of length num_sampled, in which each element is -// the ID of a sampled candidate. -// true_expected_count: A batch_size * num_true matrix, representing -// the number of times each candidate is expected to occur in a batch -// of sampled candidates. If unique=true, then this is a probability. 
-// sampled_expected_count: A vector of length num_sampled, for each sampled -// candidate representing the number of times the candidate is expected -// to occur in a batch of sampled candidates. If unique=true, then this is a -// probability. -func UniformCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...UniformCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "UniformCandidateSampler", - Input: []tf.Input{ - true_classes, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// TryRpcAttr is an optional argument to TryRpc. -type TryRpcAttr func(optionalAttr) - -// TryRpcProtocol sets the optional protocol attribute to value. -// -// value: RPC protocol to use. Empty string means use the default protocol. -// Options include 'grpc'. -// If not specified, defaults to "" -func TryRpcProtocol(value string) TryRpcAttr { - return func(m optionalAttr) { - m["protocol"] = value - } -} - -// TryRpcFailFast sets the optional fail_fast attribute to value. -// -// value: `boolean`. If `true` (default), then failures to connect -// (i.e., the server does not immediately respond) cause an RPC failure. -// If not specified, defaults to true -func TryRpcFailFast(value bool) TryRpcAttr { - return func(m optionalAttr) { - m["fail_fast"] = value - } -} - -// TryRpcTimeoutInMs sets the optional timeout_in_ms attribute to value. -// -// value: `int`. If `0` (default), then the kernel will run the RPC -// request and only time out if the RPC deadline passes or the session times out. -// If this value is greater than `0`, then the op will raise an exception if -// the RPC takes longer than `timeout_in_ms`. -// If not specified, defaults to 0 -func TryRpcTimeoutInMs(value int64) TryRpcAttr { - return func(m optionalAttr) { - m["timeout_in_ms"] = value - } -} - -// Perform batches of RPC requests. -// -// This op asynchronously performs either a single RPC request, or a batch -// of requests. RPC requests are defined by three main parameters: -// -// - `address` (the host+port or BNS address of the request) -// - `method` (the method name for the request) -// - `request` (the serialized proto string, or vector of strings, -// of the RPC request argument). -// -// For example, if you have an RPC service running on port localhost:2345, -// and its interface is configured with the following proto declaration: -// -// ``` -// service MyService { -// rpc MyMethod(MyRequestProto) returns (MyResponseProto) { -// } -// }; -// ``` -// -// then call this op with arguments: -// -// ``` -// address = "localhost:2345" -// method = "MyService/MyMethod" -// ``` -// -// The `request` tensor is a string tensor representing serialized `MyRequestProto` -// strings; and the output string tensor `response` will have the same shape -// and contain (upon successful completion) corresponding serialized -// `MyResponseProto` strings. -// -// For example, to send a single, empty, `MyRequestProto`, call -// this op with `request = ""`. To send 5 **parallel** empty requests, -// call this op with `request = ["", "", "", "", ""]`. 
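`UniformCandidateSampler` above returns three tensors; in the Go bindings each one is simply another `tf.Output` from the wrapper, with the required attrs passed positionally and the seeds supplied as optional closures. A hedged sketch, not part of this patch: the helper name is hypothetical and it assumes the usual aliases for the Go bindings, `tf` for tensorflow/go and `op` for tensorflow/go/op.

    // sampleUniformCandidates draws 10 unique candidate ids from [0, 1000) and also
    // returns the expected counts for the true classes and the sampled candidates.
    func sampleUniformCandidates(s *op.Scope, trueClasses tf.Output) (cand, trueExp, sampledExp tf.Output) {
        return op.UniformCandidateSampler(s, trueClasses,
            1,    // num_true
            10,   // num_sampled
            true, // unique: sample with rejection
            1000, // range_max
            op.UniformCandidateSamplerSeed(7))
    }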
-// -// More generally, one can create a batch of `MyRequestProto` serialized protos -// from regular batched tensors using the `encode_proto` op, and convert -// the response `MyResponseProto` serialized protos to batched tensors -// using the `decode_proto` op. -// -// **NOTE** Working with serialized proto strings is faster than instantiating -// actual proto objects in memory, so no performance degradation is expected -// compared to writing custom kernels for this workflow. -// -// Unlike the standard `Rpc` op, if the connection fails or the remote worker -// returns an error status, this op does **not** reraise the exception. -// Instead, the `status_code` and `status_message` entry for the corresponding RPC -// call is set with the error returned from the RPC call. The `response` tensor -// will contain valid response values for those minibatch entries whose RPCs did -// not fail; the rest of the entries will have empty strings. -// -// Arguments: -// address: `0-D` or `1-D`. The address (i.e. host_name:port) of the RPC server. -// If this tensor has more than 1 element, then multiple parallel rpc requests -// are sent. This argument broadcasts with `method` and `request`. -// method: `0-D` or `1-D`. The method address on the RPC server. -// If this tensor has more than 1 element, then multiple parallel rpc requests -// are sent. This argument broadcasts with `address` and `request`. -// request: `0-D` or `1-D`. Serialized proto strings: the rpc request argument. -// If this tensor has more than 1 element, then multiple parallel rpc requests -// are sent. This argument broadcasts with `address` and `method`. -// -// Returns: -// response: Same shape as `request`. Serialized proto strings: the rpc responses. -// status_code: Same shape as `request`. Values correspond to tensorflow Status enum codes. -// status_message: Same shape as `request`. Values correspond to Status messages -// returned from the RPC calls. -func TryRpc(scope *Scope, address tf.Output, method tf.Output, request tf.Output, optional ...TryRpcAttr) (response tf.Output, status_code tf.Output, status_message tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TryRpc", - Input: []tf.Input{ - address, method, request, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// ResourceGatherAttr is an optional argument to ResourceGather. -type ResourceGatherAttr func(optionalAttr) - -// ResourceGatherBatchDims sets the optional batch_dims attribute to value. -// If not specified, defaults to 0 -func ResourceGatherBatchDims(value int64) ResourceGatherAttr { - return func(m optionalAttr) { - m["batch_dims"] = value - } -} - -// ResourceGatherValidateIndices sets the optional validate_indices attribute to value. -// If not specified, defaults to true -func ResourceGatherValidateIndices(value bool) ResourceGatherAttr { - return func(m optionalAttr) { - m["validate_indices"] = value - } -} - -// Gather slices from the variable pointed to by `resource` according to `indices`. -// -// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D). -// Produces an output tensor with shape `indices.shape + params.shape[1:]` where: -// -// ```python -// # Scalar indices -// output[:, ..., :] = params[indices, :, ... :] -// -// # Vector indices -// output[i, :, ..., :] = params[indices[i], :, ... 
:] -// -// # Higher rank indices -// output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :] -// ``` -func ResourceGather(scope *Scope, resource tf.Output, indices tf.Output, dtype tf.DataType, optional ...ResourceGatherAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceGather", - Input: []tf.Input{ - resource, indices, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns x + y element-wise. -// -// *NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Add(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Add", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns element-wise smallest integer not less than x. -func Ceil(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Ceil", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns element-wise largest integer not greater than x. -func Floor(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Floor", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the Bessel i0e function of `x` element-wise. -// -// Exponentially scaled modified Bessel function of order 0 defined as -// `bessel_i0e(x) = exp(-abs(x)) bessel_i0(x)`. -// -// This function is faster and numerically stabler than `bessel_i0(x)`. -func BesselI0e(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BesselI0e", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the trignometric inverse tangent of x element-wise. -// -// The `tf.math.atan` operation returns the inverse of `tf.math.tan`, such that -// if `y = tf.math.tan(x)` then, `x = tf.math.atan(y)`. -// -// **Note**: The output of `tf.math.atan` will lie within the invertible range -// of tan, i.e (-pi/2, pi/2). -// -// For example: -// -// ```python -// # Note: [1.047, 0.785] ~= [(pi/3), (pi/4)] -// x = tf.constant([1.047, 0.785]) -// y = tf.math.tan(x) # [1.731261, 0.99920404] -// -// tf.math.atan(y) # [1.047, 0.785] = x -// ``` -// -func Atan(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Atan", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes acos of x element-wise. -func Acos(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Acos", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// FusedBatchNormV2Attr is an optional argument to FusedBatchNormV2. -type FusedBatchNormV2Attr func(optionalAttr) - -// FusedBatchNormV2Epsilon sets the optional epsilon attribute to value. -// -// value: A small float number added to the variance of x. 
-// If not specified, defaults to 0.0001 -func FusedBatchNormV2Epsilon(value float32) FusedBatchNormV2Attr { - return func(m optionalAttr) { - m["epsilon"] = value - } -} - -// FusedBatchNormV2ExponentialAvgFactor sets the optional exponential_avg_factor attribute to value. -// If not specified, defaults to 1 -func FusedBatchNormV2ExponentialAvgFactor(value float32) FusedBatchNormV2Attr { - return func(m optionalAttr) { - m["exponential_avg_factor"] = value - } -} - -// FusedBatchNormV2DataFormat sets the optional data_format attribute to value. -// -// value: The data format for x and y. Either "NHWC" (default) or "NCHW". -// If not specified, defaults to "NHWC" -func FusedBatchNormV2DataFormat(value string) FusedBatchNormV2Attr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// FusedBatchNormV2IsTraining sets the optional is_training attribute to value. -// -// value: A bool value to indicate the operation is for training (default) -// or inference. -// If not specified, defaults to true -func FusedBatchNormV2IsTraining(value bool) FusedBatchNormV2Attr { - return func(m optionalAttr) { - m["is_training"] = value - } -} - -// Batch normalization. -// -// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". -// The size of 1D Tensors matches the dimension C of the 4D Tensors. -// -// Arguments: -// x: A 4D Tensor for input data. -// scale: A 1D Tensor for scaling factor, to scale the normalized x. -// offset: A 1D Tensor for offset, to shift to the normalized x. -// mean: A 1D Tensor for population mean. Used for inference only; -// must be empty for training. -// variance: A 1D Tensor for population variance. Used for inference only; -// must be empty for training. -// -// Returns: -// y: A 4D Tensor for output data. -// batch_mean: A 1D Tensor for the computed batch mean, to be used by TensorFlow -// to compute the running mean. -// batch_variance: A 1D Tensor for the computed batch variance, to be used by -// TensorFlow to compute the running variance. -// reserve_space_1: A 1D Tensor for the computed batch mean, to be reused -// in the gradient computation. -// reserve_space_2: A 1D Tensor for the computed batch variance (inverted variance -// in the cuDNN case), to be reused in the gradient computation. -func FusedBatchNormV2(scope *Scope, x tf.Output, scale tf.Output, offset tf.Output, mean tf.Output, variance tf.Output, optional ...FusedBatchNormV2Attr) (y tf.Output, batch_mean tf.Output, batch_variance tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FusedBatchNormV2", - Input: []tf.Input{ - x, scale, offset, mean, variance, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) -} - -// Computes sine of x element-wise. -// -// Given an input tensor, this function computes sine of every -// element in the tensor. Input range is `(-inf, inf)` and -// output range is `[-1,1]`. 
-// -// ```python -// x = tf.constant([-float("inf"), -9, -0.5, 1, 1.2, 200, 10, float("inf")]) -// tf.math.sin(x) ==> [nan -0.4121185 -0.47942555 0.84147096 0.9320391 -0.87329733 -0.54402107 nan] -// ``` -func Sin(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Sin", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// PrintAttr is an optional argument to Print. -type PrintAttr func(optionalAttr) - -// PrintMessage sets the optional message attribute to value. -// -// value: A string, prefix of the error message. -// If not specified, defaults to "" -func PrintMessage(value string) PrintAttr { - return func(m optionalAttr) { - m["message"] = value - } -} - -// PrintFirstN sets the optional first_n attribute to value. -// -// value: Only log `first_n` number of times. -1 disables logging. -// If not specified, defaults to -1 -func PrintFirstN(value int64) PrintAttr { - return func(m optionalAttr) { - m["first_n"] = value - } -} - -// PrintSummarize sets the optional summarize attribute to value. -// -// value: Only print this many entries of each tensor. -// If not specified, defaults to 3 -func PrintSummarize(value int64) PrintAttr { - return func(m optionalAttr) { - m["summarize"] = value - } -} - -// Prints a list of tensors. -// -// Passes `input` through to `output` and prints `data` when evaluating. -// -// Arguments: -// input: The tensor passed to `output` -// data: A list of tensors to print out when op is evaluated. -// -// Returns = The unmodified `input` tensor -func Print(scope *Scope, input tf.Output, data []tf.Output, optional ...PrintAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Print", - Input: []tf.Input{ - input, tf.OutputList(data), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the Approximate Minimum Degree (AMD) ordering of `input`. -// -// Computes the Approximate Minimum Degree (AMD) ordering for a sparse matrix. -// -// The returned permutation may be used to permute the rows and columns of the -// given sparse matrix. This typically results in permuted sparse matrix's sparse -// Cholesky (or other decompositions) in having fewer zero fill-in compared to -// decomposition of the original matrix. -// -// The input sparse matrix may have rank 2 or rank 3. The output Tensor, -// representing would then have rank 1 or 2 respectively, with the same batch -// shape as the input. -// -// Each component of the input sparse matrix must represent a square symmetric -// matrix; only the lower triangular part of the matrix is read. The values of the -// sparse matrix does not affect the returned permutation, only the sparsity -// pattern of the sparse matrix is used. Hence, a single AMD ordering may be -// reused for the Cholesky decompositions of sparse matrices with the same sparsity -// pattern but with possibly different values. -// -// Each batch component of the output permutation represents a permutation of `N` -// elements, where the input sparse matrix components each have `N` rows. That is, -// the component contains each of the integers `{0, .. N-1}` exactly once. The -// `i`th element represents the row index that the `i`th row maps to. 
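Every wrapper in this regenerated file follows the same shape: build an attrs map, apply the optional-attribute closures, add the op through the scope, and return its outputs. As a point of reference, here is a minimal end-to-end sketch of calling one of the wrappers shown above (`Print`) from client code. It is illustrative only; `op.NewScope`, `op.Const`, `tf.NewSession` and friends come from the same Go bindings but are not part of this hunk.

    package main

    import (
        "fmt"

        tf "github.com/tensorflow/tensorflow/tensorflow/go"
        "github.com/tensorflow/tensorflow/tensorflow/go/op"
    )

    func main() {
        s := op.NewScope()
        x := op.Const(s, []float32{1, 2, 3})
        // Optional attributes are plain closures passed as trailing variadic args.
        y := op.Print(s, x, []tf.Output{x}, op.PrintMessage("x = "), op.PrintSummarize(3))

        graph, err := s.Finalize()
        if err != nil {
            panic(err)
        }
        sess, err := tf.NewSession(graph, nil)
        if err != nil {
            panic(err)
        }
        defer sess.Close()
        out, err := sess.Run(nil, []tf.Output{y}, nil)
        if err != nil {
            panic(err)
        }
        fmt.Println(out[0].Value())
    }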
-// -// Usage example: -// -// ```python -// from tensorflow.python.ops.linalg.sparse import sparse_csr_matrix_ops -// -// a_indices = np.array([[0, 0], [1, 1], [2, 1], [2, 2], [3, 3]]) -// a_values = np.array([1.0, 2.0, 1.0, 3.0, 4.0], np.float32) -// a_dense_shape = [4, 4] -// -// with tf.Session() as sess: -// # Define (COO format) SparseTensor over Numpy array. -// a_st = tf.sparse.SparseTensor(a_indices, a_values, a_dense_shape) -// -// # Convert SparseTensors to CSR SparseMatrix. -// a_sm = sparse_csr_matrix_ops.sparse_tensor_to_csr_sparse_matrix( -// a_st.indices, a_st.values, a_st.dense_shape) -// -// # Obtain the AMD Ordering for the CSR SparseMatrix. -// ordering_amd = sparse_csr_matrix_ops.sparse_matrix_ordering_amd(sparse_matrix) -// -// ordering_amd_value = sess.run(ordering_amd) -// ``` -// -// `ordering_amd_value` stores the AMD ordering: `[1 2 3 0]`. -// -// input: A `CSRSparseMatrix`. -// -// Arguments: -// input: A `CSRSparseMatrix`. -// -// Returns The Approximate Minimum Degree (AMD) ordering of `input`. -func SparseMatrixOrderingAMD(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseMatrixOrderingAMD", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes Psi, the derivative of Lgamma (the log of the absolute value of -// -// `Gamma(x)`), element-wise. -func Digamma(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Digamma", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the gradient for the tanh of `x` wrt its input. -// -// Specifically, `grad = dy * (1 - y*y)`, where `y = tanh(x)`, and `dy` -// is the corresponding input gradient. -func TanhGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TanhGrad", - Input: []tf.Input{ - y, dy, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// FusedBatchNormAttr is an optional argument to FusedBatchNorm. -type FusedBatchNormAttr func(optionalAttr) - -// FusedBatchNormEpsilon sets the optional epsilon attribute to value. -// -// value: A small float number added to the variance of x. -// If not specified, defaults to 0.0001 -func FusedBatchNormEpsilon(value float32) FusedBatchNormAttr { - return func(m optionalAttr) { - m["epsilon"] = value - } -} - -// FusedBatchNormExponentialAvgFactor sets the optional exponential_avg_factor attribute to value. -// If not specified, defaults to 1 -func FusedBatchNormExponentialAvgFactor(value float32) FusedBatchNormAttr { - return func(m optionalAttr) { - m["exponential_avg_factor"] = value - } -} - -// FusedBatchNormDataFormat sets the optional data_format attribute to value. -// -// value: The data format for x and y. Either "NHWC" (default) or "NCHW". -// If not specified, defaults to "NHWC" -func FusedBatchNormDataFormat(value string) FusedBatchNormAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// FusedBatchNormIsTraining sets the optional is_training attribute to value. -// -// value: A bool value to indicate the operation is for training (default) -// or inference. -// If not specified, defaults to true -func FusedBatchNormIsTraining(value bool) FusedBatchNormAttr { - return func(m optionalAttr) { - m["is_training"] = value - } -} - -// Batch normalization. 
-// -// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". -// The size of 1D Tensors matches the dimension C of the 4D Tensors. -// -// Arguments: -// x: A 4D Tensor for input data. -// scale: A 1D Tensor for scaling factor, to scale the normalized x. -// offset: A 1D Tensor for offset, to shift to the normalized x. -// mean: A 1D Tensor for population mean. Used for inference only; -// must be empty for training. -// variance: A 1D Tensor for population variance. Used for inference only; -// must be empty for training. -// -// Returns: -// y: A 4D Tensor for output data. -// batch_mean: A 1D Tensor for the computed batch mean, to be used by TensorFlow -// to compute the running mean. -// batch_variance: A 1D Tensor for the computed batch variance, to be used by -// TensorFlow to compute the running variance. -// reserve_space_1: A 1D Tensor for the computed batch mean, to be reused -// in the gradient computation. -// reserve_space_2: A 1D Tensor for the computed batch variance (inverted variance -// in the cuDNN case), to be reused in the gradient computation. -func FusedBatchNorm(scope *Scope, x tf.Output, scale tf.Output, offset tf.Output, mean tf.Output, variance tf.Output, optional ...FusedBatchNormAttr) (y tf.Output, batch_mean tf.Output, batch_variance tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FusedBatchNorm", - Input: []tf.Input{ - x, scale, offset, mean, variance, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) -} - -// SparseMatMulAttr is an optional argument to SparseMatMul. -type SparseMatMulAttr func(optionalAttr) - -// SparseMatMulTransposeA sets the optional transpose_a attribute to value. -// If not specified, defaults to false -func SparseMatMulTransposeA(value bool) SparseMatMulAttr { - return func(m optionalAttr) { - m["transpose_a"] = value - } -} - -// SparseMatMulTransposeB sets the optional transpose_b attribute to value. -// If not specified, defaults to false -func SparseMatMulTransposeB(value bool) SparseMatMulAttr { - return func(m optionalAttr) { - m["transpose_b"] = value - } -} - -// SparseMatMulAIsSparse sets the optional a_is_sparse attribute to value. -// If not specified, defaults to false -func SparseMatMulAIsSparse(value bool) SparseMatMulAttr { - return func(m optionalAttr) { - m["a_is_sparse"] = value - } -} - -// SparseMatMulBIsSparse sets the optional b_is_sparse attribute to value. -// If not specified, defaults to false -func SparseMatMulBIsSparse(value bool) SparseMatMulAttr { - return func(m optionalAttr) { - m["b_is_sparse"] = value - } -} - -// Multiply matrix "a" by matrix "b". -// -// The inputs must be two-dimensional matrices and the inner dimension of "a" must -// match the outer dimension of "b". Both "a" and "b" must be `Tensor`s not -// `SparseTensor`s. This op is optimized for the case where at least one of "a" or -// "b" is sparse, in the sense that they have a large proportion of zero values. -// The breakeven for using this versus a dense matrix multiply on one platform was -// 30% zero values in the sparse matrix. -// -// The gradient computation of this operation will only take advantage of sparsity -// in the input gradient when that gradient comes from a Relu. 
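The five-output `FusedBatchNorm` wrapper above is typical of how multi-result ops surface in Go: each result is a separate `tf.Output` return value, and training versus inference is selected purely through the optional attributes. A hedged sketch with a hypothetical helper name, imports as in the earlier sketch:

    // batchNormInference wires FusedBatchNorm for inference on NHWC data, so the
    // supplied mean/variance are population statistics rather than batch statistics.
    func batchNormInference(s *op.Scope, x, scale, offset, mean, variance tf.Output) tf.Output {
        y, _, _, _, _ := op.FusedBatchNorm(s, x, scale, offset, mean, variance,
            op.FusedBatchNormIsTraining(false),
            op.FusedBatchNormDataFormat("NHWC"),
            op.FusedBatchNormEpsilon(1e-3))
        return y
    }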
-func SparseMatMul(scope *Scope, a tf.Output, b tf.Output, optional ...SparseMatMulAttr) (product tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SparseMatMul", - Input: []tf.Input{ - a, b, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Resizes the list. -// -// -// input_handle: the input list -// size: size of the output list -// -func TensorListResize(scope *Scope, input_handle tf.Output, size tf.Output) (output_handle tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorListResize", - Input: []tf.Input{ - input_handle, size, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes inverse hyperbolic tangent of x element-wise. -// -// Given an input tensor, this function computes inverse hyperbolic tangent -// for every element in the tensor. Input range is `[-1,1]` and output range is -// `[-inf, inf]`. If input is `-1`, output will be `-inf` and if the -// input is `1`, output will be `inf`. Values outside the range will have -// `nan` as output. -// -// ```python -// x = tf.constant([-float("inf"), -1, -0.5, 1, 0, 0.5, 10, float("inf")]) -// tf.math.atanh(x) ==> [nan -inf -0.54930615 inf 0. 0.54930615 nan nan] -// ``` -func Atanh(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Atanh", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes hyperbolic tangent of `x` element-wise. -// -// Given an input tensor, this function computes hyperbolic tangent of every -// element in the tensor. Input range is `[-inf, inf]` and -// output range is `[-1,1]`. -// -// ```python -// x = tf.constant([-float("inf"), -5, -0.5, 1, 1.2, 2, 3, float("inf")]) -// tf.math.tanh(x) ==> [-1. -0.99990916 -0.46211717 0.7615942 0.8336547 0.9640276 0.9950547 1.] -// ``` -func Tanh(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Tanh", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes hyperbolic sine of x element-wise. -// -// Given an input tensor, this function computes hyperbolic sine of every -// element in the tensor. Input range is `[-inf,inf]` and output range -// is `[-inf,inf]`. -// -// ```python -// x = tf.constant([-float("inf"), -9, -0.5, 1, 1.2, 2, 10, float("inf")]) -// tf.math.sinh(x) ==> [-inf -4.0515420e+03 -5.2109528e-01 1.1752012e+00 1.5094614e+00 3.6268604e+00 1.1013232e+04 inf] -// ``` -func Sinh(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Sinh", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceApplyProximalAdagradAttr is an optional argument to ResourceApplyProximalAdagrad. -type ResourceApplyProximalAdagradAttr func(optionalAttr) - -// ResourceApplyProximalAdagradUseLocking sets the optional use_locking attribute to value. -// -// value: If True, updating of the var and accum tensors will be protected by -// a lock; otherwise the behavior is undefined, but may exhibit less contention. 
-// If not specified, defaults to false -func ResourceApplyProximalAdagradUseLocking(value bool) ResourceApplyProximalAdagradAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' and '*accum' according to FOBOS with Adagrad learning rate. -// -// accum += grad * grad -// prox_v = var - lr * grad * (1 / sqrt(accum)) -// var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0} -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 regularization. Must be a scalar. -// grad: The gradient. -// -// Returns the created operation. -func ResourceApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, optional ...ResourceApplyProximalAdagradAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyProximalAdagrad", - Input: []tf.Input{ - var_, accum, lr, l1, l2, grad, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Divides sparse updates into the variable referenced by `resource`. -// -// This operation computes -// -// # Scalar indices -// ref[indices, ...] /= updates[...] -// -// # Vector indices (for each i) -// ref[indices[i], ...] /= updates[i, ...] -// -// # High rank indices (for each i, ..., j) -// ref[indices[i, ..., j], ...] /= updates[i, ..., j, ...] -// -// Duplicate entries are handled correctly: if multiple `indices` reference -// the same location, their contributions multiply. -// -// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`. -// -//
-// -//
-// -// Arguments: -// resource: Should be from a `Variable` node. -// indices: A tensor of indices into the first dimension of `ref`. -// updates: A tensor of updated values to add to `ref`. -// -// Returns the created operation. -func ResourceScatterDiv(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ResourceScatterDiv", - Input: []tf.Input{ - resource, indices, updates, - }, - } - return scope.AddOperation(opspec) -} - -// Computes the trignometric inverse sine of x element-wise. -// -// The `tf.math.asin` operation returns the inverse of `tf.math.sin`, such that -// if `y = tf.math.sin(x)` then, `x = tf.math.asin(y)`. -// -// **Note**: The output of `tf.math.asin` will lie within the invertible range -// of sine, i.e [-pi/2, pi/2]. -// -// For example: -// -// ```python -// # Note: [1.047, 0.785] ~= [(pi/3), (pi/4)] -// x = tf.constant([1.047, 0.785]) -// y = tf.math.sin(x) # [0.8659266, 0.7068252] -// -// tf.math.asin(y) # [1.047, 0.785] = x -// ``` -// -func Asin(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Asin", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes natural logarithm of (1 + x) element-wise. -// -// I.e., \\(y = \log_e (1 + x)\\). -// -// Example: -// -// ```python -// x = tf.constant([0, 0.5, 1, 5]) -// tf.math.log1p(x) ==> [0., 0.4054651, 0.6931472, 1.7917595] -// ``` -func Log1p(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Log1p", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Converts the quantized `input` tensor into a lower-precision `output`. -// -// Converts the quantized `input` tensor into a lower-precision `output`, using the -// output range specified with `requested_output_min` and `requested_output_max`. -// -// `[input_min, input_max]` are scalar floats that specify the range for the float -// interpretation of the `input` data. For example, if `input_min` is -1.0f and -// `input_max` is 1.0f, and we are dealing with `quint16` quantized data, then a 0 -// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f. -// -// Arguments: -// -// input_min: The float value that the minimum quantized input value represents. -// input_max: The float value that the maximum quantized input value represents. -// requested_output_min: The float value that the minimum quantized output value represents. -// requested_output_max: The float value that the maximum quantized output value represents. -// out_type: The type of the output. Should be a lower bit depth than Tinput. -// -// Returns: -// output -// output_min: The requested_output_min value is copied into this output. -// output_max: The requested_output_max value is copied into this output. 
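Unlike most wrappers in this file, the resource-variable ops above (`ResourceApplyProximalAdagrad`, `ResourceScatterDiv`) return the created `*tf.Operation` instead of a `tf.Output`, so callers run them as Session targets rather than fetches. A sketch under those assumptions; the helper name is hypothetical and the imports are as in the first sketch:

    // applyProximalAdagrad adds the FOBOS/Adagrad update op to the graph; the caller
    // later executes it by passing the returned operation as a Session.Run target.
    func applyProximalAdagrad(s *op.Scope, v, accum, lr, l1, l2, grad tf.Output) *tf.Operation {
        return op.ResourceApplyProximalAdagrad(s, v, accum, lr, l1, l2, grad,
            op.ResourceApplyProximalAdagradUseLocking(true))
    }

    // After s.Finalize() and tf.NewSession, something like:
    //   _, err := sess.Run(nil, nil, []*tf.Operation{updateOp})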
-func Requantize(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, requested_output_min tf.Output, requested_output_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"out_type": out_type} - opspec := tf.OpSpec{ - Type: "Requantize", - Input: []tf.Input{ - input, input_min, input_max, requested_output_min, requested_output_max, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Conv2DBackpropInputAttr is an optional argument to Conv2DBackpropInput. -type Conv2DBackpropInputAttr func(optionalAttr) - -// Conv2DBackpropInputUseCudnnOnGpu sets the optional use_cudnn_on_gpu attribute to value. -// If not specified, defaults to true -func Conv2DBackpropInputUseCudnnOnGpu(value bool) Conv2DBackpropInputAttr { - return func(m optionalAttr) { - m["use_cudnn_on_gpu"] = value - } -} - -// Conv2DBackpropInputExplicitPaddings sets the optional explicit_paddings attribute to value. -// -// value: If `padding` is `"EXPLICIT"`, the list of explicit padding amounts. For the ith -// dimension, the amount of padding inserted before and after the dimension is -// `explicit_paddings[2 * i]` and `explicit_paddings[2 * i + 1]`, respectively. If -// `padding` is not `"EXPLICIT"`, `explicit_paddings` must be empty. -// If not specified, defaults to <> -func Conv2DBackpropInputExplicitPaddings(value []int64) Conv2DBackpropInputAttr { - return func(m optionalAttr) { - m["explicit_paddings"] = value - } -} - -// Conv2DBackpropInputDataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func Conv2DBackpropInputDataFormat(value string) Conv2DBackpropInputAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Conv2DBackpropInputDilations sets the optional dilations attribute to value. -// -// value: 1-D tensor of length 4. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each filter -// element on that dimension. The dimension order is determined by the value of -// `data_format`, see above for details. Dilations in the batch and depth -// dimensions must be 1. -// If not specified, defaults to -func Conv2DBackpropInputDilations(value []int64) Conv2DBackpropInputAttr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes the gradients of convolution with respect to the input. -// -// Arguments: -// input_sizes: An integer vector representing the shape of `input`, -// where `input` is a 4-D `[batch, height, width, channels]` tensor. -// filter: 4-D with shape -// `[filter_height, filter_width, in_channels, out_channels]`. -// out_backprop: 4-D with shape `[batch, out_height, out_width, out_channels]`. -// Gradients w.r.t. the output of the convolution. -// strides: The stride of the sliding window for each dimension of the input -// of the convolution. Must be in the same order as the dimension specified with -// format. -// padding: The type of padding algorithm to use. -// -// Returns 4-D with shape `[batch, in_height, in_width, in_channels]`. 
Gradient -// w.r.t. the input of the convolution. -func Conv2DBackpropInput(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv2DBackpropInputAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Conv2DBackpropInput", - Input: []tf.Input{ - input_sizes, filter, out_backprop, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes `exp(x) - 1` element-wise. -// -// i.e. `exp(x) - 1` or `e^(x) - 1`, where `x` is the input tensor. -// `e` denotes Euler's number and is approximately equal to 2.718281. -// -// ```python -// x = tf.constant(2.0) -// tf.math.expm1(x) ==> 6.389056 -// -// x = tf.constant([2.0, 8.0]) -// tf.math.expm1(x) ==> array([6.389056, 2979.958], dtype=float32) -// -// x = tf.constant(1 + 1j) -// tf.math.expm1(x) ==> (0.46869393991588515+2.2873552871788423j) -// ``` -func Expm1(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Expm1", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes exponential of x element-wise. \\(y = e^x\\). -// -// This function computes the exponential of every element in the input tensor. -// i.e. `exp(x)` or `e^(x)`, where `x` is the input tensor. -// `e` denotes Euler's number and is approximately equal to 2.718281. -// Output is positive for any real input. -// -// ```python -// x = tf.constant(2.0) -// tf.math.exp(x) ==> 7.389056 -// -// x = tf.constant([2.0, 8.0]) -// tf.math.exp(x) ==> array([7.389056, 2980.958], dtype=float32) -// ``` -// -// For complex numbers, the exponential value is calculated as follows: -// -// ``` -// e^(x+iy) = e^x * e^iy = e^x * (cos y + i sin y) -// ``` -// -// Let's consider complex number 1+1j as an example. -// e^1 * (cos 1 + i sin 1) = 2.7182818284590 * (0.54030230586+0.8414709848j) -// -// ```python -// x = tf.constant(1 + 1j) -// tf.math.exp(x) ==> 1.4686939399158851+2.2873552871788423j -// ``` -func Exp(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Exp", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes square of x element-wise. -// -// I.e., \\(y = x * x = x^2\\). -func Square(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Square", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the gradient for the inverse of `x` wrt its input. -// -// Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy` -// is the corresponding input gradient. -func ReciprocalGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReciprocalGrad", - Input: []tf.Input{ - y, dy, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the reciprocal of x element-wise. -// -// I.e., \\(y = 1 / x\\). -func Inv(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Inv", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ComplexAbsAttr is an optional argument to ComplexAbs. 
-type ComplexAbsAttr func(optionalAttr) - -// ComplexAbsTout sets the optional Tout attribute to value. -// If not specified, defaults to DT_FLOAT -func ComplexAbsTout(value tf.DataType) ComplexAbsAttr { - return func(m optionalAttr) { - m["Tout"] = value - } -} - -// Computes the complex absolute value of a tensor. -// -// Given a tensor `x` of complex numbers, this operation returns a tensor of type -// `float` or `double` that is the absolute value of each element in `x`. All -// elements in `x` must be complex numbers of the form \\(a + bj\\). The absolute -// value is computed as \\( \sqrt{a^2 + b^2}\\). -func ComplexAbs(scope *Scope, x tf.Output, optional ...ComplexAbsAttr) (y tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ComplexAbs", - Input: []tf.Input{ - x, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the absolute value of a tensor. -// -// Given a tensor `x`, this operation returns a tensor containing the absolute -// value of each element in `x`. For example, if x is an input element and y is -// an output element, this operation computes \\(y = |x|\\). -func Abs(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Abs", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Produces a summary of any statistics recorded by the given statistics manager. -func ExperimentalStatsAggregatorSummary(scope *Scope, iterator tf.Output) (summary tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ExperimentalStatsAggregatorSummary", - Input: []tf.Input{ - iterator, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MeanAttr is an optional argument to Mean. -type MeanAttr func(optionalAttr) - -// MeanKeepDims sets the optional keep_dims attribute to value. -// -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func MeanKeepDims(value bool) MeanAttr { - return func(m optionalAttr) { - m["keep_dims"] = value - } -} - -// Computes the mean of elements across dimensions of a tensor. -// -// Reduces `input` along the dimensions given in `axis`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `axis`. If `keep_dims` is true, the reduced dimensions are -// retained with length 1. -// -// Arguments: -// input: The tensor to reduce. -// axis: The dimensions to reduce. Must be in the range -// `[-rank(input), rank(input))`. -// -// Returns The reduced tensor. -func Mean(scope *Scope, input tf.Output, axis tf.Output, optional ...MeanAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Mean", - Input: []tf.Input{ - input, axis, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RandomStandardNormalAttr is an optional argument to RandomStandardNormal. -type RandomStandardNormalAttr func(optionalAttr) - -// RandomStandardNormalSeed sets the optional seed attribute to value. -// -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. 
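The `Mean` wrapper above reduces along an `axis` tensor; with `MeanKeepDims(true)` the reduced axis is kept with length 1 so the result still broadcasts against the original input. A small sketch with a hypothetical helper, imports as in the first sketch:

    // lastAxisMean averages over the last dimension and keeps it as length 1,
    // e.g. a [batch, features] input yields a [batch, 1] output.
    func lastAxisMean(s *op.Scope, x tf.Output) tf.Output {
        axis := op.Const(s, int32(-1))
        return op.Mean(s, x, axis, op.MeanKeepDims(true))
    }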
-// If not specified, defaults to 0 -func RandomStandardNormalSeed(value int64) RandomStandardNormalAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// RandomStandardNormalSeed2 sets the optional seed2 attribute to value. -// -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomStandardNormalSeed2(value int64) RandomStandardNormalAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Outputs random values from a normal distribution. -// -// The generated values will have mean 0 and standard deviation 1. -// -// Arguments: -// shape: The shape of the output tensor. -// dtype: The type of the output. -// -// Returns A tensor of the specified shape filled with random normal values. -func RandomStandardNormal(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...RandomStandardNormalAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RandomStandardNormal", - Input: []tf.Input{ - shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the Gauss error function of `x` element-wise. -func Erf(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Erf", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the maximum along segments of a tensor. -// -// Read -// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) -// for an explanation of segments. -// -// Computes a tensor such that -// \\(output_i = \max_j(data_j)\\) where `max` is over `j` such -// that `segment_ids[j] == i`. -// -// If the max is empty for a given segment ID `i`, `output[i] = 0`. -// -//
-// -//
-// -// For example: -// -// ``` -// c = tf.constant([[1,2,3,4], [4, 3, 2, 1], [5,6,7,8]]) -// tf.segment_max(c, tf.constant([0, 0, 1])) -// # ==> [[4, 3, 3, 4], -// # [5, 6, 7, 8]] -// ``` -// -// -// Arguments: -// -// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s -// first dimension. Values should be sorted and can be repeated. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SegmentMax", - Input: []tf.Input{ - data, segment_ids, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// CastAttr is an optional argument to Cast. -type CastAttr func(optionalAttr) - -// CastTruncate sets the optional Truncate attribute to value. -// If not specified, defaults to false -func CastTruncate(value bool) CastAttr { - return func(m optionalAttr) { - m["Truncate"] = value - } -} - -// Cast x of type SrcT to y of DstT. -func Cast(scope *Scope, x tf.Output, DstT tf.DataType, optional ...CastAttr) (y tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"DstT": DstT} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Cast", - Input: []tf.Input{ - x, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Generate a sharded filename. The filename is printf formatted as -// -// %s-%05d-of-%05d, basename, shard, num_shards. -func ShardedFilename(scope *Scope, basename tf.Output, shard tf.Output, num_shards tf.Output) (filename tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ShardedFilename", - Input: []tf.Input{ - basename, shard, num_shards, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Elementwise computes the bitwise OR of `x` and `y`. -// -// The result will have those bits set, that are set in `x`, `y` or both. The -// computation is performed on the underlying representations of `x` and `y`. -// -// For example: -// -// ```python -// import tensorflow as tf -// from tensorflow.python.ops import bitwise_ops -// dtype_list = [tf.int8, tf.int16, tf.int32, tf.int64, -// tf.uint8, tf.uint16, tf.uint32, tf.uint64] -// -// for dtype in dtype_list: -// lhs = tf.constant([0, 5, 3, 14], dtype=dtype) -// rhs = tf.constant([5, 0, 7, 11], dtype=dtype) -// exp = tf.constant([5, 5, 7, 15], dtype=tf.float32) -// -// res = bitwise_ops.bitwise_or(lhs, rhs) -// tf.assert_equal(tf.cast(res, tf.float32), exp) # TRUE -// ``` -// -func BitwiseOr(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BitwiseOr", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SendAttr is an optional argument to Send. -type SendAttr func(optionalAttr) - -// SendClientTerminated sets the optional client_terminated attribute to value. -// -// value: If set to true, this indicates that the node was added -// to the graph as a result of a client-side feed or fetch of Tensor data, -// in which case the corresponding send or recv is expected to be managed -// locally by the caller. -// If not specified, defaults to false -func SendClientTerminated(value bool) SendAttr { - return func(m optionalAttr) { - m["client_terminated"] = value - } -} - -// Sends the named tensor from send_device to recv_device. 
-// -// Arguments: -// tensor: The tensor to send. -// tensor_name: The name of the tensor to send. -// send_device: The name of the device sending the tensor. -// send_device_incarnation: The current incarnation of send_device. -// recv_device: The name of the device receiving the tensor. -// -// Returns the created operation. -func Send(scope *Scope, tensor tf.Output, tensor_name string, send_device string, send_device_incarnation int64, recv_device string, optional ...SendAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"tensor_name": tensor_name, "send_device": send_device, "send_device_incarnation": send_device_incarnation, "recv_device": recv_device} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Send", - Input: []tf.Input{ - tensor, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// BatchMatMulV2Attr is an optional argument to BatchMatMulV2. -type BatchMatMulV2Attr func(optionalAttr) - -// BatchMatMulV2AdjX sets the optional adj_x attribute to value. -// -// value: If `True`, adjoint the slices of `x`. Defaults to `False`. -// If not specified, defaults to false -func BatchMatMulV2AdjX(value bool) BatchMatMulV2Attr { - return func(m optionalAttr) { - m["adj_x"] = value - } -} - -// BatchMatMulV2AdjY sets the optional adj_y attribute to value. -// -// value: If `True`, adjoint the slices of `y`. Defaults to `False`. -// If not specified, defaults to false -func BatchMatMulV2AdjY(value bool) BatchMatMulV2Attr { - return func(m optionalAttr) { - m["adj_y"] = value - } -} - -// Multiplies slices of two tensors in batches. -// -// Multiplies all slices of `Tensor` `x` and `y` (each slice can be -// viewed as an element of a batch), and arranges the individual results -// in a single output tensor of the same batch size. Each of the -// individual slices can optionally be adjointed (to adjoint a matrix -// means to transpose and conjugate it) before multiplication by setting -// the `adj_x` or `adj_y` flag to `True`, which are by default `False`. -// -// The input tensors `x` and `y` are 2-D or higher with shape `[..., r_x, c_x]` -// and `[..., r_y, c_y]`. -// -// The output tensor is 2-D or higher with shape `[..., r_o, c_o]`, where: -// -// r_o = c_x if adj_x else r_x -// c_o = r_y if adj_y else c_y -// -// It is computed as: -// -// output[..., :, :] = matrix(x[..., :, :]) * matrix(y[..., :, :]) -// -// *NOTE*: `BatchMatMulV2` supports broadcasting in the batch dimensions. More -// about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html). -// -// -// Arguments: -// x: 2-D or higher with shape `[..., r_x, c_x]`. -// y: 2-D or higher with shape `[..., r_y, c_y]`. -// -// Returns 3-D or higher with shape `[..., r_o, c_o]` -func BatchMatMulV2(scope *Scope, x tf.Output, y tf.Output, optional ...BatchMatMulV2Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "BatchMatMulV2", - Input: []tf.Input{ - x, y, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns 0 if x == 0, and x / y otherwise, elementwise. -func Xdivy(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Xdivy", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Compute the pairwise cross product. 
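`BatchMatMulV2` above exposes its `adj_x`/`adj_y` flags through the same optional-attribute closures; adjointing an operand transposes (and conjugates) each batch slice before the multiply. A sketch computing the Gram matrix per batch slice of a real-valued tensor; hypothetical helper, imports as before:

    // gram computes x^T * x for every batch slice by adjointing the first operand;
    // for real inputs the adjoint is just the transpose.
    func gram(s *op.Scope, x tf.Output) tf.Output {
        return op.BatchMatMulV2(s, x, x, op.BatchMatMulV2AdjX(true))
    }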
-// -// `a` and `b` must be the same shape; they can either be simple 3-element vectors, -// or any shape where the innermost dimension is 3. In the latter case, each pair -// of corresponding 3-element vectors is cross-multiplied independently. -// -// Arguments: -// a: A tensor containing 3-element vectors. -// b: Another tensor, of same type and shape as `a`. -// -// Returns Pairwise cross product of the vectors in `a` and `b`. -func Cross(scope *Scope, a tf.Output, b tf.Output) (product tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Cross", - Input: []tf.Input{ - a, b, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Sends `input` to all devices that are connected to the output. -// -// Sends `input` to all devices that are connected to the output. -// -// The graph should be constructed so that all ops connected to the output have a -// valid device assignment, and the op itself is assigned one of these devices. -// -// input: The input to the broadcast. -// output: The same as input. -// shape: The shape of the input tensor. -// -func NcclBroadcast(scope *Scope, input tf.Output, shape tf.Shape) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"shape": shape} - opspec := tf.OpSpec{ - Type: "NcclBroadcast", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Conv2DAttr is an optional argument to Conv2D. -type Conv2DAttr func(optionalAttr) - -// Conv2DUseCudnnOnGpu sets the optional use_cudnn_on_gpu attribute to value. -// If not specified, defaults to true -func Conv2DUseCudnnOnGpu(value bool) Conv2DAttr { - return func(m optionalAttr) { - m["use_cudnn_on_gpu"] = value - } -} - -// Conv2DExplicitPaddings sets the optional explicit_paddings attribute to value. -// -// value: If `padding` is `"EXPLICIT"`, the list of explicit padding amounts. For the ith -// dimension, the amount of padding inserted before and after the dimension is -// `explicit_paddings[2 * i]` and `explicit_paddings[2 * i + 1]`, respectively. If -// `padding` is not `"EXPLICIT"`, `explicit_paddings` must be empty. -// If not specified, defaults to <> -func Conv2DExplicitPaddings(value []int64) Conv2DAttr { - return func(m optionalAttr) { - m["explicit_paddings"] = value - } -} - -// Conv2DDataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, height, width, channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, channels, height, width]. -// If not specified, defaults to "NHWC" -func Conv2DDataFormat(value string) Conv2DAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Conv2DDilations sets the optional dilations attribute to value. -// -// value: 1-D tensor of length 4. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each -// filter element on that dimension. The dimension order is determined by the -// value of `data_format`, see above for details. Dilations in the batch and -// depth dimensions must be 1. -// If not specified, defaults to -func Conv2DDilations(value []int64) Conv2DAttr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes a 2-D convolution given 4-D `input` and `filter` tensors. 
-// -// Given an input tensor of shape `[batch, in_height, in_width, in_channels]` -// and a filter / kernel tensor of shape -// `[filter_height, filter_width, in_channels, out_channels]`, this op -// performs the following: -// -// 1. Flattens the filter to a 2-D matrix with shape -// `[filter_height * filter_width * in_channels, output_channels]`. -// 2. Extracts image patches from the input tensor to form a *virtual* -// tensor of shape `[batch, out_height, out_width, -// filter_height * filter_width * in_channels]`. -// 3. For each patch, right-multiplies the filter matrix and the image patch -// vector. -// -// In detail, with the default NHWC format, -// -// output[b, i, j, k] = -// sum_{di, dj, q} input[b, strides[1] * i + di, strides[2] * j + dj, q] * -// filter[di, dj, q, k] -// -// Must have `strides[0] = strides[3] = 1`. For the most common case of the same -// horizontal and vertices strides, `strides = [1, stride, stride, 1]`. -// -// Arguments: -// input: A 4-D tensor. The dimension order is interpreted according to the value -// of `data_format`, see below for details. -// filter: A 4-D tensor of shape -// `[filter_height, filter_width, in_channels, out_channels]` -// strides: 1-D tensor of length 4. The stride of the sliding window for each -// dimension of `input`. The dimension order is determined by the value of -// `data_format`, see below for details. -// padding: The type of padding algorithm to use. -// -// Returns A 4-D tensor. The dimension order is determined by the value of -// `data_format`, see below for details. -func Conv2D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, padding string, optional ...Conv2DAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Conv2D", - Input: []tf.Input{ - input, filter, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns locations of nonzero / true values in a tensor. -// -// This operation returns the coordinates of true elements in `condition`. The -// coordinates are returned in a 2-D tensor where the first dimension (rows) -// represents the number of true elements, and the second dimension (columns) -// represents the coordinates of the true elements. Keep in mind, the shape of -// the output tensor can vary depending on how many true values there are in -// `condition`. Indices are output in row-major order. -// -// For example: -// -// ``` -// # 'input' tensor is [[True, False] -// # [True, False]] -// # 'input' has two true values, so output has two coordinates. -// # 'input' has rank of 2, so coordinates have two indices. -// where(input) ==> [[0, 0], -// [1, 0]] -// -// # `condition` tensor is [[[True, False] -// # [True, False]] -// # [[False, True] -// # [False, True]] -// # [[False, False] -// # [False, True]]] -// # 'input' has 5 true values, so output has 5 coordinates. -// # 'input' has rank of 3, so coordinates have three indices. -// where(input) ==> [[0, 0, 0], -// [0, 1, 0], -// [1, 0, 1], -// [1, 1, 1], -// [2, 1, 1]] -// -// # `condition` tensor is [[[1.5, 0.0] -// # [-0.5, 0.0]] -// # [[0.0, 0.25] -// # [0.0, 0.75]] -// # [[0.0, 0.0] -// # [0.0, 0.01]]] -// # 'input' has 5 nonzero values, so output has 5 coordinates. -// # 'input' has rank of 3, so coordinates have three indices. 
-// where(input) ==> [[0, 0, 0], -// [0, 1, 0], -// [1, 0, 1], -// [1, 1, 1], -// [2, 1, 1]] -// -// # `condition` tensor is [[[1.5 + 0.0j, 0.0 + 0.0j] -// # [0.0 + 0.5j, 0.0 + 0.0j]] -// # [[0.0 + 0.0j, 0.25 + 1.5j] -// # [0.0 + 0.0j, 0.75 + 0.0j]] -// # [[0.0 + 0.0j, 0.0 + 0.0j] -// # [0.0 + 0.0j, 0.01 + 0.0j]]] -// # 'input' has 5 nonzero magnitude values, so output has 5 coordinates. -// # 'input' has rank of 3, so coordinates have three indices. -// where(input) ==> [[0, 0, 0], -// [0, 1, 0], -// [1, 0, 1], -// [1, 1, 1], -// [2, 1, 1]] -// ``` -func Where(scope *Scope, condition tf.Output) (index tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Where", - Input: []tf.Input{ - condition, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Deprecated, use python implementation tf.linalg.matrix_exponential. -// -// DEPRECATED at GraphDef version 27: Use Python implementation tf.linalg.matrix_exponential instead. -func MatrixExponential(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MatrixExponential", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Reduces `input` from `num_devices` using `reduction` to a single device. -// -// Reduces `input` from `num_devices` using `reduction` to a single device. -// -// The graph should be constructed so that all inputs have a valid device -// assignment, and the op itself is assigned one of these devices. -// -// input: The input to the reduction. -// data: the value of the reduction across all `num_devices` devices. -// reduction: the reduction operation to perform. -func NcclReduce(scope *Scope, input []tf.Output, reduction string) (data tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"reduction": reduction} - opspec := tf.OpSpec{ - Type: "NcclReduce", - Input: []tf.Input{ - tf.OutputList(input), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeAttr is an optional argument to QuantizedDepthwiseConv2DWithBiasAndReluAndRequantize. -type QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeAttr func(optionalAttr) - -// QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeOutType sets the optional out_type attribute to value. -// -// value: The type of the output. -// If not specified, defaults to DT_QUINT8 -func QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeOutType(value tf.DataType) QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeDilations sets the optional dilations attribute to value. -// -// value: List of dilation values. -// If not specified, defaults to -func QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeDilations(value []int64) QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeAttr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizePaddingList sets the optional padding_list attribute to value. -// If not specified, defaults to <> -func QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizePaddingList(value []int64) QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeAttr { - return func(m optionalAttr) { - m["padding_list"] = value - } -} - -// Computes quantized depthwise Conv2D with Bias, Relu and Requantize. 
-// -// Arguments: -// input: The original input tensor. -// filter: The original filter tensor. -// bias: The original bias tensor. -// min_input: The float value that the minimum quantized input value represents. -// max_input: The float value that the maximum quantized input value represents. -// min_filter: The float value that the minimum quantized filter value represents. -// max_filter: The float value that the maximum quantized filter value represents. -// min_freezed_output: The minimum float value of the output tensor. -// max_freezed_output: The maximum float value of the output tensor. -// strides: List of stride values. -// -// -// Returns: -// output: The output tensor. -// min_output: The float value that the minimum quantized output value represents. -// max_output: The float value that the maximum quantized output value represents. -func QuantizedDepthwiseConv2DWithBiasAndReluAndRequantize(scope *Scope, input tf.Output, filter tf.Output, bias tf.Output, min_input tf.Output, max_input tf.Output, min_filter tf.Output, max_filter tf.Output, min_freezed_output tf.Output, max_freezed_output tf.Output, strides []int64, padding string, optional ...QuantizedDepthwiseConv2DWithBiasAndReluAndRequantizeAttr) (output tf.Output, min_output tf.Output, max_output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizedDepthwiseConv2DWithBiasAndReluAndRequantize", - Input: []tf.Input{ - input, filter, bias, min_input, max_input, min_filter, max_filter, min_freezed_output, max_freezed_output, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// QuantizedDepthwiseConv2DWithBiasAndReluAttr is an optional argument to QuantizedDepthwiseConv2DWithBiasAndRelu. -type QuantizedDepthwiseConv2DWithBiasAndReluAttr func(optionalAttr) - -// QuantizedDepthwiseConv2DWithBiasAndReluOutType sets the optional out_type attribute to value. -// -// value: The type of the output. -// If not specified, defaults to DT_QINT32 -func QuantizedDepthwiseConv2DWithBiasAndReluOutType(value tf.DataType) QuantizedDepthwiseConv2DWithBiasAndReluAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// QuantizedDepthwiseConv2DWithBiasAndReluDilations sets the optional dilations attribute to value. -// -// value: List of dilation values. -// If not specified, defaults to -func QuantizedDepthwiseConv2DWithBiasAndReluDilations(value []int64) QuantizedDepthwiseConv2DWithBiasAndReluAttr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// QuantizedDepthwiseConv2DWithBiasAndReluPaddingList sets the optional padding_list attribute to value. -// If not specified, defaults to <> -func QuantizedDepthwiseConv2DWithBiasAndReluPaddingList(value []int64) QuantizedDepthwiseConv2DWithBiasAndReluAttr { - return func(m optionalAttr) { - m["padding_list"] = value - } -} - -// Computes quantized depthwise Conv2D with Bias and Relu. -// -// Arguments: -// input: The original input tensor. -// filter: The original filter tensor. -// bias: The original bias tensor. -// min_input: The float value that the minimum quantized input value represents. -// max_input: The float value that the maximum quantized input value represents. -// min_filter: The float value that the minimum quantized filter value represents. -// max_filter: The float value that the maximum quantized filter value represents. 
-// strides: List of stride values. -// -// -// Returns: -// output: The output tensor. -// min_output: The float value that the minimum quantized output value represents. -// max_output: The float value that the maximum quantized output value represents. -func QuantizedDepthwiseConv2DWithBiasAndRelu(scope *Scope, input tf.Output, filter tf.Output, bias tf.Output, min_input tf.Output, max_input tf.Output, min_filter tf.Output, max_filter tf.Output, strides []int64, padding string, optional ...QuantizedDepthwiseConv2DWithBiasAndReluAttr) (output tf.Output, min_output tf.Output, max_output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizedDepthwiseConv2DWithBiasAndRelu", - Input: []tf.Input{ - input, filter, bias, min_input, max_input, min_filter, max_filter, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// MergeV2CheckpointsAttr is an optional argument to MergeV2Checkpoints. -type MergeV2CheckpointsAttr func(optionalAttr) - -// MergeV2CheckpointsDeleteOldDirs sets the optional delete_old_dirs attribute to value. -// -// value: see above. -// If not specified, defaults to true -func MergeV2CheckpointsDeleteOldDirs(value bool) MergeV2CheckpointsAttr { - return func(m optionalAttr) { - m["delete_old_dirs"] = value - } -} - -// V2 format specific: merges the metadata files of sharded checkpoints. The -// -// result is one logical checkpoint, with one physical metadata file and renamed -// data files. -// -// Intended for "grouping" multiple checkpoints in a sharded checkpoint setup. -// -// If delete_old_dirs is true, attempts to delete recursively the dirname of each -// path in the input checkpoint_prefixes. This is useful when those paths are non -// user-facing temporary locations. -// -// Arguments: -// checkpoint_prefixes: prefixes of V2 checkpoints to merge. -// destination_prefix: scalar. The desired final prefix. Allowed to be the same -// as one of the checkpoint_prefixes. -// -// Returns the created operation. -func MergeV2Checkpoints(scope *Scope, checkpoint_prefixes tf.Output, destination_prefix tf.Output, optional ...MergeV2CheckpointsAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MergeV2Checkpoints", - Input: []tf.Input{ - checkpoint_prefixes, destination_prefix, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// QuantizedDepthwiseConv2DWithBiasAttr is an optional argument to QuantizedDepthwiseConv2DWithBias. -type QuantizedDepthwiseConv2DWithBiasAttr func(optionalAttr) - -// QuantizedDepthwiseConv2DWithBiasOutType sets the optional out_type attribute to value. -// -// value: The type of the output. -// If not specified, defaults to DT_QINT32 -func QuantizedDepthwiseConv2DWithBiasOutType(value tf.DataType) QuantizedDepthwiseConv2DWithBiasAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// QuantizedDepthwiseConv2DWithBiasDilations sets the optional dilations attribute to value. -// -// value: List of dilation values. -// If not specified, defaults to -func QuantizedDepthwiseConv2DWithBiasDilations(value []int64) QuantizedDepthwiseConv2DWithBiasAttr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes quantized depthwise Conv2D with Bias. 
-// -// Arguments: -// input: The original input tensor. -// filter: The original filter tensor. -// bias: The original bias tensor. -// min_input: The float value that the minimum quantized input value represents. -// max_input: The float value that the maximum quantized input value represents. -// min_filter: The float value that the minimum quantized filter value represents. -// max_filter: The float value that the maximum quantized filter value represents. -// strides: List of stride values. -// -// -// Returns: -// output: The output tensor. -// min_output: The float value that the minimum quantized output value represents. -// max_output: The float value that the maximum quantized output value represents. -func QuantizedDepthwiseConv2DWithBias(scope *Scope, input tf.Output, filter tf.Output, bias tf.Output, min_input tf.Output, max_input tf.Output, min_filter tf.Output, max_filter tf.Output, strides []int64, padding string, optional ...QuantizedDepthwiseConv2DWithBiasAttr) (output tf.Output, min_output tf.Output, max_output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizedDepthwiseConv2DWithBias", - Input: []tf.Input{ - input, filter, bias, min_input, max_input, min_filter, max_filter, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// QuantizedDepthwiseConv2DAttr is an optional argument to QuantizedDepthwiseConv2D. -type QuantizedDepthwiseConv2DAttr func(optionalAttr) - -// QuantizedDepthwiseConv2DOutType sets the optional out_type attribute to value. -// -// value: The type of the output. -// If not specified, defaults to DT_QINT32 -func QuantizedDepthwiseConv2DOutType(value tf.DataType) QuantizedDepthwiseConv2DAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// QuantizedDepthwiseConv2DDilations sets the optional dilations attribute to value. -// -// value: List of dilation values. -// If not specified, defaults to -func QuantizedDepthwiseConv2DDilations(value []int64) QuantizedDepthwiseConv2DAttr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes quantized depthwise Conv2D. -// -// Arguments: -// input: The original input tensor. -// filter: The original filter tensor. -// min_input: The float value that the minimum quantized input value represents. -// max_input: The float value that the maximum quantized input value represents. -// min_filter: The float value that the minimum quantized filter value represents. -// max_filter: The float value that the maximum quantized filter value represents. -// strides: List of stride values. -// -// -// Returns: -// output: The output tensor. -// min_output: The float value that the minimum quantized output value represents. -// max_output: The float value that the maximum quantized output value represents. 
-func QuantizedDepthwiseConv2D(scope *Scope, input tf.Output, filter tf.Output, min_input tf.Output, max_input tf.Output, min_filter tf.Output, max_filter tf.Output, strides []int64, padding string, optional ...QuantizedDepthwiseConv2DAttr) (output tf.Output, min_output tf.Output, max_output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizedDepthwiseConv2D", - Input: []tf.Input{ - input, filter, min_input, max_input, min_filter, max_filter, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// DecodeAndCropJpegAttr is an optional argument to DecodeAndCropJpeg. -type DecodeAndCropJpegAttr func(optionalAttr) - -// DecodeAndCropJpegChannels sets the optional channels attribute to value. -// -// value: Number of color channels for the decoded image. -// If not specified, defaults to 0 -func DecodeAndCropJpegChannels(value int64) DecodeAndCropJpegAttr { - return func(m optionalAttr) { - m["channels"] = value - } -} - -// DecodeAndCropJpegRatio sets the optional ratio attribute to value. -// -// value: Downscaling ratio. -// If not specified, defaults to 1 -func DecodeAndCropJpegRatio(value int64) DecodeAndCropJpegAttr { - return func(m optionalAttr) { - m["ratio"] = value - } -} - -// DecodeAndCropJpegFancyUpscaling sets the optional fancy_upscaling attribute to value. -// -// value: If true use a slower but nicer upscaling of the -// chroma planes (yuv420/422 only). -// If not specified, defaults to true -func DecodeAndCropJpegFancyUpscaling(value bool) DecodeAndCropJpegAttr { - return func(m optionalAttr) { - m["fancy_upscaling"] = value - } -} - -// DecodeAndCropJpegTryRecoverTruncated sets the optional try_recover_truncated attribute to value. -// -// value: If true try to recover an image from truncated input. -// If not specified, defaults to false -func DecodeAndCropJpegTryRecoverTruncated(value bool) DecodeAndCropJpegAttr { - return func(m optionalAttr) { - m["try_recover_truncated"] = value - } -} - -// DecodeAndCropJpegAcceptableFraction sets the optional acceptable_fraction attribute to value. -// -// value: The minimum required fraction of lines before a truncated -// input is accepted. -// If not specified, defaults to 1 -func DecodeAndCropJpegAcceptableFraction(value float32) DecodeAndCropJpegAttr { - return func(m optionalAttr) { - m["acceptable_fraction"] = value - } -} - -// DecodeAndCropJpegDctMethod sets the optional dct_method attribute to value. -// -// value: string specifying a hint about the algorithm used for -// decompression. Defaults to "" which maps to a system-specific -// default. Currently valid values are ["INTEGER_FAST", -// "INTEGER_ACCURATE"]. The hint may be ignored (e.g., the internal -// jpeg library changes to a version that does not have that specific -// option.) -// If not specified, defaults to "" -func DecodeAndCropJpegDctMethod(value string) DecodeAndCropJpegAttr { - return func(m optionalAttr) { - m["dct_method"] = value - } -} - -// Decode and Crop a JPEG-encoded image to a uint8 tensor. -// -// The attr `channels` indicates the desired number of color channels for the -// decoded image. -// -// Accepted values are: -// -// * 0: Use the number of channels in the JPEG-encoded image. -// * 1: output a grayscale image. -// * 3: output an RGB image. 
-// -// If needed, the JPEG-encoded image is transformed to match the requested number -// of color channels. -// -// The attr `ratio` allows downscaling the image by an integer factor during -// decoding. Allowed values are: 1, 2, 4, and 8. This is much faster than -// downscaling the image later. -// -// -// It is equivalent to a combination of decode and crop, but much faster by only -// decoding partial jpeg image. -// -// Arguments: -// contents: 0-D. The JPEG-encoded image. -// crop_window: 1-D. The crop window: [crop_y, crop_x, crop_height, crop_width]. -// -// Returns 3-D with shape `[height, width, channels]`.. -func DecodeAndCropJpeg(scope *Scope, contents tf.Output, crop_window tf.Output, optional ...DecodeAndCropJpegAttr) (image tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DecodeAndCropJpeg", - Input: []tf.Input{ - contents, crop_window, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// QuantizedConv2DPerChannelAttr is an optional argument to QuantizedConv2DPerChannel. -type QuantizedConv2DPerChannelAttr func(optionalAttr) - -// QuantizedConv2DPerChannelOutType sets the optional out_type attribute to value. -// -// value: The quantized type of output tensor that needs to be converted. -// If not specified, defaults to DT_QINT32 -func QuantizedConv2DPerChannelOutType(value tf.DataType) QuantizedConv2DPerChannelAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// QuantizedConv2DPerChannelDilations sets the optional dilations attribute to value. -// -// value: list of dilation values. -// If not specified, defaults to -func QuantizedConv2DPerChannelDilations(value []int64) QuantizedConv2DPerChannelAttr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes QuantizedConv2D per channel. -// -// Arguments: -// input: The original input tensor. -// filter: The original filter tensor. -// min_input: The minimum value of the input tensor -// max_input: The maximum value of the input tensor. -// min_filter: The minimum value of the filter tensor. -// max_filter: The maximum value of the filter tensor. -// strides: list of stride values. -// -// -// Returns: -// output: The output tensor. -// min_output: The minimum value of the final output tensor. -// max_output: The maximum value of the final output tensor. -func QuantizedConv2DPerChannel(scope *Scope, input tf.Output, filter tf.Output, min_input tf.Output, max_input tf.Output, min_filter tf.Output, max_filter tf.Output, strides []int64, padding string, optional ...QuantizedConv2DPerChannelAttr) (output tf.Output, min_output tf.Output, max_output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizedConv2DPerChannel", - Input: []tf.Input{ - input, filter, min_input, max_input, min_filter, max_filter, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Concatenates quantized tensors along one dimension. -// -// Arguments: -// concat_dim: 0-D. The dimension along which to concatenate. Must be in the -// range [0, rank(values)). -// values: The `N` Tensors to concatenate. Their ranks and types must match, -// and their sizes must match in all dimensions except `concat_dim`. 
-// input_mins: The minimum scalar values for each of the input tensors. -// input_maxes: The maximum scalar values for each of the input tensors. -// -// Returns: -// output: A `Tensor` with the concatenation of values stacked along the -// `concat_dim` dimension. This tensor's shape matches that of `values` except -// in `concat_dim` where it has the sum of the sizes. -// output_min: The float value that the minimum quantized output value represents. -// output_max: The float value that the maximum quantized output value represents. -func QuantizedConcat(scope *Scope, concat_dim tf.Output, values []tf.Output, input_mins []tf.Output, input_maxes []tf.Output) (output tf.Output, output_min tf.Output, output_max tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "QuantizedConcat", - Input: []tf.Input{ - concat_dim, tf.OutputList(values), tf.OutputList(input_mins), tf.OutputList(input_maxes), - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Returns the batched diagonal part of a batched tensor. -// -// Returns a tensor with the `k[0]`-th to `k[1]`-th diagonals of the batched -// `input`. -// -// Assume `input` has `r` dimensions `[I, J, ..., L, M, N]`. -// Let `max_diag_len` be the maximum length among all diagonals to be extracted, -// `max_diag_len = min(M + min(k[1], 0), N + min(-k[0], 0))` -// Let `num_diags` be the number of diagonals to extract, -// `num_diags = k[1] - k[0] + 1`. -// -// If `num_diags == 1`, the output tensor is of rank `r - 1` with shape -// `[I, J, ..., L, max_diag_len]` and values: -// -// ``` -// diagonal[i, j, ..., l, n] -// = input[i, j, ..., l, n+y, n+x] ; if 0 <= n+y < M and 0 <= n+x < N, -// padding_value ; otherwise. -// ``` -// where `y = max(-k[1], 0)`, `x = max(k[1], 0)`. -// -// Otherwise, the output tensor has rank `r` with dimensions -// `[I, J, ..., L, num_diags, max_diag_len]` with values: -// -// ``` -// diagonal[i, j, ..., l, m, n] -// = input[i, j, ..., l, n+y, n+x] ; if 0 <= n+y < M and 0 <= n+x < N, -// padding_value ; otherwise. -// ``` -// where `d = k[1] - m`, `y = max(-d, 0)`, and `x = max(d, 0)`. -// -// The input must be at least a matrix. -// -// For example: -// -// ``` -// input = np.array([[[1, 2, 3, 4], # Input shape: (2, 3, 4) -// [5, 6, 7, 8], -// [9, 8, 7, 6]], -// [[5, 4, 3, 2], -// [1, 2, 3, 4], -// [5, 6, 7, 8]]]) -// -// # A main diagonal from each batch. -// tf.matrix_diag_part(input) ==> [[1, 6, 7], # Output shape: (2, 3) -// [5, 2, 7]] -// -// # A superdiagonal from each batch. -// tf.matrix_diag_part(input, k = 1) -// ==> [[2, 7, 6], # Output shape: (2, 3) -// [4, 3, 8]] -// -// # A tridiagonal band from each batch. -// tf.matrix_diag_part(input, k = (-1, 1)) -// ==> [[[2, 7, 6], # Output shape: (2, 3, 3) -// [1, 6, 7], -// [5, 8, 0]], -// [[4, 3, 8], -// [5, 2, 7], -// [1, 6, 0]]] -// -// # Padding value = 9 -// tf.matrix_diag_part(input, k = (1, 3), padding_value = 9) -// ==> [[[4, 9, 9], # Output shape: (2, 3, 3) -// [3, 8, 9], -// [2, 7, 6]], -// [[2, 9, 9], -// [3, 4, 9], -// [4, 3, 8]]] -// ``` -// -// Arguments: -// input: Rank `r` tensor where `r >= 2`. -// k: Diagonal offset(s). Positive value means superdiagonal, 0 refers to the main -// diagonal, and negative value means subdiagonals. `k` can be a single integer -// (for a single diagonal) or a pair of integers specifying the low and high ends -// of a matrix band. `k[0]` must not be larger than `k[1]`. 
-// padding_value: The value to fill the area outside the specified diagonal band with. -// Default is 0. -// -// Returns The extracted diagonal(s). -func MatrixDiagPartV2(scope *Scope, input tf.Output, k tf.Output, padding_value tf.Output) (diagonal tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MatrixDiagPartV2", - Input: []tf.Input{ - input, k, padding_value, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// A container for a multi device iterator resource. -// -// Returns: -// handle: A handle to a multi device iterator that can be passed to a -// "MultiDeviceIteratorGetNextFromShard" op. In contrast to MultiDeviceIterator, -// AnonymousIterator prevents resource sharing by name, and does not keep a -// reference to the resource container. -// deleter: A variant deleter that should be passed into the op that deletes the iterator. -func AnonymousMultiDeviceIterator(scope *Scope, devices []string, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output, deleter tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"devices": devices, "output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "AnonymousMultiDeviceIterator", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Provides the time since epoch in seconds. -// -// Returns the timestamp as a `float64` for seconds since the Unix epoch. -// -// Note: the timestamp is computed when the op is executed, not when it is added -// to the graph. -func Timestamp(scope *Scope) (ts tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Timestamp", - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the truth value of (x <= y) element-wise. -// -// *NOTE*: `LessEqual` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -// -// Example: -// -// ```python -// x = tf.constant([5, 4, 6]) -// y = tf.constant([5]) -// tf.math.less_equal(x, y) ==> [True, True, False] -// -// x = tf.constant([5, 4, 6]) -// y = tf.constant([5, 6, 6]) -// tf.math.less_equal(x, y) ==> [True, True, True] -// ``` -func LessEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "LessEqual", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// LoadTPUEmbeddingADAMParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingADAMParametersGradAccumDebug. -type LoadTPUEmbeddingADAMParametersGradAccumDebugAttr func(optionalAttr) - -// LoadTPUEmbeddingADAMParametersGradAccumDebugTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func LoadTPUEmbeddingADAMParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingADAMParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// LoadTPUEmbeddingADAMParametersGradAccumDebugTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingADAMParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingADAMParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// LoadTPUEmbeddingADAMParametersGradAccumDebugConfig sets the optional config attribute to value. 
-// If not specified, defaults to "" -func LoadTPUEmbeddingADAMParametersGradAccumDebugConfig(value string) LoadTPUEmbeddingADAMParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Load ADAM embedding parameters with debug support. -// -// An op that loads optimization parameters into HBM for embedding. Must be -// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -// embedding table configuration. For example, this op is used to install -// parameters that are loaded from a checkpoint before a training loop is -// executed. -// -// Arguments: -// parameters: Value of parameters used in the ADAM optimization algorithm. -// momenta: Value of momenta used in the ADAM optimization algorithm. -// velocities: Value of velocities used in the ADAM optimization algorithm. -// gradient_accumulators: Value of gradient_accumulators used in the ADAM optimization algorithm. -// -// -// -// Returns the created operation. -func LoadTPUEmbeddingADAMParametersGradAccumDebug(scope *Scope, parameters tf.Output, momenta tf.Output, velocities tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingADAMParametersGradAccumDebugAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LoadTPUEmbeddingADAMParametersGradAccumDebug", - Input: []tf.Input{ - parameters, momenta, velocities, gradient_accumulators, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// RetrieveTPUEmbeddingRMSPropParametersAttr is an optional argument to RetrieveTPUEmbeddingRMSPropParameters. -type RetrieveTPUEmbeddingRMSPropParametersAttr func(optionalAttr) - -// RetrieveTPUEmbeddingRMSPropParametersTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func RetrieveTPUEmbeddingRMSPropParametersTableId(value int64) RetrieveTPUEmbeddingRMSPropParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// RetrieveTPUEmbeddingRMSPropParametersTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingRMSPropParametersTableName(value string) RetrieveTPUEmbeddingRMSPropParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// RetrieveTPUEmbeddingRMSPropParametersConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingRMSPropParametersConfig(value string) RetrieveTPUEmbeddingRMSPropParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Retrieve RMSProp embedding parameters. -// -// An op that retrieves optimization parameters from embedding to host -// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up -// the correct embedding table configuration. For example, this op is -// used to retrieve updated parameters before saving a checkpoint. -// -// Returns: -// parameters: Parameter parameters updated by the RMSProp optimization algorithm. -// ms: Parameter ms updated by the RMSProp optimization algorithm. -// mom: Parameter mom updated by the RMSProp optimization algorithm. 
-func RetrieveTPUEmbeddingRMSPropParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingRMSPropParametersAttr) (parameters tf.Output, ms tf.Output, mom tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RetrieveTPUEmbeddingRMSPropParameters", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Conv3DBackpropInputV2Attr is an optional argument to Conv3DBackpropInputV2. -type Conv3DBackpropInputV2Attr func(optionalAttr) - -// Conv3DBackpropInputV2DataFormat sets the optional data_format attribute to value. -// -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func Conv3DBackpropInputV2DataFormat(value string) Conv3DBackpropInputV2Attr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Conv3DBackpropInputV2Dilations sets the optional dilations attribute to value. -// -// value: 1-D tensor of length 5. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each -// filter element on that dimension. The dimension order is determined by the -// value of `data_format`, see above for details. Dilations in the batch and -// depth dimensions must be 1. -// If not specified, defaults to -func Conv3DBackpropInputV2Dilations(value []int64) Conv3DBackpropInputV2Attr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes the gradients of 3-D convolution with respect to the input. -// -// Arguments: -// input_sizes: An integer vector representing the tensor shape of `input`, -// where `input` is a 5-D -// `[batch, depth, rows, cols, in_channels]` tensor. -// filter: Shape `[depth, rows, cols, in_channels, out_channels]`. -// `in_channels` must match between `input` and `filter`. -// out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols, -// out_channels]`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. -func Conv3DBackpropInputV2(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropInputV2Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Conv3DBackpropInputV2", - Input: []tf.Input{ - input_sizes, filter, out_backprop, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// LRNAttr is an optional argument to LRN. -type LRNAttr func(optionalAttr) - -// LRNDepthRadius sets the optional depth_radius attribute to value. -// -// value: 0-D. Half-width of the 1-D normalization window. -// If not specified, defaults to 5 -func LRNDepthRadius(value int64) LRNAttr { - return func(m optionalAttr) { - m["depth_radius"] = value - } -} - -// LRNBias sets the optional bias attribute to value. 
-// -// value: An offset (usually positive to avoid dividing by 0). -// If not specified, defaults to 1 -func LRNBias(value float32) LRNAttr { - return func(m optionalAttr) { - m["bias"] = value - } -} - -// LRNAlpha sets the optional alpha attribute to value. -// -// value: A scale factor, usually positive. -// If not specified, defaults to 1 -func LRNAlpha(value float32) LRNAttr { - return func(m optionalAttr) { - m["alpha"] = value - } -} - -// LRNBeta sets the optional beta attribute to value. -// -// value: An exponent. -// If not specified, defaults to 0.5 -func LRNBeta(value float32) LRNAttr { - return func(m optionalAttr) { - m["beta"] = value - } -} - -// Local Response Normalization. -// -// The 4-D `input` tensor is treated as a 3-D array of 1-D vectors (along the last -// dimension), and each vector is normalized independently. Within a given vector, -// each component is divided by the weighted, squared sum of inputs within -// `depth_radius`. In detail, -// -// sqr_sum[a, b, c, d] = -// sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2) -// output = input / (bias + alpha * sqr_sum) ** beta -// -// For details, see [Krizhevsky et al., ImageNet classification with deep -// convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks). -// -// Arguments: -// input: 4-D. -func LRN(scope *Scope, input tf.Output, optional ...LRNAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LRN", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// QuantizedMatMulWithBiasAndReluAndRequantizeAttr is an optional argument to QuantizedMatMulWithBiasAndReluAndRequantize. -type QuantizedMatMulWithBiasAndReluAndRequantizeAttr func(optionalAttr) - -// QuantizedMatMulWithBiasAndReluAndRequantizeToutput sets the optional Toutput attribute to value. -// If not specified, defaults to DT_QUINT8 -func QuantizedMatMulWithBiasAndReluAndRequantizeToutput(value tf.DataType) QuantizedMatMulWithBiasAndReluAndRequantizeAttr { - return func(m optionalAttr) { - m["Toutput"] = value - } -} - -// QuantizedMatMulWithBiasAndReluAndRequantizeTransposeA sets the optional transpose_a attribute to value. -// -// value: If true, `a` is transposed before multiplication. -// If not specified, defaults to false -func QuantizedMatMulWithBiasAndReluAndRequantizeTransposeA(value bool) QuantizedMatMulWithBiasAndReluAndRequantizeAttr { - return func(m optionalAttr) { - m["transpose_a"] = value - } -} - -// QuantizedMatMulWithBiasAndReluAndRequantizeTransposeB sets the optional transpose_b attribute to value. -// -// value: If true, `b` is transposed before multiplication. -// If not specified, defaults to false -func QuantizedMatMulWithBiasAndReluAndRequantizeTransposeB(value bool) QuantizedMatMulWithBiasAndReluAndRequantizeAttr { - return func(m optionalAttr) { - m["transpose_b"] = value - } -} - -// QuantizedMatMulWithBiasAndReluAndRequantizeInputQuantMode sets the optional input_quant_mode attribute to value. -// -// value: Input data quantization mode. Either MIN_FIRST(default) or SCALED. 
-// If not specified, defaults to "MIN_FIRST" -func QuantizedMatMulWithBiasAndReluAndRequantizeInputQuantMode(value string) QuantizedMatMulWithBiasAndReluAndRequantizeAttr { - return func(m optionalAttr) { - m["input_quant_mode"] = value - } -} - -// Perform a quantized matrix multiplication of `a` by the matrix `b` with bias -// add and relu and requantize fusion. -// -// The inputs must be two-dimensional matrices and 1D bias vector. And the inner -// dimension of `a` (after being transposed if `transpose_a` is non-zero) must -// match the outer dimension of `b` (after being transposed if `transposed_b` is -// non-zero). Then do broadcast add operation with bias values on the matrix -// multiplication result. The bias size must match inner dimension of `b`. Then do -// relu activation to get non-negative result. Then do requantize operation to get -// final uint8 result. -// -// Arguments: -// a: A matrix to be multiplied. Must be a two-dimensional tensor of type `quint8`. -// b: A matrix to be multiplied and must be a two-dimensional tensor of type `qint8`. -// bias: A 1D bias tensor with size matching with inner dimension of `b` (after being -// transposed if `transposed_b` is non-zero). -// min_a: The float value that the lowest quantized `a` value represents. -// max_a: The float value that the highest quantized `a` value represents. -// min_b: The float value that the lowest quantized `b` value represents. -// max_b: The float value that the highest quantized `b` value represents. -// min_freezed_output: The float value that the highest quantized output value after requantize. -// -// -// Returns: -// out -// min_out: The float value that the lowest quantized output value represents. -// max_out: The float value that the highest quantized output value represents. -func QuantizedMatMulWithBiasAndReluAndRequantize(scope *Scope, a tf.Output, b tf.Output, bias tf.Output, min_a tf.Output, max_a tf.Output, min_b tf.Output, max_b tf.Output, min_freezed_output tf.Output, max_freezed_output tf.Output, optional ...QuantizedMatMulWithBiasAndReluAndRequantizeAttr) (out tf.Output, min_out tf.Output, max_out tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizedMatMulWithBiasAndReluAndRequantize", - Input: []tf.Input{ - a, b, bias, min_a, max_a, min_b, max_b, min_freezed_output, max_freezed_output, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// QuantizedMatMulWithBiasAndReluAttr is an optional argument to QuantizedMatMulWithBiasAndRelu. -type QuantizedMatMulWithBiasAndReluAttr func(optionalAttr) - -// QuantizedMatMulWithBiasAndReluToutput sets the optional Toutput attribute to value. -// If not specified, defaults to DT_QINT32 -func QuantizedMatMulWithBiasAndReluToutput(value tf.DataType) QuantizedMatMulWithBiasAndReluAttr { - return func(m optionalAttr) { - m["Toutput"] = value - } -} - -// QuantizedMatMulWithBiasAndReluTransposeA sets the optional transpose_a attribute to value. -// -// value: If true, `a` is transposed before multiplication. -// If not specified, defaults to false -func QuantizedMatMulWithBiasAndReluTransposeA(value bool) QuantizedMatMulWithBiasAndReluAttr { - return func(m optionalAttr) { - m["transpose_a"] = value - } -} - -// QuantizedMatMulWithBiasAndReluTransposeB sets the optional transpose_b attribute to value. -// -// value: If true, `b` is transposed before multiplication. 
-// If not specified, defaults to false -func QuantizedMatMulWithBiasAndReluTransposeB(value bool) QuantizedMatMulWithBiasAndReluAttr { - return func(m optionalAttr) { - m["transpose_b"] = value - } -} - -// QuantizedMatMulWithBiasAndReluInputQuantMode sets the optional input_quant_mode attribute to value. -// -// value: Input data quantization mode. Either MIN_FIRST(default) or SCALED. -// If not specified, defaults to "MIN_FIRST" -func QuantizedMatMulWithBiasAndReluInputQuantMode(value string) QuantizedMatMulWithBiasAndReluAttr { - return func(m optionalAttr) { - m["input_quant_mode"] = value - } -} - -// Perform a quantized matrix multiplication of `a` by the matrix `b` with bias -// add and relu fusion. -// -// The inputs must be two-dimensional matrices and 1D bias vector. And the inner -// dimension of `a` (after being transposed if `transpose_a` is non-zero) must -// match the outer dimension of `b` (after being transposed if `transposed_b` is -// non-zero). Then do broadcast add operation with bias values on the matrix -// multiplication result. The bias size must match inner dimension of `b`. Then do -// relu activation to get non-negative result. -// -// Arguments: -// a: A matrix to be multiplied. Must be a two-dimensional tensor of type `quint8`. -// b: A matrix to be multiplied and must be a two-dimensional tensor of type `qint8`. -// bias: A 1D bias tensor with size matching with inner dimension of `b` (after being -// transposed if `transposed_b` is non-zero). -// min_a: The float value that the lowest quantized `a` value represents. -// max_a: The float value that the highest quantized `a` value represents. -// min_b: The float value that the lowest quantized `b` value represents. -// max_b: The float value that the highest quantized `b` value represents. -// -// Returns: -// out -// min_out: The float value that the lowest quantized output value represents. -// max_out: The float value that the highest quantized output value represents. -func QuantizedMatMulWithBiasAndRelu(scope *Scope, a tf.Output, b tf.Output, bias tf.Output, min_a tf.Output, max_a tf.Output, min_b tf.Output, max_b tf.Output, optional ...QuantizedMatMulWithBiasAndReluAttr) (out tf.Output, min_out tf.Output, max_out tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizedMatMulWithBiasAndRelu", - Input: []tf.Input{ - a, b, bias, min_a, max_a, min_b, max_b, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// QuantizedMatMulWithBiasAttr is an optional argument to QuantizedMatMulWithBias. -type QuantizedMatMulWithBiasAttr func(optionalAttr) - -// QuantizedMatMulWithBiasToutput sets the optional Toutput attribute to value. -// If not specified, defaults to DT_QINT32 -func QuantizedMatMulWithBiasToutput(value tf.DataType) QuantizedMatMulWithBiasAttr { - return func(m optionalAttr) { - m["Toutput"] = value - } -} - -// QuantizedMatMulWithBiasTransposeA sets the optional transpose_a attribute to value. -// -// value: If true, `a` is transposed before multiplication. -// If not specified, defaults to false -func QuantizedMatMulWithBiasTransposeA(value bool) QuantizedMatMulWithBiasAttr { - return func(m optionalAttr) { - m["transpose_a"] = value - } -} - -// QuantizedMatMulWithBiasTransposeB sets the optional transpose_b attribute to value. -// -// value: If true, `b` is transposed before multiplication. 
-// If not specified, defaults to false -func QuantizedMatMulWithBiasTransposeB(value bool) QuantizedMatMulWithBiasAttr { - return func(m optionalAttr) { - m["transpose_b"] = value - } -} - -// QuantizedMatMulWithBiasInputQuantMode sets the optional input_quant_mode attribute to value. -// -// value: Input data quantization mode. Either MIN_FIRST(default) or SCALED. -// If not specified, defaults to "MIN_FIRST" -func QuantizedMatMulWithBiasInputQuantMode(value string) QuantizedMatMulWithBiasAttr { - return func(m optionalAttr) { - m["input_quant_mode"] = value - } -} - -// Performs a quantized matrix multiplication of `a` by the matrix `b` with bias -// add. -// -// The inputs must be two-dimensional matrices and 1D bias vector. And the inner -// dimension of `a` (after being transposed if `transpose_a` is non-zero) must -// match the outer dimension of `b` (after being transposed if `transposed_b` is -// non-zero). Then do broadcast add operation with bias values on the matrix -// multiplication result. The bias size must match inner dimension of `b`. -// -// Arguments: -// a: A matrix to be multiplied. Must be a two-dimensional tensor of type `quint8`. -// b: A matrix to be multiplied and must be a two-dimensional tensor of type `qint8`. -// bias: A 1D bias tensor with size matching inner dimension of `b` (after being -// transposed if `transposed_b` is non-zero). -// min_a: The float value that the lowest quantized `a` value represents. -// max_a: The float value that the highest quantized `a` value represents. -// min_b: The float value that the lowest quantized `b` value represents. -// max_b: The float value that the highest quantized `b` value represents. -// -// Returns: -// out -// min_out: The float value that the lowest quantized output value represents. -// max_out: The float value that the highest quantized output value represents. -func QuantizedMatMulWithBias(scope *Scope, a tf.Output, b tf.Output, bias tf.Output, min_a tf.Output, max_a tf.Output, min_b tf.Output, max_b tf.Output, optional ...QuantizedMatMulWithBiasAttr) (out tf.Output, min_out tf.Output, max_out tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizedMatMulWithBias", - Input: []tf.Input{ - a, b, bias, min_a, max_a, min_b, max_b, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// TensorArrayGatherV2Attr is an optional argument to TensorArrayGatherV2. -type TensorArrayGatherV2Attr func(optionalAttr) - -// TensorArrayGatherV2ElementShape sets the optional element_shape attribute to value. -// If not specified, defaults to -func TensorArrayGatherV2ElementShape(value tf.Shape) TensorArrayGatherV2Attr { - return func(m optionalAttr) { - m["element_shape"] = value - } -} - -// Deprecated. Use TensorArrayGatherV3 -// -// DEPRECATED at GraphDef version 26: Use TensorArrayGatherV3 -func TensorArrayGatherV2(scope *Scope, handle tf.Output, indices tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayGatherV2Attr) (value tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TensorArrayGatherV2", - Input: []tf.Input{ - handle, indices, flow_in, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RFFT3DAttr is an optional argument to RFFT3D. 
-type RFFT3DAttr func(optionalAttr) - -// RFFT3DTcomplex sets the optional Tcomplex attribute to value. -// If not specified, defaults to DT_COMPLEX64 -func RFFT3DTcomplex(value tf.DataType) RFFT3DAttr { - return func(m optionalAttr) { - m["Tcomplex"] = value - } -} - -// 3D real-valued fast Fourier transform. -// -// Computes the 3-dimensional discrete Fourier transform of a real-valued signal -// over the inner-most 3 dimensions of `input`. -// -// Since the DFT of a real signal is Hermitian-symmetric, `RFFT3D` only returns the -// `fft_length / 2 + 1` unique components of the FFT for the inner-most dimension -// of `output`: the zero-frequency term, followed by the `fft_length / 2` -// positive-frequency terms. -// -// Along each axis `RFFT3D` is computed on, if `fft_length` is smaller than the -// corresponding dimension of `input`, the dimension is cropped. If it is larger, -// the dimension is padded with zeros. -// -// Arguments: -// input: A float32 tensor. -// fft_length: An int32 tensor of shape [3]. The FFT length for each dimension. -// -// Returns A complex64 tensor of the same rank as `input`. The inner-most 3 -// dimensions of `input` are replaced with the their 3D Fourier transform. The -// inner-most dimension contains `fft_length / 2 + 1` unique frequency -// components. -// -// @compatibility(numpy) -// Equivalent to np.fft.rfftn with 3 dimensions. -// @end_compatibility -func RFFT3D(scope *Scope, input tf.Output, fft_length tf.Output, optional ...RFFT3DAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RFFT3D", - Input: []tf.Input{ - input, fft_length, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Reorders a SparseTensor into the canonical, row-major ordering. -// -// Note that by convention, all sparse ops preserve the canonical ordering along -// increasing dimension number. The only time ordering can be violated is during -// manual manipulation of the indices and values vectors to add entries. -// -// Reordering does not affect the shape of the SparseTensor. -// -// If the tensor has rank `R` and `N` non-empty values, `input_indices` has -// shape `[N, R]`, input_values has length `N`, and input_shape has length `R`. -// -// Arguments: -// input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. -// input_shape: 1-D. Shape of the input SparseTensor. -// -// Returns: -// output_indices: 2-D. `N x R` matrix with the same indices as input_indices, but -// in canonical row-major ordering. -// output_values: 1-D. `N` non-empty values corresponding to `output_indices`. -func SparseReorder(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output) (output_indices tf.Output, output_values tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseReorder", - Input: []tf.Input{ - input_indices, input_values, input_shape, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Generates fingerprint values. -// -// Generates fingerprint values of `data`. -// -// Fingerprint op considers the first dimension of `data` as the batch dimension, -// and `output[i]` contains the fingerprint value generated from contents in -// `data[i, ...]` for all `i`. 
-// -// Fingerprint op writes fingerprint values as byte arrays. For example, the -// default method `farmhash64` generates a 64-bit fingerprint value at a time. -// This 8-byte value is written out as an `uint8` array of size 8, in little-endian -// order. -// -// For example, suppose that `data` has data type `DT_INT32` and shape (2, 3, 4), -// and that the fingerprint method is `farmhash64`. In this case, the output shape -// is (2, 8), where 2 is the batch dimension size of `data`, and 8 is the size of -// each fingerprint value in bytes. `output[0, :]` is generated from 12 integers in -// `data[0, :, :]` and similarly `output[1, :]` is generated from other 12 integers -// in `data[1, :, :]`. -// -// Note that this op fingerprints the raw underlying buffer, and it does not -// fingerprint Tensor's metadata such as data type and/or shape. For example, the -// fingerprint values are invariant under reshapes and bitcasts as long as the -// batch dimension remain the same: -// -// ``` -// Fingerprint(data) == Fingerprint(Reshape(data, ...)) -// Fingerprint(data) == Fingerprint(Bitcast(data, ...)) -// ``` -// -// For string data, one should expect `Fingerprint(data) != -// Fingerprint(ReduceJoin(data))` in general. -// -// Arguments: -// data: Must have rank 1 or higher. -// method: Fingerprint method used by this op. Currently available method is -// `farmhash::fingerprint64`. -// -// Returns A two-dimensional `Tensor` of type `tf.uint8`. The first dimension equals to -// `data`'s first dimension, and the second dimension size depends on the -// fingerprint algorithm. -func Fingerprint(scope *Scope, data tf.Output, method tf.Output) (fingerprint tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Fingerprint", - Input: []tf.Input{ - data, method, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// CopyAttr is an optional argument to Copy. -type CopyAttr func(optionalAttr) - -// CopyTensorName sets the optional tensor_name attribute to value. -// -// value: The name of the input tensor. -// If not specified, defaults to "" -func CopyTensorName(value string) CopyAttr { - return func(m optionalAttr) { - m["tensor_name"] = value - } -} - -// CopyDebugOpsSpec sets the optional debug_ops_spec attribute to value. -// -// value: A list of debug op spec (op, url, gated_grpc) for attached debug -// ops. Each element of the list has the format -// ;;, wherein gated_grpc is boolean represented -// as 0/1. E.g., "DebugIdentity;grpc://foo:3333;1", -// "DebugIdentity;file:///tmp/tfdbg_1;0". -// If not specified, defaults to <> -func CopyDebugOpsSpec(value []string) CopyAttr { - return func(m optionalAttr) { - m["debug_ops_spec"] = value - } -} - -// Copy a tensor from CPU-to-CPU or GPU-to-GPU. -// -// Performs CPU-to-CPU or GPU-to-GPU deep-copying of tensor, depending on the -// device on which the tensor is allocated. -// N.B.: If the all downstream attached debug ops are disabled given the current -// gRPC gating status, the output will simply forward the input tensor without -// deep-copying. See the documentation of Debug* ops for more details. -// -// Unlike the CopyHost Op, this op does not have HostMemory constraint on its -// input or output. -// -// Arguments: -// input: Input tensor. 
-func Copy(scope *Scope, input tf.Output, optional ...CopyAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Copy", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Updates specified rows 'i' with values 'v'. -// -// Computes `x[i, :] = v; return x`. -// -// Originally this function is mutative however for compilation we make this -// operation create / operate on a copy of `x`. -// -// Arguments: -// x: A tensor of type `T`. -// i: A vector. Indices into the left-most dimension of `x`. -// v: A `Tensor` of type T. Same dimension sizes as x except the first dimension, which must be the same as i's size. -// -// Returns A `Tensor` of type T. An alias of `x`. The content of `y` is undefined if there are duplicates in `i`. -func InplaceUpdate(scope *Scope, x tf.Output, i tf.Output, v tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "InplaceUpdate", - Input: []tf.Input{ - x, i, v, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Table initializer that takes two tensors for keys and values respectively. -// -// Arguments: -// table_handle: Handle to a table which will be initialized. -// keys: Keys of type Tkey. -// values: Values of type Tval. -// -// Returns the created operation. -func InitializeTableV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "InitializeTableV2", - Input: []tf.Input{ - table_handle, keys, values, - }, - } - return scope.AddOperation(opspec) -} - -// BatchToSpace for N-D tensors of type T. -// -// This operation reshapes the "batch" dimension 0 into `M + 1` dimensions of shape -// `block_shape + [batch]`, interleaves these blocks back into the grid defined by -// the spatial dimensions `[1, ..., M]`, to obtain a result with the same rank as -// the input. The spatial dimensions of this intermediate result are then -// optionally cropped according to `crops` to produce the output. This is the -// reverse of SpaceToBatch. See below for a precise description. -// -// Arguments: -// input: N-D with shape `input_shape = [batch] + spatial_shape + remaining_shape`, -// where spatial_shape has M dimensions. -// block_shape: 1-D with shape `[M]`, all values must be >= 1. -// crops: 2-D with shape `[M, 2]`, all values must be >= 0. -// `crops[i] = [crop_start, crop_end]` specifies the amount to crop from input -// dimension `i + 1`, which corresponds to spatial dimension `i`. It is -// required that -// `crop_start[i] + crop_end[i] <= block_shape[i] * input_shape[i + 1]`. -// -// This operation is equivalent to the following steps: -// -// 1. Reshape `input` to `reshaped` of shape: -// [block_shape[0], ..., block_shape[M-1], -// batch / prod(block_shape), -// input_shape[1], ..., input_shape[N-1]] -// -// 2. Permute dimensions of `reshaped` to produce `permuted` of shape -// [batch / prod(block_shape), -// -// input_shape[1], block_shape[0], -// ..., -// input_shape[M], block_shape[M-1], -// -// input_shape[M+1], ..., input_shape[N-1]] -// -// 3. Reshape `permuted` to produce `reshaped_permuted` of shape -// [batch / prod(block_shape), -// -// input_shape[1] * block_shape[0], -// ..., -// input_shape[M] * block_shape[M-1], -// -// input_shape[M+1], -// ..., -// input_shape[N-1]] -// -// 4. 
Crop the start and end of dimensions `[1, ..., M]` of -// `reshaped_permuted` according to `crops` to produce the output of shape: -// [batch / prod(block_shape), -// -// input_shape[1] * block_shape[0] - crops[0,0] - crops[0,1], -// ..., -// input_shape[M] * block_shape[M-1] - crops[M-1,0] - crops[M-1,1], -// -// input_shape[M+1], ..., input_shape[N-1]] -// -// Some examples: -// -// (1) For the following input of shape `[4, 1, 1, 1]`, `block_shape = [2, 2]`, and -// `crops = [[0, 0], [0, 0]]`: -// -// ``` -// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]] -// ``` -// -// The output tensor has shape `[1, 2, 2, 1]` and value: -// -// ``` -// x = [[[[1], [2]], [[3], [4]]]] -// ``` -// -// (2) For the following input of shape `[4, 1, 1, 3]`, `block_shape = [2, 2]`, and -// `crops = [[0, 0], [0, 0]]`: -// -// ``` -// [[[[1, 2, 3]]], [[[4, 5, 6]]], [[[7, 8, 9]]], [[[10, 11, 12]]]] -// ``` -// -// The output tensor has shape `[1, 2, 2, 3]` and value: -// -// ``` -// x = [[[[1, 2, 3], [4, 5, 6]], -// [[7, 8, 9], [10, 11, 12]]]] -// ``` -// -// (3) For the following input of shape `[4, 2, 2, 1]`, `block_shape = [2, 2]`, and -// `crops = [[0, 0], [0, 0]]`: -// -// ``` -// x = [[[[1], [3]], [[9], [11]]], -// [[[2], [4]], [[10], [12]]], -// [[[5], [7]], [[13], [15]]], -// [[[6], [8]], [[14], [16]]]] -// ``` -// -// The output tensor has shape `[1, 4, 4, 1]` and value: -// -// ``` -// x = [[[[1], [2], [3], [4]], -// [[5], [6], [7], [8]], -// [[9], [10], [11], [12]], -// [[13], [14], [15], [16]]]] -// ``` -// -// (4) For the following input of shape `[8, 1, 3, 1]`, `block_shape = [2, 2]`, and -// `crops = [[0, 0], [2, 0]]`: -// -// ``` -// x = [[[[0], [1], [3]]], [[[0], [9], [11]]], -// [[[0], [2], [4]]], [[[0], [10], [12]]], -// [[[0], [5], [7]]], [[[0], [13], [15]]], -// [[[0], [6], [8]]], [[[0], [14], [16]]]] -// ``` -// -// The output tensor has shape `[2, 2, 4, 1]` and value: -// -// ``` -// x = [[[[1], [2], [3], [4]], -// [[5], [6], [7], [8]]], -// [[[9], [10], [11], [12]], -// [[13], [14], [15], [16]]]] -// ``` -func BatchToSpaceND(scope *Scope, input tf.Output, block_shape tf.Output, crops tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BatchToSpaceND", - Input: []tf.Input{ - input, block_shape, crops, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// FIFOQueueV2Attr is an optional argument to FIFOQueueV2. -type FIFOQueueV2Attr func(optionalAttr) - -// FIFOQueueV2Shapes sets the optional shapes attribute to value. -// -// value: The shape of each component in a value. The length of this attr must -// be either 0 or the same as the length of component_types. If the length of -// this attr is 0, the shapes of queue elements are not constrained, and -// only one element may be dequeued at a time. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func FIFOQueueV2Shapes(value []tf.Shape) FIFOQueueV2Attr { - return func(m optionalAttr) { - m["shapes"] = value - } -} - -// FIFOQueueV2Capacity sets the optional capacity attribute to value. -// -// value: The upper bound on the number of elements in this queue. -// Negative numbers mean no limit. -// If not specified, defaults to -1 -func FIFOQueueV2Capacity(value int64) FIFOQueueV2Attr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// FIFOQueueV2Container sets the optional container attribute to value. -// -// value: If non-empty, this queue is placed in the given container. -// Otherwise, a default container is used. 
-// If not specified, defaults to "" -func FIFOQueueV2Container(value string) FIFOQueueV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// FIFOQueueV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this queue will be shared under the given name -// across multiple sessions. -// If not specified, defaults to "" -func FIFOQueueV2SharedName(value string) FIFOQueueV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// A queue that produces elements in first-in first-out order. -// -// Arguments: -// component_types: The type of each component in a value. -// -// Returns The handle to the queue. -func FIFOQueueV2(scope *Scope, component_types []tf.DataType, optional ...FIFOQueueV2Attr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"component_types": component_types} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FIFOQueueV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Quantized Batch normalization. -// -// This op is deprecated and will be removed in the future. Prefer -// `tf.nn.batch_normalization`. -// -// Arguments: -// t: A 4D input Tensor. -// t_min: The value represented by the lowest quantized input. -// t_max: The value represented by the highest quantized input. -// m: A 1D mean Tensor with size matching the last dimension of t. -// This is the first output from tf.nn.moments, -// or a saved moving average thereof. -// m_min: The value represented by the lowest quantized mean. -// m_max: The value represented by the highest quantized mean. -// v: A 1D variance Tensor with size matching the last dimension of t. -// This is the second output from tf.nn.moments, -// or a saved moving average thereof. -// v_min: The value represented by the lowest quantized variance. -// v_max: The value represented by the highest quantized variance. -// beta: A 1D beta Tensor with size matching the last dimension of t. -// An offset to be added to the normalized tensor. -// beta_min: The value represented by the lowest quantized offset. -// beta_max: The value represented by the highest quantized offset. -// gamma: A 1D gamma Tensor with size matching the last dimension of t. -// If "scale_after_normalization" is true, this tensor will be multiplied -// with the normalized tensor. -// gamma_min: The value represented by the lowest quantized gamma. -// gamma_max: The value represented by the highest quantized gamma. -// -// variance_epsilon: A small float number to avoid dividing by 0. -// scale_after_normalization: A bool indicating whether the resulted tensor -// needs to be multiplied with gamma. 
-func QuantizedBatchNormWithGlobalNormalization(scope *Scope, t tf.Output, t_min tf.Output, t_max tf.Output, m tf.Output, m_min tf.Output, m_max tf.Output, v tf.Output, v_min tf.Output, v_max tf.Output, beta tf.Output, beta_min tf.Output, beta_max tf.Output, gamma tf.Output, gamma_min tf.Output, gamma_max tf.Output, out_type tf.DataType, variance_epsilon float32, scale_after_normalization bool) (result tf.Output, result_min tf.Output, result_max tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"out_type": out_type, "variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization} - opspec := tf.OpSpec{ - Type: "QuantizedBatchNormWithGlobalNormalization", - Input: []tf.Input{ - t, t_min, t_max, m, m_min, m_max, v, v_min, v_max, beta, beta_min, beta_max, gamma, gamma_min, gamma_max, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// ResourceStridedSliceAssignAttr is an optional argument to ResourceStridedSliceAssign. -type ResourceStridedSliceAssignAttr func(optionalAttr) - -// ResourceStridedSliceAssignBeginMask sets the optional begin_mask attribute to value. -// If not specified, defaults to 0 -func ResourceStridedSliceAssignBeginMask(value int64) ResourceStridedSliceAssignAttr { - return func(m optionalAttr) { - m["begin_mask"] = value - } -} - -// ResourceStridedSliceAssignEndMask sets the optional end_mask attribute to value. -// If not specified, defaults to 0 -func ResourceStridedSliceAssignEndMask(value int64) ResourceStridedSliceAssignAttr { - return func(m optionalAttr) { - m["end_mask"] = value - } -} - -// ResourceStridedSliceAssignEllipsisMask sets the optional ellipsis_mask attribute to value. -// If not specified, defaults to 0 -func ResourceStridedSliceAssignEllipsisMask(value int64) ResourceStridedSliceAssignAttr { - return func(m optionalAttr) { - m["ellipsis_mask"] = value - } -} - -// ResourceStridedSliceAssignNewAxisMask sets the optional new_axis_mask attribute to value. -// If not specified, defaults to 0 -func ResourceStridedSliceAssignNewAxisMask(value int64) ResourceStridedSliceAssignAttr { - return func(m optionalAttr) { - m["new_axis_mask"] = value - } -} - -// ResourceStridedSliceAssignShrinkAxisMask sets the optional shrink_axis_mask attribute to value. -// If not specified, defaults to 0 -func ResourceStridedSliceAssignShrinkAxisMask(value int64) ResourceStridedSliceAssignAttr { - return func(m optionalAttr) { - m["shrink_axis_mask"] = value - } -} - -// Assign `value` to the sliced l-value reference of `ref`. -// -// The values of `value` are assigned to the positions in the variable -// `ref` that are selected by the slice parameters. The slice parameters -// `begin, `end`, `strides`, etc. work exactly as in `StridedSlice`. -// -// NOTE this op currently does not support broadcasting and so `value`'s -// shape must be exactly the shape produced by the slice of `ref`. -// -// Returns the created operation. 
-func ResourceStridedSliceAssign(scope *Scope, ref tf.Output, begin tf.Output, end tf.Output, strides tf.Output, value tf.Output, optional ...ResourceStridedSliceAssignAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceStridedSliceAssign", - Input: []tf.Input{ - ref, begin, end, strides, value, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// QuantizedRelu6Attr is an optional argument to QuantizedRelu6. -type QuantizedRelu6Attr func(optionalAttr) - -// QuantizedRelu6OutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_QUINT8 -func QuantizedRelu6OutType(value tf.DataType) QuantizedRelu6Attr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// Computes Quantized Rectified Linear 6: `min(max(features, 0), 6)` -// -// Arguments: -// -// min_features: The float value that the lowest quantized value represents. -// max_features: The float value that the highest quantized value represents. -// -// Returns: -// activations: Has the same output shape as "features". -// min_activations: The float value that the lowest quantized value represents. -// max_activations: The float value that the highest quantized value represents. -func QuantizedRelu6(scope *Scope, features tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedRelu6Attr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizedRelu6", - Input: []tf.Input{ - features, min_features, max_features, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// DataFormatVecPermuteAttr is an optional argument to DataFormatVecPermute. -type DataFormatVecPermuteAttr func(optionalAttr) - -// DataFormatVecPermuteSrcFormat sets the optional src_format attribute to value. -// -// value: source data format. -// If not specified, defaults to "NHWC" -func DataFormatVecPermuteSrcFormat(value string) DataFormatVecPermuteAttr { - return func(m optionalAttr) { - m["src_format"] = value - } -} - -// DataFormatVecPermuteDstFormat sets the optional dst_format attribute to value. -// -// value: destination data format. -// If not specified, defaults to "NCHW" -func DataFormatVecPermuteDstFormat(value string) DataFormatVecPermuteAttr { - return func(m optionalAttr) { - m["dst_format"] = value - } -} - -// Returns the permuted vector/tensor in the destination data format given the -// -// one in the source data format. -// -// Arguments: -// x: Vector of size 4 or Tensor of shape (4, 2) in source data format. -// -// Returns Vector of size 4 or Tensor of shape (4, 2) in destination data format. -func DataFormatVecPermute(scope *Scope, x tf.Output, optional ...DataFormatVecPermuteAttr) (y tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DataFormatVecPermute", - Input: []tf.Input{ - x, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Adds Tensor 'bias' to Tensor 'input' for Quantized types. -// -// Broadcasts the values of bias on dimensions 0..N-2 of 'input'. 
-// -// Arguments: -// -// bias: A 1D bias Tensor with size matching the last dimension of 'input'. -// min_input: The float value that the lowest quantized input value represents. -// max_input: The float value that the highest quantized input value represents. -// min_bias: The float value that the lowest quantized bias value represents. -// max_bias: The float value that the highest quantized bias value represents. -// -// -// Returns: -// output -// min_out: The float value that the lowest quantized output value represents. -// max_out: The float value that the highest quantized output value represents. -func QuantizedBiasAdd(scope *Scope, input tf.Output, bias tf.Output, min_input tf.Output, max_input tf.Output, min_bias tf.Output, max_bias tf.Output, out_type tf.DataType) (output tf.Output, min_out tf.Output, max_out tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"out_type": out_type} - opspec := tf.OpSpec{ - Type: "QuantizedBiasAdd", - Input: []tf.Input{ - input, bias, min_input, max_input, min_bias, max_bias, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// MutableDenseHashTableV2Attr is an optional argument to MutableDenseHashTableV2. -type MutableDenseHashTableV2Attr func(optionalAttr) - -// MutableDenseHashTableV2Container sets the optional container attribute to value. -// -// value: If non-empty, this table is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func MutableDenseHashTableV2Container(value string) MutableDenseHashTableV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// MutableDenseHashTableV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this table is shared under the given name across -// multiple sessions. -// If not specified, defaults to "" -func MutableDenseHashTableV2SharedName(value string) MutableDenseHashTableV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// MutableDenseHashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. -// If not specified, defaults to false -func MutableDenseHashTableV2UseNodeNameSharing(value bool) MutableDenseHashTableV2Attr { - return func(m optionalAttr) { - m["use_node_name_sharing"] = value - } -} - -// MutableDenseHashTableV2ValueShape sets the optional value_shape attribute to value. -// -// value: The shape of each value. -// If not specified, defaults to <> -func MutableDenseHashTableV2ValueShape(value tf.Shape) MutableDenseHashTableV2Attr { - return func(m optionalAttr) { - m["value_shape"] = value - } -} - -// MutableDenseHashTableV2InitialNumBuckets sets the optional initial_num_buckets attribute to value. -// -// value: The initial number of hash table buckets. Must be a power -// to 2. -// If not specified, defaults to 131072 -func MutableDenseHashTableV2InitialNumBuckets(value int64) MutableDenseHashTableV2Attr { - return func(m optionalAttr) { - m["initial_num_buckets"] = value - } -} - -// MutableDenseHashTableV2MaxLoadFactor sets the optional max_load_factor attribute to value. -// -// value: The maximum ratio between number of entries and number of -// buckets before growing the table. Must be between 0 and 1. 
-// If not specified, defaults to 0.8 -func MutableDenseHashTableV2MaxLoadFactor(value float32) MutableDenseHashTableV2Attr { - return func(m optionalAttr) { - m["max_load_factor"] = value - } -} - -// Creates an empty hash table that uses tensors as the backing store. -// -// It uses "open addressing" with quadratic reprobing to resolve -// collisions. -// -// This op creates a mutable hash table, specifying the type of its keys and -// values. Each value must be a scalar. Data can be inserted into the table using -// the insert operations. It does not support the initialization operation. -// -// Arguments: -// empty_key: The key used to represent empty key buckets internally. Must not -// be used in insert or lookup operations. -// -// value_dtype: Type of the table values. -// -// Returns Handle to a table. -func MutableDenseHashTableV2(scope *Scope, empty_key tf.Output, deleted_key tf.Output, value_dtype tf.DataType, optional ...MutableDenseHashTableV2Attr) (table_handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"value_dtype": value_dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MutableDenseHashTableV2", - Input: []tf.Input{ - empty_key, deleted_key, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// FractionalAvgPoolGradAttr is an optional argument to FractionalAvgPoolGrad. -type FractionalAvgPoolGradAttr func(optionalAttr) - -// FractionalAvgPoolGradOverlapping sets the optional overlapping attribute to value. -// -// value: When set to True, it means when pooling, the values at the boundary -// of adjacent pooling cells are used by both cells. For example: -// -// `index 0 1 2 3 4` -// -// `value 20 5 16 3 7` -// -// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice. -// The result would be [41/3, 26/3] for fractional avg pooling. -// If not specified, defaults to false -func FractionalAvgPoolGradOverlapping(value bool) FractionalAvgPoolGradAttr { - return func(m optionalAttr) { - m["overlapping"] = value - } -} - -// Computes gradient of the FractionalAvgPool function. -// -// Unlike FractionalMaxPoolGrad, we don't need to find arg_max for -// FractionalAvgPoolGrad, we just need to evenly back-propagate each element of -// out_backprop to those indices that form the same pooling cell. Therefore, we -// just need to know the shape of original input tensor, instead of the whole -// tensor. -// -// Arguments: -// orig_input_tensor_shape: Original input tensor shape for `fractional_avg_pool` -// out_backprop: 4-D with shape `[batch, height, width, channels]`. Gradients -// w.r.t. the output of `fractional_avg_pool`. -// row_pooling_sequence: row pooling sequence, form pooling region with -// col_pooling_sequence. -// col_pooling_sequence: column pooling sequence, form pooling region with -// row_pooling sequence. -// -// Returns 4-D. Gradients w.r.t. the input of `fractional_avg_pool`. 
-func FractionalAvgPoolGrad(scope *Scope, orig_input_tensor_shape tf.Output, out_backprop tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output, optional ...FractionalAvgPoolGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FractionalAvgPoolGrad", - Input: []tf.Input{ - orig_input_tensor_shape, out_backprop, row_pooling_sequence, col_pooling_sequence, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// FractionalMaxPoolGradAttr is an optional argument to FractionalMaxPoolGrad. -type FractionalMaxPoolGradAttr func(optionalAttr) - -// FractionalMaxPoolGradOverlapping sets the optional overlapping attribute to value. -// -// value: When set to True, it means when pooling, the values at the boundary -// of adjacent pooling cells are used by both cells. For example: -// -// `index 0 1 2 3 4` -// -// `value 20 5 16 3 7` -// -// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice. -// The result would be [20, 16] for fractional max pooling. -// If not specified, defaults to false -func FractionalMaxPoolGradOverlapping(value bool) FractionalMaxPoolGradAttr { - return func(m optionalAttr) { - m["overlapping"] = value - } -} - -// Computes gradient of the FractionalMaxPool function. -// -// Arguments: -// orig_input: Original input for `fractional_max_pool` -// orig_output: Original output for `fractional_max_pool` -// out_backprop: 4-D with shape `[batch, height, width, channels]`. Gradients -// w.r.t. the output of `fractional_max_pool`. -// row_pooling_sequence: row pooling sequence, form pooling region with -// col_pooling_sequence. -// col_pooling_sequence: column pooling sequence, form pooling region with -// row_pooling sequence. -// -// Returns 4-D. Gradients w.r.t. the input of `fractional_max_pool`. -func FractionalMaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, out_backprop tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output, optional ...FractionalMaxPoolGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FractionalMaxPoolGrad", - Input: []tf.Input{ - orig_input, orig_output, out_backprop, row_pooling_sequence, col_pooling_sequence, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// NthElementAttr is an optional argument to NthElement. -type NthElementAttr func(optionalAttr) - -// NthElementReverse sets the optional reverse attribute to value. -// -// value: When set to True, find the nth-largest value in the vector and vice -// versa. -// If not specified, defaults to false -func NthElementReverse(value bool) NthElementAttr { - return func(m optionalAttr) { - m["reverse"] = value - } -} - -// Finds values of the `n`-th order statistic for the last dimension. -// -// If the input is a vector (rank-1), finds the entries which is the nth-smallest -// value in the vector and outputs their values as scalar tensor. -// -// For matrices (resp. higher rank input), computes the entries which is the -// nth-smallest value in each row (resp. vector along the last dimension). Thus, -// -// values.shape = input.shape[:-1] -// -// Arguments: -// input: 1-D or higher with last dimension at least `n+1`. -// n: 0-D. 
Position of sorted vector to select along the last dimension (along -// each row for matrices). Valid range of n is `[0, input.shape[:-1])` -// -// Returns The `n`-th order statistic along each last dimensional slice. -func NthElement(scope *Scope, input tf.Output, n tf.Output, optional ...NthElementAttr) (values tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "NthElement", - Input: []tf.Input{ - input, n, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Pads a tensor. -// -// This operation pads `input` according to the `paddings` and `constant_values` -// you specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is -// the rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates -// how many padding values to add before the contents of `input` in that dimension, -// and `paddings[D, 1]` indicates how many padding values to add after the contents -// of `input` in that dimension. `constant_values` is a scalar tensor of the same -// type as `input` that indicates the value to use for padding `input`. -// -// The padded size of each dimension D of the output is: -// -// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)` -// -// For example: -// -// ``` -// # 't' is [[1, 1], [2, 2]] -// # 'paddings' is [[1, 1], [2, 2]] -// # 'constant_values' is 0 -// # rank of 't' is 2 -// pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0] -// [0, 0, 1, 1, 0, 0] -// [0, 0, 2, 2, 0, 0] -// [0, 0, 0, 0, 0, 0]] -// ``` -func PadV2(scope *Scope, input tf.Output, paddings tf.Output, constant_values tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "PadV2", - Input: []tf.Input{ - input, paddings, constant_values, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes cos of x element-wise. -// -// Given an input tensor, this function computes cosine of every -// element in the tensor. Input range is `(-inf, inf)` and -// output range is `[-1,1]`. If input lies outside the boundary, `nan` -// is returned. -// -// ```python -// x = tf.constant([-float("inf"), -9, -0.5, 1, 1.2, 200, 10000, float("inf")]) -// tf.math.cos(x) ==> [nan -0.91113025 0.87758255 0.5403023 0.36235774 0.48718765 -0.95215535 nan] -// ``` -func Cos(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Cos", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// TopKV2Attr is an optional argument to TopKV2. -type TopKV2Attr func(optionalAttr) - -// TopKV2Sorted sets the optional sorted attribute to value. -// -// value: If true the resulting `k` elements will be sorted by the values in -// descending order. -// If not specified, defaults to true -func TopKV2Sorted(value bool) TopKV2Attr { - return func(m optionalAttr) { - m["sorted"] = value - } -} - -// Finds values and indices of the `k` largest elements for the last dimension. -// -// If the input is a vector (rank-1), finds the `k` largest entries in the vector -// and outputs their values and indices as vectors. Thus `values[j]` is the -// `j`-th largest entry in `input`, and its index is `indices[j]`. -// -// For matrices (resp. higher rank input), computes the top `k` entries in each -// row (resp. vector along the last dimension). 
Thus, -// -// values.shape = indices.shape = input.shape[:-1] + [k] -// -// If two elements are equal, the lower-index element appears first. -// -// Arguments: -// input: 1-D or higher with last dimension at least `k`. -// k: 0-D. Number of top elements to look for along the last dimension (along each -// row for matrices). -// -// Returns: -// values: The `k` largest elements along each last dimensional slice. -// indices: The indices of `values` within the last dimension of `input`. -func TopKV2(scope *Scope, input tf.Output, k tf.Output, optional ...TopKV2Attr) (values tf.Output, indices tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TopKV2", - Input: []tf.Input{ - input, k, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// TopKAttr is an optional argument to TopK. -type TopKAttr func(optionalAttr) - -// TopKSorted sets the optional sorted attribute to value. -// -// value: If true the resulting `k` elements will be sorted by the values in -// descending order. -// If not specified, defaults to true -func TopKSorted(value bool) TopKAttr { - return func(m optionalAttr) { - m["sorted"] = value - } -} - -// Finds values and indices of the `k` largest elements for the last dimension. -// -// DEPRECATED at GraphDef version 7: Use TopKV2 instead -// -// If the input is a vector (rank-1), finds the `k` largest entries in the vector -// and outputs their values and indices as vectors. Thus `values[j]` is the -// `j`-th largest entry in `input`, and its index is `indices[j]`. -// -// For matrices (resp. higher rank input), computes the top `k` entries in each -// row (resp. vector along the last dimension). Thus, -// -// values.shape = indices.shape = input.shape[:-1] + [k] -// -// If two elements are equal, the lower-index element appears first. -// -// If `k` varies dynamically, use `TopKV2` below. -// -// Arguments: -// input: 1-D or higher with last dimension at least `k`. -// k: Number of top elements to look for along the last dimension (along each -// row for matrices). -// -// Returns: -// values: The `k` largest elements along each last dimensional slice. -// indices: The indices of `values` within the last dimension of `input`. -func TopK(scope *Scope, input tf.Output, k int64, optional ...TopKAttr) (values tf.Output, indices tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"k": k} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TopK", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Outputs the single element from the given dataset. -// -// Arguments: -// dataset: A handle to a dataset that contains a single element. -// -// -// -// Returns The components of the single element of `input`. 
-func DatasetToSingleElement(scope *Scope, dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "DatasetToSingleElement", - Input: []tf.Input{ - dataset, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if components, idx, err = makeOutputList(op, idx, "components"); err != nil { - scope.UpdateErr("DatasetToSingleElement", err) - return - } - return components -} - -// Computes softmax cross entropy cost and gradients to backpropagate. -// -// Inputs are the logits, not probabilities. -// -// Arguments: -// features: batch_size x num_classes matrix -// labels: batch_size x num_classes matrix -// The caller must ensure that each batch of labels represents a valid -// probability distribution. -// -// Returns: -// loss: Per example loss (batch_size vector). -// backprop: backpropagated gradients (batch_size x num_classes matrix). -func SoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, labels tf.Output) (loss tf.Output, backprop tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SoftmaxCrossEntropyWithLogits", - Input: []tf.Input{ - features, labels, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Computes log softmax activations. -// -// For each batch `i` and class `j` we have -// -// logsoftmax[i, j] = logits[i, j] - log(sum(exp(logits[i]))) -// -// Arguments: -// logits: 2-D with shape `[batch_size, num_classes]`. -// -// Returns Same shape as `logits`. -func LogSoftmax(scope *Scope, logits tf.Output) (logsoftmax tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "LogSoftmax", - Input: []tf.Input{ - logits, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes softmax activations. -// -// For each batch `i` and class `j` we have -// -// $$softmax[i, j] = exp(logits[i, j]) / sum_j(exp(logits[i, j]))$$ -// -// Arguments: -// logits: 2-D with shape `[batch_size, num_classes]`. -// -// Returns Same shape as `logits`. -func Softmax(scope *Scope, logits tf.Output) (softmax tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Softmax", - Input: []tf.Input{ - logits, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes softsign gradients for a softsign operation. -// -// Arguments: -// gradients: The backpropagated gradients to the corresponding softsign operation. -// features: The features passed as input to the corresponding softsign operation. -// -// Returns The gradients: `gradients / (1 + abs(features)) ** 2`. -func SoftsignGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SoftsignGrad", - Input: []tf.Input{ - gradients, features, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the LSTM cell backward propagation for 1 timestep. -// -// This implementation is to be used in conjunction of LSTMBlockCell. -// -// Arguments: -// x: The input to the LSTM cell, shape (batch_size, num_inputs). -// cs_prev: The previous cell state. -// h_prev: The previous h state. -// w: The weight matrix. -// wci: The weight matrix for input gate peephole connection. 
-// wcf: The weight matrix for forget gate peephole connection. -// wco: The weight matrix for output gate peephole connection. -// b: The bias vector. -// i: The input gate. -// cs: The cell state before the tanh. -// f: The forget gate. -// o: The output gate. -// ci: The cell input. -// co: The cell after the tanh. -// cs_grad: The current gradient of cs. -// h_grad: The gradient of h vector. -// use_peephole: Whether the cell uses peephole connections. -// -// Returns: -// cs_prev_grad: The gradient of cs to be back-propped. -// dicfo: The derivative wrt to [i, cs, f, o]. -// wci_grad: The gradient for wci to be back-propped. -// wcf_grad: The gradient for wcf to be back-propped. -// wco_grad: The gradient for wco to be back-propped. -func LSTMBlockCellGrad(scope *Scope, x tf.Output, cs_prev tf.Output, h_prev tf.Output, w tf.Output, wci tf.Output, wcf tf.Output, wco tf.Output, b tf.Output, i tf.Output, cs tf.Output, f tf.Output, o tf.Output, ci tf.Output, co tf.Output, cs_grad tf.Output, h_grad tf.Output, use_peephole bool) (cs_prev_grad tf.Output, dicfo tf.Output, wci_grad tf.Output, wcf_grad tf.Output, wco_grad tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"use_peephole": use_peephole} - opspec := tf.OpSpec{ - Type: "LSTMBlockCellGrad", - Input: []tf.Input{ - x, cs_prev, h_prev, w, wci, wcf, wco, b, i, cs, f, o, ci, co, cs_grad, h_grad, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) -} - -// Computes gradients for the scaled exponential linear (Selu) operation. -// -// Arguments: -// gradients: The backpropagated gradients to the corresponding Selu operation. -// outputs: The outputs of the corresponding Selu operation. -// -// Returns The gradients: `gradients * (outputs + scale * alpha)` -// if outputs < 0, `scale * gradients` otherwise. -func SeluGrad(scope *Scope, gradients tf.Output, outputs tf.Output) (backprops tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SeluGrad", - Input: []tf.Input{ - gradients, outputs, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes gradients for the exponential linear (Elu) operation. -// -// Arguments: -// gradients: The backpropagated gradients to the corresponding Elu operation. -// outputs: The outputs of the corresponding Elu operation. -// -// Returns The gradients: `gradients * (outputs + 1)` if outputs < 0, -// `gradients` otherwise. -func EluGrad(scope *Scope, gradients tf.Output, outputs tf.Output) (backprops tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "EluGrad", - Input: []tf.Input{ - gradients, outputs, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// LeakyReluGradAttr is an optional argument to LeakyReluGrad. -type LeakyReluGradAttr func(optionalAttr) - -// LeakyReluGradAlpha sets the optional alpha attribute to value. -// If not specified, defaults to 0.2 -func LeakyReluGradAlpha(value float32) LeakyReluGradAttr { - return func(m optionalAttr) { - m["alpha"] = value - } -} - -// Computes rectified linear gradients for a LeakyRelu operation. -// -// Arguments: -// gradients: The backpropagated gradients to the corresponding LeakyRelu operation. -// features: The features passed as input to the corresponding LeakyRelu operation, -// OR the outputs of that operation (both work equivalently). -// -// Returns `gradients * (features > 0) + alpha * gradients * (features <= 0)`. 
-func LeakyReluGrad(scope *Scope, gradients tf.Output, features tf.Output, optional ...LeakyReluGradAttr) (backprops tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LeakyReluGrad", - Input: []tf.Input{ - gradients, features, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the gradient of morphological 2-D dilation with respect to the filter. -// -// Arguments: -// input: 4-D with shape `[batch, in_height, in_width, depth]`. -// filter: 3-D with shape `[filter_height, filter_width, depth]`. -// out_backprop: 4-D with shape `[batch, out_height, out_width, depth]`. -// strides: 1-D of length 4. The stride of the sliding window for each dimension of -// the input tensor. Must be: `[1, stride_height, stride_width, 1]`. -// rates: 1-D of length 4. The input stride for atrous morphological dilation. -// Must be: `[1, rate_height, rate_width, 1]`. -// padding: The type of padding algorithm to use. -// -// Returns 3-D with shape `[filter_height, filter_width, depth]`. -func Dilation2DBackpropFilter(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, rates []int64, padding string) (filter_backprop tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides, "rates": rates, "padding": padding} - opspec := tf.OpSpec{ - Type: "Dilation2DBackpropFilter", - Input: []tf.Input{ - input, filter, out_backprop, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Converts the given variant tensor to an iterator and stores it in the given resource. -// -// Arguments: -// resource_handle: A handle to an iterator resource. -// serialized: A variant tensor storing the state of the iterator contained in the -// resource. -// -// Returns the created operation. -func DeserializeIterator(scope *Scope, resource_handle tf.Output, serialized tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DeserializeIterator", - Input: []tf.Input{ - resource_handle, serialized, - }, - } - return scope.AddOperation(opspec) -} - -// Computes the gradient for the rsqrt of `x` wrt its input. -// -// Specifically, `grad = dy * -0.5 * y^3`, where `y = rsqrt(x)`, and `dy` -// is the corresponding input gradient. -func RsqrtGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "RsqrtGrad", - Input: []tf.Input{ - y, dy, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MaxPoolWithArgmaxAttr is an optional argument to MaxPoolWithArgmax. -type MaxPoolWithArgmaxAttr func(optionalAttr) - -// MaxPoolWithArgmaxTargmax sets the optional Targmax attribute to value. -// If not specified, defaults to DT_INT64 -func MaxPoolWithArgmaxTargmax(value tf.DataType) MaxPoolWithArgmaxAttr { - return func(m optionalAttr) { - m["Targmax"] = value - } -} - -// MaxPoolWithArgmaxIncludeBatchInIndex sets the optional include_batch_in_index attribute to value. -// -// value: Whether to include batch dimension in flattened index of `argmax`. -// If not specified, defaults to false -func MaxPoolWithArgmaxIncludeBatchInIndex(value bool) MaxPoolWithArgmaxAttr { - return func(m optionalAttr) { - m["include_batch_in_index"] = value - } -} - -// Performs max pooling on the input and outputs both max values and indices. 
-// -// The indices in `argmax` are flattened, so that a maximum value at position -// `[b, y, x, c]` becomes flattened index: -// `(y * width + x) * channels + c` if `include_batch_in_index` is False; -// `((b * height + y) * width + x) * channels + c` if `include_batch_in_index` is True. -// -// The indices returned are always in `[0, height) x [0, width)` before flattening, -// even if padding is involved and the mathematically correct answer is outside -// (either negative or too large). This is a bug, but fixing it is difficult to do -// in a safe backwards compatible way, especially due to flattening. -// -// Arguments: -// input: 4-D with shape `[batch, height, width, channels]`. Input to pool over. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. -// -// Returns: -// output: The max pooled output tensor. -// argmax: 4-D. The flattened indices of the max values chosen for each output. -func MaxPoolWithArgmax(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolWithArgmaxAttr) (output tf.Output, argmax tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MaxPoolWithArgmax", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// MaxPoolGradGradAttr is an optional argument to MaxPoolGradGrad. -type MaxPoolGradGradAttr func(optionalAttr) - -// MaxPoolGradGradDataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func MaxPoolGradGradDataFormat(value string) MaxPoolGradGradAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Computes second-order gradients of the maxpooling function. -// -// Arguments: -// orig_input: The original input tensor. -// orig_output: The original output tensor. -// grad: 4-D. Gradients of gradients w.r.t. the input of `max_pool`. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. -// -// Returns Gradients of gradients w.r.t. the input to `max_pool`. -func MaxPoolGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolGradGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MaxPoolGradGrad", - Input: []tf.Input{ - orig_input, orig_output, grad, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MaxPoolGradV2Attr is an optional argument to MaxPoolGradV2. 
-type MaxPoolGradV2Attr func(optionalAttr) - -// MaxPoolGradV2DataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func MaxPoolGradV2DataFormat(value string) MaxPoolGradV2Attr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Computes gradients of the maxpooling function. -// -// Arguments: -// orig_input: The original input tensor. -// orig_output: The original output tensor. -// grad: 4-D. Gradients w.r.t. the output of `max_pool`. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. -// -// Returns Gradients w.r.t. the input to `max_pool`. -func MaxPoolGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolGradV2Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MaxPoolGradV2", - Input: []tf.Input{ - orig_input, orig_output, grad, ksize, strides, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Concats all tensors in the list along the 0th dimension. -// -// Requires that all tensors have the same shape except the first dimension. -// -// input_handle: The input list. -// element_shape: The shape of the uninitialized elements in the list. If the first -// dimension is not -1, it is assumed that all list elements have the same -// leading dim. -// leading_dims: The list of leading dims of uninitialized list elements. Used if -// the leading dim of input_handle.element_shape or the element_shape input arg -// is not already set. -// tensor: The concated result. -// lengths: Output tensor containing sizes of the 0th dimension of tensors in the list, used for computing the gradient. -// -func TensorListConcatV2(scope *Scope, input_handle tf.Output, element_shape tf.Output, leading_dims tf.Output, element_dtype tf.DataType) (tensor tf.Output, lengths tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"element_dtype": element_dtype} - opspec := tf.OpSpec{ - Type: "TensorListConcatV2", - Input: []tf.Input{ - input_handle, element_shape, leading_dims, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// MaxPoolV2Attr is an optional argument to MaxPoolV2. -type MaxPoolV2Attr func(optionalAttr) - -// MaxPoolV2DataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func MaxPoolV2DataFormat(value string) MaxPoolV2Attr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Performs max pooling on the input. 
-// -// Arguments: -// input: 4-D input to pool over. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. -// -// Returns The max pooled output tensor. -func MaxPoolV2(scope *Scope, input tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolV2Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MaxPoolV2", - Input: []tf.Input{ - input, ksize, strides, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SparseReduceSumAttr is an optional argument to SparseReduceSum. -type SparseReduceSumAttr func(optionalAttr) - -// SparseReduceSumKeepDims sets the optional keep_dims attribute to value. -// -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func SparseReduceSumKeepDims(value bool) SparseReduceSumAttr { - return func(m optionalAttr) { - m["keep_dims"] = value - } -} - -// Computes the sum of elements across dimensions of a SparseTensor. -// -// This Op takes a SparseTensor and is the sparse counterpart to -// `tf.reduce_sum()`. In particular, this Op also returns a dense `Tensor` -// instead of a sparse one. -// -// Reduces `sp_input` along the dimensions given in `reduction_axes`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained -// with length 1. -// -// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor -// with a single element is returned. Additionally, the axes can be negative, -// which are interpreted according to the indexing rules in Python. -// -// Arguments: -// input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. -// input_shape: 1-D. Shape of the input SparseTensor. -// reduction_axes: 1-D. Length-`K` vector containing the reduction axes. -// -// Returns `R-K`-D. The reduced Tensor. -func SparseReduceSum(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceSumAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SparseReduceSum", - Input: []tf.Input{ - input_indices, input_values, input_shape, reduction_axes, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Compute the Hurwitz zeta function \\(\zeta(x, q)\\). -// -// The Hurwitz zeta function is defined as: -// -// -// \\(\zeta(x, q) = \sum_{n=0}^{\infty} (q + n)^{-x}\\) -func Zeta(scope *Scope, x tf.Output, q tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Zeta", - Input: []tf.Input{ - x, q, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns which elements of x are Inf. 
-// -// @compatibility(numpy) -// Equivalent to np.isinf -// @end_compatibility -// -// Example: -// -// ```python -// x = tf.constant([5.0, np.inf, 6.8, np.inf]) -// tf.math.is_inf(x) ==> [False, True, False, True] -// ``` -func IsInf(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "IsInf", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MaxPool3DAttr is an optional argument to MaxPool3D. -type MaxPool3DAttr func(optionalAttr) - -// MaxPool3DDataFormat sets the optional data_format attribute to value. -// -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func MaxPool3DDataFormat(value string) MaxPool3DAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Performs 3D max pooling on the input. -// -// Arguments: -// input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over. -// ksize: 1-D tensor of length 5. The size of the window for each dimension of -// the input tensor. Must have `ksize[0] = ksize[4] = 1`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. -// -// Returns The max pooled output tensor. -func MaxPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MaxPool3D", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Deprecated. Use TensorArrayCloseV3 -// -// DEPRECATED at GraphDef version 26: Use TensorArrayCloseV3 -// -// Returns the created operation. -func TensorArrayCloseV2(scope *Scope, handle tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorArrayCloseV2", - Input: []tf.Input{ - handle, - }, - } - return scope.AddOperation(opspec) -} - -// AvgPool3DGradAttr is an optional argument to AvgPool3DGrad. -type AvgPool3DGradAttr func(optionalAttr) - -// AvgPool3DGradDataFormat sets the optional data_format attribute to value. -// -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func AvgPool3DGradDataFormat(value string) AvgPool3DGradAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Computes gradients of average pooling function. -// -// Arguments: -// orig_input_shape: The original input dimensions. -// grad: Output backprop of shape `[batch, depth, rows, cols, channels]`. -// ksize: 1-D tensor of length 5. The size of the window for each dimension of -// the input tensor. Must have `ksize[0] = ksize[4] = 1`. 
-// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. -// -// Returns The backprop for input. -func AvgPool3DGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPool3DGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "AvgPool3DGrad", - Input: []tf.Input{ - orig_input_shape, grad, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Conv3DBackpropFilterAttr is an optional argument to Conv3DBackpropFilter. -type Conv3DBackpropFilterAttr func(optionalAttr) - -// Conv3DBackpropFilterDilations sets the optional dilations attribute to value. -// If not specified, defaults to -func Conv3DBackpropFilterDilations(value []int64) Conv3DBackpropFilterAttr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes the gradients of 3-D convolution with respect to the filter. -// -// DEPRECATED at GraphDef version 10: Use Conv3DBackpropFilterV2 -// -// Arguments: -// input: Shape `[batch, depth, rows, cols, in_channels]`. -// filter: Shape `[depth, rows, cols, in_channels, out_channels]`. -// `in_channels` must match between `input` and `filter`. -// out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols, -// out_channels]`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. -func Conv3DBackpropFilter(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropFilterAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Conv3DBackpropFilter", - Input: []tf.Input{ - input, filter, out_backprop, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Conv3DAttr is an optional argument to Conv3D. -type Conv3DAttr func(optionalAttr) - -// Conv3DDataFormat sets the optional data_format attribute to value. -// -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func Conv3DDataFormat(value string) Conv3DAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Conv3DDilations sets the optional dilations attribute to value. -// -// value: 1-D tensor of length 5. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each -// filter element on that dimension. The dimension order is determined by the -// value of `data_format`, see above for details. Dilations in the batch and -// depth dimensions must be 1. 
-// If not specified, defaults to -func Conv3DDilations(value []int64) Conv3DAttr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes a 3-D convolution given 5-D `input` and `filter` tensors. -// -// In signal processing, cross-correlation is a measure of similarity of -// two waveforms as a function of a time-lag applied to one of them. This -// is also known as a sliding dot product or sliding inner-product. -// -// Our Conv3D implements a form of cross-correlation. -// -// Arguments: -// input: Shape `[batch, in_depth, in_height, in_width, in_channels]`. -// filter: Shape `[filter_depth, filter_height, filter_width, in_channels, -// out_channels]`. `in_channels` must match between `input` and `filter`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. -func Conv3D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, padding string, optional ...Conv3DAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Conv3D", - Input: []tf.Input{ - input, filter, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// UniqueWithCountsAttr is an optional argument to UniqueWithCounts. -type UniqueWithCountsAttr func(optionalAttr) - -// UniqueWithCountsOutIdx sets the optional out_idx attribute to value. -// If not specified, defaults to DT_INT32 -func UniqueWithCountsOutIdx(value tf.DataType) UniqueWithCountsAttr { - return func(m optionalAttr) { - m["out_idx"] = value - } -} - -// Finds unique elements in a 1-D tensor. -// -// This operation returns a tensor `y` containing all of the unique elements of `x` -// sorted in the same order that they occur in `x`. This operation also returns a -// tensor `idx` the same size as `x` that contains the index of each value of `x` -// in the unique output `y`. Finally, it returns a third tensor `count` that -// contains the count of each element of `y` in `x`. In other words: -// -// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` -// -// For example: -// -// ``` -// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] -// y, idx, count = unique_with_counts(x) -// y ==> [1, 2, 4, 7, 8] -// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] -// count ==> [2, 1, 3, 1, 2] -// ``` -// -// Arguments: -// x: 1-D. -// -// Returns: -// y: 1-D. -// idx: 1-D. -// count: 1-D. -func UniqueWithCounts(scope *Scope, x tf.Output, optional ...UniqueWithCountsAttr) (y tf.Output, idx tf.Output, count tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "UniqueWithCounts", - Input: []tf.Input{ - x, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// ResizeBicubicGradAttr is an optional argument to ResizeBicubicGrad. -type ResizeBicubicGradAttr func(optionalAttr) - -// ResizeBicubicGradAlignCorners sets the optional align_corners attribute to value. -// -// value: If true, the centers of the 4 corner pixels of the input and grad tensors are -// aligned. Defaults to false. 
-// If not specified, defaults to false -func ResizeBicubicGradAlignCorners(value bool) ResizeBicubicGradAttr { - return func(m optionalAttr) { - m["align_corners"] = value - } -} - -// ResizeBicubicGradHalfPixelCenters sets the optional half_pixel_centers attribute to value. -// If not specified, defaults to false -func ResizeBicubicGradHalfPixelCenters(value bool) ResizeBicubicGradAttr { - return func(m optionalAttr) { - m["half_pixel_centers"] = value - } -} - -// Computes the gradient of bicubic interpolation. -// -// Arguments: -// grads: 4-D with shape `[batch, height, width, channels]`. -// original_image: 4-D with shape `[batch, orig_height, orig_width, channels]`, -// The image tensor that was resized. -// -// Returns 4-D with shape `[batch, orig_height, orig_width, channels]`. -// Gradients with respect to the input image. Input image must have been -// float or double. -func ResizeBicubicGrad(scope *Scope, grads tf.Output, original_image tf.Output, optional ...ResizeBicubicGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResizeBicubicGrad", - Input: []tf.Input{ - grads, original_image, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns a list which has the passed-in `Tensor` as last element and the other elements of the given list in `input_handle`. -// -// tensor: The tensor to put on the list. -// input_handle: The old list. -// output_handle: A list with the elements of the old list followed by tensor. -// element_dtype: the type of elements in the list. -// element_shape: a shape compatible with that of elements in the list. -func TensorListPushBack(scope *Scope, input_handle tf.Output, tensor tf.Output) (output_handle tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorListPushBack", - Input: []tf.Input{ - input_handle, tensor, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns which elements of x are NaN. -// -// @compatibility(numpy) -// Equivalent to np.isnan -// @end_compatibility -// -// Example: -// -// ```python -// x = tf.constant([5.0, np.nan, 6.8, np.nan, np.inf]) -// tf.math.is_nan(x) ==> [False, True, False, True, False] -// ``` -func IsNan(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "IsNan", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Adds a value to the current value of a variable. -// -// Any ReadVariableOp with a control dependency on this op is guaranteed to -// see the incremented value or a subsequent newer one. -// -// Arguments: -// resource: handle to the resource in which to store the variable. -// value: the value by which the variable will be incremented. -// -// Returns the created operation. -func AssignAddVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "AssignAddVariableOp", - Input: []tf.Input{ - resource, value, - }, - } - return scope.AddOperation(opspec) -} - -// DepthwiseConv2dNativeBackpropInputAttr is an optional argument to DepthwiseConv2dNativeBackpropInput. -type DepthwiseConv2dNativeBackpropInputAttr func(optionalAttr) - -// DepthwiseConv2dNativeBackpropInputExplicitPaddings sets the optional explicit_paddings attribute to value. 
-// If not specified, defaults to <> -func DepthwiseConv2dNativeBackpropInputExplicitPaddings(value []int64) DepthwiseConv2dNativeBackpropInputAttr { - return func(m optionalAttr) { - m["explicit_paddings"] = value - } -} - -// DepthwiseConv2dNativeBackpropInputDataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, height, width, channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, channels, height, width]. -// If not specified, defaults to "NHWC" -func DepthwiseConv2dNativeBackpropInputDataFormat(value string) DepthwiseConv2dNativeBackpropInputAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// DepthwiseConv2dNativeBackpropInputDilations sets the optional dilations attribute to value. -// -// value: 1-D tensor of length 4. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each filter -// element on that dimension. The dimension order is determined by the value of -// `data_format`, see above for details. Dilations in the batch and depth -// dimensions must be 1. -// If not specified, defaults to -func DepthwiseConv2dNativeBackpropInputDilations(value []int64) DepthwiseConv2dNativeBackpropInputAttr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes the gradients of depthwise convolution with respect to the input. -// -// Arguments: -// input_sizes: An integer vector representing the shape of `input`, based -// on `data_format`. For example, if `data_format` is 'NHWC' then -// `input` is a 4-D `[batch, height, width, channels]` tensor. -// filter: 4-D with shape -// `[filter_height, filter_width, in_channels, depthwise_multiplier]`. -// out_backprop: 4-D with shape based on `data_format`. -// For example, if `data_format` is 'NHWC' then -// out_backprop shape is `[batch, out_height, out_width, out_channels]`. -// Gradients w.r.t. the output of the convolution. -// strides: The stride of the sliding window for each dimension of the input -// of the convolution. -// padding: The type of padding algorithm to use. -// -// Returns 4-D with shape according to `data_format`. For example, if -// `data_format` is 'NHWC', output shape is `[batch, in_height, -// in_width, in_channels]`. Gradient w.r.t. the input of the -// convolution. -func DepthwiseConv2dNativeBackpropInput(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropInputAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DepthwiseConv2dNativeBackpropInput", - Input: []tf.Input{ - input_sizes, filter, out_backprop, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Updates the table to associates keys with values. -// -// The tensor `keys` must be of the same type as the keys of the table. -// The tensor `values` must be of the type of the table values. -// -// Arguments: -// table_handle: Handle to the table. -// keys: Any shape. Keys to look up. -// values: Values to associate with keys. -// -// Returns the created operation. 
-func LookupTableInsertV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "LookupTableInsertV2", - Input: []tf.Input{ - table_handle, keys, values, - }, - } - return scope.AddOperation(opspec) -} - -// Component-wise multiplies a SparseTensor by a dense Tensor. -// -// The output locations corresponding to the implicitly zero elements in the sparse -// tensor will be zero (i.e., will not take up storage space), regardless of the -// contents of the dense tensor (even if it's +/-INF and that INF*0 == NaN). -// -// *Limitation*: this Op only broadcasts the dense side to the sparse side, but not -// the other direction. -// -// Arguments: -// sp_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// sp_values: 1-D. `N` non-empty values corresponding to `sp_indices`. -// sp_shape: 1-D. Shape of the input SparseTensor. -// dense: `R`-D. The dense Tensor operand. -// -// Returns 1-D. The `N` values that are operated on. -func SparseDenseCwiseMul(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseDenseCwiseMul", - Input: []tf.Input{ - sp_indices, sp_values, sp_shape, dense, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Performs a padding as a preprocess during a convolution. -// -// Similar to FusedResizeAndPadConv2d, this op allows for an optimized -// implementation where the spatial padding transformation stage is fused with the -// im2col lookup, but in this case without the bilinear filtering required for -// resizing. Fusing the padding prevents the need to write out the intermediate -// results as whole tensors, reducing memory pressure, and we can get some latency -// gains by merging the transformation calculations. -// The data_format attribute for Conv2D isn't supported by this op, and 'NHWC' -// order is used instead. -// Internally this op uses a single per-graph scratch buffer, which means that it -// will block if multiple versions are being run in parallel. This is because this -// operator is primarily an optimization to minimize memory usage. -// -// Arguments: -// input: 4-D with shape `[batch, in_height, in_width, in_channels]`. -// paddings: A two-column matrix specifying the padding sizes. The number of -// rows must be the same as the rank of `input`. -// filter: 4-D with shape -// `[filter_height, filter_width, in_channels, out_channels]`. -// -// strides: 1-D of length 4. The stride of the sliding window for each dimension -// of `input`. Must be in the same order as the dimension specified with format. -// padding: The type of padding algorithm to use. -func FusedPadConv2D(scope *Scope, input tf.Output, paddings tf.Output, filter tf.Output, mode string, strides []int64, padding string) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"mode": mode, "strides": strides, "padding": padding} - opspec := tf.OpSpec{ - Type: "FusedPadConv2D", - Input: []tf.Input{ - input, paddings, filter, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Adjust the hue of one or more images. -// -// `images` is a tensor of at least 3 dimensions. The last dimension is -// interpreted as channels, and must be three. 
-// -// The input image is considered in the RGB colorspace. Conceptually, the RGB -// colors are first mapped into HSV. A delta is then applied all the hue values, -// and then remapped back to RGB colorspace. -// -// Arguments: -// images: Images to adjust. At least 3-D. -// delta: A float delta to add to the hue. -// -// Returns The hue-adjusted image or images. -func AdjustHue(scope *Scope, images tf.Output, delta tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "AdjustHue", - Input: []tf.Input{ - images, delta, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// List of the given size with empty elements. -// -// element_shape: the shape of the future elements of the list -// num_elements: the number of elements to reserve -// handle: the output list -// element_dtype: the desired type of elements in the list. -func TensorListReserve(scope *Scope, element_shape tf.Output, num_elements tf.Output, element_dtype tf.DataType) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"element_dtype": element_dtype} - opspec := tf.OpSpec{ - Type: "TensorListReserve", - Input: []tf.Input{ - element_shape, num_elements, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Clips tensor values to a specified min and max. -// -// Given a tensor `t`, this operation returns a tensor of the same type and -// shape as `t` with its values clipped to `clip_value_min` and `clip_value_max`. -// Any values less than `clip_value_min` are set to `clip_value_min`. Any values -// greater than `clip_value_max` are set to `clip_value_max`. -// -// Arguments: -// t: A `Tensor`. -// clip_value_min: A 0-D (scalar) `Tensor`, or a `Tensor` with the same shape -// as `t`. The minimum value to clip by. -// clip_value_max: A 0-D (scalar) `Tensor`, or a `Tensor` with the same shape -// as `t`. The maximum value to clip by. -// -// Returns A clipped `Tensor` with the same shape as input 't'. -func ClipByValue(scope *Scope, t tf.Output, clip_value_min tf.Output, clip_value_max tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ClipByValue", - Input: []tf.Input{ - t, clip_value_min, clip_value_max, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Conv2DBackpropFilterAttr is an optional argument to Conv2DBackpropFilter. -type Conv2DBackpropFilterAttr func(optionalAttr) - -// Conv2DBackpropFilterUseCudnnOnGpu sets the optional use_cudnn_on_gpu attribute to value. -// If not specified, defaults to true -func Conv2DBackpropFilterUseCudnnOnGpu(value bool) Conv2DBackpropFilterAttr { - return func(m optionalAttr) { - m["use_cudnn_on_gpu"] = value - } -} - -// Conv2DBackpropFilterExplicitPaddings sets the optional explicit_paddings attribute to value. -// -// value: If `padding` is `"EXPLICIT"`, the list of explicit padding amounts. For the ith -// dimension, the amount of padding inserted before and after the dimension is -// `explicit_paddings[2 * i]` and `explicit_paddings[2 * i + 1]`, respectively. If -// `padding` is not `"EXPLICIT"`, `explicit_paddings` must be empty. -// If not specified, defaults to <> -func Conv2DBackpropFilterExplicitPaddings(value []int64) Conv2DBackpropFilterAttr { - return func(m optionalAttr) { - m["explicit_paddings"] = value - } -} - -// Conv2DBackpropFilterDataFormat sets the optional data_format attribute to value. 
-// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func Conv2DBackpropFilterDataFormat(value string) Conv2DBackpropFilterAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Conv2DBackpropFilterDilations sets the optional dilations attribute to value. -// -// value: 1-D tensor of length 4. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each filter -// element on that dimension. The dimension order is determined by the value of -// `data_format`, see above for details. Dilations in the batch and depth -// dimensions must be 1. -// If not specified, defaults to -func Conv2DBackpropFilterDilations(value []int64) Conv2DBackpropFilterAttr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes the gradients of convolution with respect to the filter. -// -// Arguments: -// input: 4-D with shape `[batch, in_height, in_width, in_channels]`. -// filter_sizes: An integer vector representing the tensor shape of `filter`, -// where `filter` is a 4-D -// `[filter_height, filter_width, in_channels, out_channels]` tensor. -// out_backprop: 4-D with shape `[batch, out_height, out_width, out_channels]`. -// Gradients w.r.t. the output of the convolution. -// strides: The stride of the sliding window for each dimension of the input -// of the convolution. Must be in the same order as the dimension specified with -// format. -// padding: The type of padding algorithm to use. -// -// Returns 4-D with shape -// `[filter_height, filter_width, in_channels, out_channels]`. Gradient w.r.t. -// the `filter` input of the convolution. -func Conv2DBackpropFilter(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv2DBackpropFilterAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Conv2DBackpropFilter", - Input: []tf.Input{ - input, filter_sizes, out_backprop, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the truth value of x OR y element-wise. -// -// *NOTE*: `LogicalOr` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func LogicalOr(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "LogicalOr", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Adds `bias` to `value`. -// -// This is a deprecated version of BiasAdd and will be soon removed. -// -// This is a special case of `tf.add` where `bias` is restricted to be 1-D. -// Broadcasting is supported, so `value` may have any number of dimensions. -// -// Arguments: -// value: Any number of dimensions. -// bias: 1-D with size the last dimension of `value`. -// -// Returns Broadcasted sum of `value` and `bias`. 
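// For example (illustrative values), with the bias broadcast along the last
// dimension of `value` as described above:
//
//	value = [[1, 2, 3],
//	         [4, 5, 6]]     // shape [2, 3]
//	bias  = [10, 20, 30]    // 1-D, size 3
//	output => [[11, 22, 33],
//	           [14, 25, 36]]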
-func BiasAddV1(scope *Scope, value tf.Output, bias tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BiasAddV1", - Input: []tf.Input{ - value, bias, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// BiasAddAttr is an optional argument to BiasAdd. -type BiasAddAttr func(optionalAttr) - -// BiasAddDataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the bias tensor will be added to the last dimension -// of the value tensor. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// The tensor will be added to "in_channels", the third-to-the-last -// dimension. -// If not specified, defaults to "NHWC" -func BiasAddDataFormat(value string) BiasAddAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Adds `bias` to `value`. -// -// This is a special case of `tf.add` where `bias` is restricted to be 1-D. -// Broadcasting is supported, so `value` may have any number of dimensions. -// -// Arguments: -// value: Any number of dimensions. -// bias: 1-D with size the last dimension of `value`. -// -// Returns Broadcasted sum of `value` and `bias`. -func BiasAdd(scope *Scope, value tf.Output, bias tf.Output, optional ...BiasAddAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "BiasAdd", - Input: []tf.Input{ - value, bias, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the gradient for the inverse of `x` wrt its input. -// -// Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy` -// is the corresponding input gradient. -func InvGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "InvGrad", - Input: []tf.Input{ - y, dy, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Adds up a SparseTensor and a dense Tensor, using these special rules: -// -// (1) Broadcasts the dense side to have the same shape as the sparse side, if -// eligible; -// (2) Then, only the dense values pointed to by the indices of the SparseTensor -// participate in the cwise addition. -// -// By these rules, the result is a logical SparseTensor with exactly the same -// indices and shape, but possibly with different non-zero values. The output of -// this Op is the resultant non-zero values. -// -// Arguments: -// sp_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// sp_values: 1-D. `N` non-empty values corresponding to `sp_indices`. -// sp_shape: 1-D. Shape of the input SparseTensor. -// dense: `R`-D. The dense Tensor operand. -// -// Returns 1-D. The `N` values that are operated on. -func SparseDenseCwiseAdd(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseDenseCwiseAdd", - Input: []tf.Input{ - sp_indices, sp_values, sp_shape, dense, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// EncodeBase64Attr is an optional argument to EncodeBase64. 
-type EncodeBase64Attr func(optionalAttr) - -// EncodeBase64Pad sets the optional pad attribute to value. -// -// value: Bool whether padding is applied at the ends. -// If not specified, defaults to false -func EncodeBase64Pad(value bool) EncodeBase64Attr { - return func(m optionalAttr) { - m["pad"] = value - } -} - -// Encode strings into web-safe base64 format. -// -// Refer to the following article for more information on base64 format: -// en.wikipedia.org/wiki/Base64. Base64 strings may have padding with '=' at the -// end so that the encoded has length multiple of 4. See Padding section of the -// link above. -// -// Web-safe means that the encoder uses - and _ instead of + and /. -// -// Arguments: -// input: Strings to be encoded. -// -// Returns Input strings encoded in base64. -func EncodeBase64(scope *Scope, input tf.Output, optional ...EncodeBase64Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "EncodeBase64", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// FakeQuantWithMinMaxArgsAttr is an optional argument to FakeQuantWithMinMaxArgs. -type FakeQuantWithMinMaxArgsAttr func(optionalAttr) - -// FakeQuantWithMinMaxArgsMin sets the optional min attribute to value. -// If not specified, defaults to -6 -func FakeQuantWithMinMaxArgsMin(value float32) FakeQuantWithMinMaxArgsAttr { - return func(m optionalAttr) { - m["min"] = value - } -} - -// FakeQuantWithMinMaxArgsMax sets the optional max attribute to value. -// If not specified, defaults to 6 -func FakeQuantWithMinMaxArgsMax(value float32) FakeQuantWithMinMaxArgsAttr { - return func(m optionalAttr) { - m["max"] = value - } -} - -// FakeQuantWithMinMaxArgsNumBits sets the optional num_bits attribute to value. -// If not specified, defaults to 8 -func FakeQuantWithMinMaxArgsNumBits(value int64) FakeQuantWithMinMaxArgsAttr { - return func(m optionalAttr) { - m["num_bits"] = value - } -} - -// FakeQuantWithMinMaxArgsNarrowRange sets the optional narrow_range attribute to value. -// If not specified, defaults to false -func FakeQuantWithMinMaxArgsNarrowRange(value bool) FakeQuantWithMinMaxArgsAttr { - return func(m optionalAttr) { - m["narrow_range"] = value - } -} - -// Fake-quantize the 'inputs' tensor, type float to 'outputs' tensor of same type. -// -// Attributes -// -// * `[min; max]` define the clamping range for the `inputs` data. -// * `inputs` values are quantized into the quantization range ( -// `[0; 2^num_bits - 1]` when `narrow_range` is false and `[1; 2^num_bits - 1]` -// when it is true) and then de-quantized and output as floats in `[min; max]` -// interval. -// * `num_bits` is the bitwidth of the quantization; between 2 and 16, inclusive. -// -// Before quantization, `min` and `max` values are adjusted with the following -// logic. -// It is suggested to have `min <= 0 <= max`. If `0` is not in the range of values, -// the behavior can be unexpected: -// -// * If `0 < min < max`: `min_adj = 0` and `max_adj = max - min`. -// * If `min < max < 0`: `min_adj = min - max` and `max_adj = 0`. -// * If `min <= 0 <= max`: `scale = (max - min) / (2^num_bits - 1) `, -// `min_adj = scale * round(min / scale)` and `max_adj = max + min_adj - min`. -// -// Quantization is called fake since the output is still in floating point. 
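// As a minimal sketch of the adjustment rules above (an illustrative Go helper,
// not part of the op definition; the rounding mode of `round` is assumed to
// match math.Round, and the "math" import is assumed):
//
//	func adjustRange(min, max float32, numBits uint) (minAdj, maxAdj float32) {
//		levels := float32(uint64(1)<<numBits) - 1 // 2^num_bits - 1
//		switch {
//		case 0 < min: // 0 < min < max
//			return 0, max - min
//		case max < 0: // min < max < 0
//			return min - max, 0
//		default: // min <= 0 <= max
//			scale := (max - min) / levels
//			minAdj = scale * float32(math.Round(float64(min/scale)))
//			return minAdj, max + minAdj - min
//		}
//	}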
-func FakeQuantWithMinMaxArgs(scope *Scope, inputs tf.Output, optional ...FakeQuantWithMinMaxArgsAttr) (outputs tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "FakeQuantWithMinMaxArgs",
-		Input: []tf.Input{
-			inputs,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Batch normalization.
-//
-// DEPRECATED at GraphDef version 9: Use tf.nn.batch_normalization()
-//
-// This op is deprecated. Prefer `tf.nn.batch_normalization`.
-//
-// Arguments:
-// t: A 4D input Tensor.
-// m: A 1D mean Tensor with size matching the last dimension of t.
-// This is the first output from tf.nn.moments,
-// or a saved moving average thereof.
-// v: A 1D variance Tensor with size matching the last dimension of t.
-// This is the second output from tf.nn.moments,
-// or a saved moving average thereof.
-// beta: A 1D beta Tensor with size matching the last dimension of t.
-// An offset to be added to the normalized tensor.
-// gamma: A 1D gamma Tensor with size matching the last dimension of t.
-// If "scale_after_normalization" is true, this tensor will be multiplied
-// by the normalized tensor.
-// variance_epsilon: A small float number to avoid dividing by 0.
-// scale_after_normalization: A bool indicating whether the resulting tensor
-// needs to be multiplied by gamma.
-func BatchNormWithGlobalNormalization(scope *Scope, t tf.Output, m tf.Output, v tf.Output, beta tf.Output, gamma tf.Output, variance_epsilon float32, scale_after_normalization bool) (result tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization}
-	opspec := tf.OpSpec{
-		Type: "BatchNormWithGlobalNormalization",
-		Input: []tf.Input{
-			t, m, v, beta, gamma,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// QuantizedConv2DAttr is an optional argument to QuantizedConv2D.
-type QuantizedConv2DAttr func(optionalAttr)
-
-// QuantizedConv2DOutType sets the optional out_type attribute to value.
-// If not specified, defaults to DT_QINT32
-func QuantizedConv2DOutType(value tf.DataType) QuantizedConv2DAttr {
-	return func(m optionalAttr) {
-		m["out_type"] = value
-	}
-}
-
-// QuantizedConv2DDilations sets the optional dilations attribute to value.
-//
-// value: 1-D tensor of length 4. The dilation factor for each dimension of
-// `input`. If set to k > 1, there will be k-1 skipped cells between each
-// filter element on that dimension. The dimension order is determined by the
-// value of `data_format`, see above for details. Dilations in the batch and
-// depth dimensions must be 1.
-// If not specified, defaults to
-func QuantizedConv2DDilations(value []int64) QuantizedConv2DAttr {
-	return func(m optionalAttr) {
-		m["dilations"] = value
-	}
-}
-
-// Computes a 2D convolution given quantized 4D input and filter tensors.
-//
-// The inputs are quantized tensors where the lowest value represents the real
-// number of the associated minimum, and the highest represents the maximum.
-// This means that you can only interpret the quantized output in the same way, by
-// taking the returned minimum and maximum values into account.
-//
-// Arguments:
-//
-// filter: filter's input_depth dimension must match input's depth dimensions.
-// min_input: The float value that the lowest quantized input value represents.
-// max_input: The float value that the highest quantized input value represents.
-// min_filter: The float value that the lowest quantized filter value represents.
-// max_filter: The float value that the highest quantized filter value represents.
-// strides: The stride of the sliding window for each dimension of the input
-// tensor.
-// padding: The type of padding algorithm to use.
-//
-// Returns:
-// output
-// min_output: The float value that the lowest quantized output value represents.
-// max_output: The float value that the highest quantized output value represents.
-func QuantizedConv2D(scope *Scope, input tf.Output, filter tf.Output, min_input tf.Output, max_input tf.Output, min_filter tf.Output, max_filter tf.Output, strides []int64, padding string, optional ...QuantizedConv2DAttr) (output tf.Output, min_output tf.Output, max_output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"strides": strides, "padding": padding}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "QuantizedConv2D",
-		Input: []tf.Input{
-			input, filter, min_input, max_input, min_filter, max_filter,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
-// Computes rectified linear 6 gradients for a Relu6 operation.
-//
-// Arguments:
-// gradients: The backpropagated gradients to the corresponding Relu6 operation.
-// features: The features passed as input to the corresponding Relu6 operation, or
-// its output; using either one produces the same result.
-//
-// Returns The gradients:
-// `gradients * (features > 0) * (features < 6)`.
-func Relu6Grad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Relu6Grad",
-		Input: []tf.Input{
-			gradients, features,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// StringSplitAttr is an optional argument to StringSplit.
-type StringSplitAttr func(optionalAttr)
-
-// StringSplitSkipEmpty sets the optional skip_empty attribute to value.
-//
-// value: A `bool`. If `True`, skip the empty strings from the result.
-// If not specified, defaults to true
-func StringSplitSkipEmpty(value bool) StringSplitAttr {
-	return func(m optionalAttr) {
-		m["skip_empty"] = value
-	}
-}
-
-// Split elements of `input` based on `delimiter` into a `SparseTensor`.
-//
-// Let N be the size of the source (typically N will be the batch size). Split each
-// element of `input` based on `delimiter` and return a `SparseTensor`
-// containing the split tokens. Empty tokens are ignored.
-//
-// `delimiter` can be empty, or a string of split characters. If `delimiter` is an
-// empty string, each element of `input` is split into individual single-byte
-// character strings, including splitting of UTF-8 multibyte sequences. Otherwise
-// every character of `delimiter` is a potential split point.
-//
-// For example:
-// N = 2, input[0] is 'hello world' and input[1] is 'a b c', then the output
-// will be
-//
-// indices = [0, 0;
-// 0, 1;
-// 1, 0;
-// 1, 1;
-// 1, 2]
-// shape = [2, 3]
-// values = ['hello', 'world', 'a', 'b', 'c']
-//
-// Arguments:
-// input: 1-D. Strings to split.
-// delimiter: 0-D. Delimiter characters (bytes), or empty string.
-//
-// Returns:
-// indices: A dense matrix of int64 representing the indices of the sparse tensor.
-// values: A vector of strings corresponding to the split values.
-// shape: a length-2 vector of int64 representing the shape of the sparse -// tensor, where the first value is N and the second value is the maximum number -// of tokens in a single input entry. -func StringSplit(scope *Scope, input tf.Output, delimiter tf.Output, optional ...StringSplitAttr) (indices tf.Output, values tf.Output, shape tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StringSplit", - Input: []tf.Input{ - input, delimiter, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Assigns sparse updates to the variable referenced by `resource`. -// -// This operation computes -// -// # Scalar indices -// ref[indices, ...] = updates[...] -// -// # Vector indices (for each i) -// ref[indices[i], ...] = updates[i, ...] -// -// # High rank indices (for each i, ..., j) -// ref[indices[i, ..., j], ...] = updates[i, ..., j, ...] -// -// Arguments: -// resource: Should be from a `Variable` node. -// indices: A tensor of indices into the first dimension of `ref`. -// updates: A tensor of updated values to add to `ref`. -// -// Returns the created operation. -func ResourceScatterUpdate(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ResourceScatterUpdate", - Input: []tf.Input{ - resource, indices, updates, - }, - } - return scope.AddOperation(opspec) -} - -// EditDistanceAttr is an optional argument to EditDistance. -type EditDistanceAttr func(optionalAttr) - -// EditDistanceNormalize sets the optional normalize attribute to value. -// -// value: boolean (if true, edit distances are normalized by length of truth). -// -// The output is: -// If not specified, defaults to true -func EditDistanceNormalize(value bool) EditDistanceAttr { - return func(m optionalAttr) { - m["normalize"] = value - } -} - -// Computes the (possibly normalized) Levenshtein Edit Distance. -// -// The inputs are variable-length sequences provided by SparseTensors -// (hypothesis_indices, hypothesis_values, hypothesis_shape) -// and -// (truth_indices, truth_values, truth_shape). -// -// The inputs are: -// -// Arguments: -// hypothesis_indices: The indices of the hypothesis list SparseTensor. -// This is an N x R int64 matrix. -// hypothesis_values: The values of the hypothesis list SparseTensor. -// This is an N-length vector. -// hypothesis_shape: The shape of the hypothesis list SparseTensor. -// This is an R-length vector. -// truth_indices: The indices of the truth list SparseTensor. -// This is an M x R int64 matrix. -// truth_values: The values of the truth list SparseTensor. -// This is an M-length vector. -// truth_shape: truth indices, vector. -// -// Returns A dense float tensor with rank R - 1. 
-// -// For the example input: -// -// // hypothesis represents a 2x1 matrix with variable-length values: -// // (0,0) = ["a"] -// // (1,0) = ["b"] -// hypothesis_indices = [[0, 0, 0], -// [1, 0, 0]] -// hypothesis_values = ["a", "b"] -// hypothesis_shape = [2, 1, 1] -// -// // truth represents a 2x2 matrix with variable-length values: -// // (0,0) = [] -// // (0,1) = ["a"] -// // (1,0) = ["b", "c"] -// // (1,1) = ["a"] -// truth_indices = [[0, 1, 0], -// [1, 0, 0], -// [1, 0, 1], -// [1, 1, 0]] -// truth_values = ["a", "b", "c", "a"] -// truth_shape = [2, 2, 2] -// normalize = true -// -// The output will be: -// -// // output is a 2x2 matrix with edit distances normalized by truth lengths. -// output = [[inf, 1.0], // (0,0): no truth, (0,1): no hypothesis -// [0.5, 1.0]] // (1,0): addition, (1,1): no hypothesis -func EditDistance(scope *Scope, hypothesis_indices tf.Output, hypothesis_values tf.Output, hypothesis_shape tf.Output, truth_indices tf.Output, truth_values tf.Output, truth_shape tf.Output, optional ...EditDistanceAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "EditDistance", - Input: []tf.Input{ - hypothesis_indices, hypothesis_values, hypothesis_shape, truth_indices, truth_values, truth_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Concatenates a list of `N` tensors along the first dimension. -// -// The input tensors are all required to have size 1 in the first dimension. -// -// For example: -// -// ``` -// # 'x' is [[1, 4]] -// # 'y' is [[2, 5]] -// # 'z' is [[3, 6]] -// parallel_concat([x, y, z]) => [[1, 4], [2, 5], [3, 6]] # Pack along first dim. -// ``` -// -// The difference between concat and parallel_concat is that concat requires all -// of the inputs be computed before the operation will begin but doesn't require -// that the input shapes be known during graph construction. Parallel concat -// will copy pieces of the input into the output as they become available, in -// some situations this can provide a performance benefit. -// -// Arguments: -// values: Tensors to be concatenated. All must have size 1 in the first dimension -// and same shape. -// shape: the final shape of the result; should be equal to the shapes of any input -// but with the number of input values in the first dimension. -// -// Returns The concatenated tensor. -func ParallelConcat(scope *Scope, values []tf.Output, shape tf.Shape) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"shape": shape} - opspec := tf.OpSpec{ - Type: "ParallelConcat", - Input: []tf.Input{ - tf.OutputList(values), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// AvgPoolGradAttr is an optional argument to AvgPoolGrad. -type AvgPoolGradAttr func(optionalAttr) - -// AvgPoolGradDataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. 
-// If not specified, defaults to "NHWC" -func AvgPoolGradDataFormat(value string) AvgPoolGradAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Computes gradients of the average pooling function. -// -// Arguments: -// orig_input_shape: 1-D. Shape of the original input to `avg_pool`. -// grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. -// the output of `avg_pool`. -// ksize: The size of the sliding window for each dimension of the input. -// strides: The stride of the sliding window for each dimension of the input. -// padding: The type of padding algorithm to use. -// -// Returns 4-D. Gradients w.r.t. the input of `avg_pool`. -func AvgPoolGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPoolGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "AvgPoolGrad", - Input: []tf.Input{ - orig_input_shape, grad, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// LoadAndRemapMatrixAttr is an optional argument to LoadAndRemapMatrix. -type LoadAndRemapMatrixAttr func(optionalAttr) - -// LoadAndRemapMatrixMaxRowsInMemory sets the optional max_rows_in_memory attribute to value. -// -// value: The maximum number of rows to load from the checkpoint at -// once. If less than or equal to 0, the entire matrix will be loaded into -// memory. Setting this arg trades increased disk reads for lower memory usage. -// If not specified, defaults to -1 -func LoadAndRemapMatrixMaxRowsInMemory(value int64) LoadAndRemapMatrixAttr { - return func(m optionalAttr) { - m["max_rows_in_memory"] = value - } -} - -// Loads a 2-D (matrix) `Tensor` with name `old_tensor_name` from the checkpoint -// -// at `ckpt_path` and potentially reorders its rows and columns using the -// specified remappings. -// -// Most users should use one of the wrapper initializers (such as -// `tf.contrib.framework.load_and_remap_matrix_initializer`) instead of this -// function directly. -// -// The remappings are 1-D tensors with the following properties: -// -// * `row_remapping` must have exactly `num_rows` entries. Row `i` of the output -// matrix will be initialized from the row corresponding to index -// `row_remapping[i]` in the old `Tensor` from the checkpoint. -// * `col_remapping` must have either 0 entries (indicating that no column -// reordering is needed) or `num_cols` entries. If specified, column `j` of the -// output matrix will be initialized from the column corresponding to index -// `col_remapping[j]` in the old `Tensor` from the checkpoint. -// * A value of -1 in either of the remappings signifies a "missing" entry. In that -// case, values from the `initializing_values` tensor will be used to fill that -// missing row or column. If `row_remapping` has `r` missing entries and -// `col_remapping` has `c` missing entries, then the following condition must be -// true: -// -// `(r * num_cols) + (c * num_rows) - (r * c) == len(initializing_values)` -// -// The remapping tensors can be generated using the GenerateVocabRemapping op. 
-// -// As an example, with row_remapping = [1, 0, -1], col_remapping = [0, 2, -1], -// initializing_values = [0.5, -0.5, 0.25, -0.25, 42], and w(i, j) representing -// the value from row i, column j of the old tensor in the checkpoint, the output -// matrix will look like the following: -// -// [[w(1, 0), w(1, 2), 0.5], -// [w(0, 0), w(0, 2), -0.5], -// [0.25, -0.25, 42]] -// -// Arguments: -// ckpt_path: Path to the TensorFlow checkpoint (version 2, `TensorBundle`) from -// which the old matrix `Tensor` will be loaded. -// old_tensor_name: Name of the 2-D `Tensor` to load from checkpoint. -// row_remapping: An int `Tensor` of row remappings (generally created by -// `generate_vocab_remapping`). Even if no row remapping is needed, this must -// still be an index-valued Tensor (e.g. [0, 1, 2, ...]), or a shifted -// index-valued `Tensor` (e.g. [8, 9, 10, ...], for partitioned `Variables`). -// col_remapping: An int `Tensor` of column remappings (generally created by -// `generate_vocab_remapping`). May be a size-0 `Tensor` if only row remapping -// is to be done (e.g. column ordering is the same). -// initializing_values: A float `Tensor` containing values to fill in for cells -// in the output matrix that are not loaded from the checkpoint. Length must be -// exactly the same as the number of missing / new cells. -// num_rows: Number of rows (length of the 1st dimension) in the output matrix. -// num_cols: Number of columns (length of the 2nd dimension) in the output matrix. -// -// Returns Output matrix containing existing values loaded from the -// checkpoint, and with any missing values filled in from initializing_values. -func LoadAndRemapMatrix(scope *Scope, ckpt_path tf.Output, old_tensor_name tf.Output, row_remapping tf.Output, col_remapping tf.Output, initializing_values tf.Output, num_rows int64, num_cols int64, optional ...LoadAndRemapMatrixAttr) (output_matrix tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_rows": num_rows, "num_cols": num_cols} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LoadAndRemapMatrix", - Input: []tf.Input{ - ckpt_path, old_tensor_name, row_remapping, col_remapping, initializing_values, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Does nothing. Only useful as a placeholder for control edges. -// -// Returns the created operation. -func NoOp(scope *Scope) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "NoOp", - } - return scope.AddOperation(opspec) -} - -// ResourceSparseApplyRMSPropAttr is an optional argument to ResourceSparseApplyRMSProp. -type ResourceSparseApplyRMSPropAttr func(optionalAttr) - -// ResourceSparseApplyRMSPropUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var, ms, and mom tensors is protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceSparseApplyRMSPropUseLocking(value bool) ResourceSparseApplyRMSPropAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' according to the RMSProp algorithm. -// -// Note that in dense implementation of this algorithm, ms and mom will -// update even if the grad is zero, but in this sparse implementation, ms -// and mom will not update in iterations during which the grad is zero. 
-// -// mean_square = decay * mean_square + (1-decay) * gradient ** 2 -// Delta = learning_rate * gradient / sqrt(mean_square + epsilon) -// -// ms <- rho * ms_{t-1} + (1-rho) * grad * grad -// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) -// var <- var - mom -// -// Arguments: -// var_: Should be from a Variable(). -// ms: Should be from a Variable(). -// mom: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// rho: Decay rate. Must be a scalar. -// -// epsilon: Ridge term. Must be a scalar. -// grad: The gradient. -// indices: A vector of indices into the first dimension of var, ms and mom. -// -// Returns the created operation. -func ResourceSparseApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyRMSPropAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceSparseApplyRMSProp", - Input: []tf.Input{ - var_, ms, mom, lr, rho, momentum, epsilon, grad, indices, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// StringToNumberAttr is an optional argument to StringToNumber. -type StringToNumberAttr func(optionalAttr) - -// StringToNumberOutType sets the optional out_type attribute to value. -// -// value: The numeric type to interpret each string in `string_tensor` as. -// If not specified, defaults to DT_FLOAT -func StringToNumberOutType(value tf.DataType) StringToNumberAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// Converts each string in the input Tensor to the specified numeric type. -// -// (Note that int32 overflow results in an error while float overflow -// results in a rounded value.) -// -// Example: -// -// >>> strings = ["5.0", "3.0", "7.0"] -// >>> tf.strings.to_number(strings) -// -// -// -// Returns A Tensor of the same shape as the input `string_tensor`. -func StringToNumber(scope *Scope, string_tensor tf.Output, optional ...StringToNumberAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StringToNumber", - Input: []tf.Input{ - string_tensor, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Transforms a Tensor into a serialized TensorProto proto. -// -// Arguments: -// tensor: A Tensor of type `T`. -// -// Returns A serialized TensorProto proto of the input tensor. -func SerializeTensor(scope *Scope, tensor tf.Output) (serialized tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SerializeTensor", - Input: []tf.Input{ - tensor, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Transforms a serialized tensorflow.TensorProto proto into a Tensor. -// -// Arguments: -// serialized: A scalar string containing a serialized TensorProto proto. -// out_type: The type of the serialized tensor. The provided type must match the -// type of the serialized tensor and no implicit conversion will take place. -// -// Returns A Tensor of type `out_type`. 
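// A minimal usage sketch for round-tripping a tensor through SerializeTensor
// and ParseTensor (illustrative only; error handling is omitted and the usual
// tensorflow/go graph/session plumbing is assumed):
//
//	s := op.NewScope()
//	t := op.Const(s, []float32{1, 2, 3})
//	serialized := op.SerializeTensor(s, t) // scalar DT_STRING
//	restored := op.ParseTensor(s, serialized, tf.Float)
//	graph, _ := s.Finalize()
//	sess, _ := tf.NewSession(graph, nil)
//	out, _ := sess.Run(nil, []tf.Output{restored}, nil)
//	_ = out // out[0] holds the reconstructed tensor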
-func ParseTensor(scope *Scope, serialized tf.Output, out_type tf.DataType) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"out_type": out_type} - opspec := tf.OpSpec{ - Type: "ParseTensor", - Input: []tf.Input{ - serialized, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns element-wise integer closest to x. -// -// If the result is midway between two representable values, -// the even representable is chosen. -// For example: -// -// ``` -// rint(-1.5) ==> -2.0 -// rint(0.5000001) ==> 1.0 -// rint([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) ==> [-2., -2., -0., 0., 2., 2., 2.] -// ``` -func Rint(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Rint", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Reverses specific dimensions of a tensor. -// -// Given a `tensor`, and a `bool` tensor `dims` representing the dimensions -// of `tensor`, this operation reverses each dimension i of `tensor` where -// `dims[i]` is `True`. -// -// `tensor` can have up to 8 dimensions. The number of dimensions -// of `tensor` must equal the number of elements in `dims`. In other words: -// -// `rank(tensor) = size(dims)` -// -// For example: -// -// ``` -// # tensor 't' is [[[[ 0, 1, 2, 3], -// # [ 4, 5, 6, 7], -// # [ 8, 9, 10, 11]], -// # [[12, 13, 14, 15], -// # [16, 17, 18, 19], -// # [20, 21, 22, 23]]]] -// # tensor 't' shape is [1, 2, 3, 4] -// -// # 'dims' is [False, False, False, True] -// reverse(t, dims) ==> [[[[ 3, 2, 1, 0], -// [ 7, 6, 5, 4], -// [ 11, 10, 9, 8]], -// [[15, 14, 13, 12], -// [19, 18, 17, 16], -// [23, 22, 21, 20]]]] -// -// # 'dims' is [False, True, False, False] -// reverse(t, dims) ==> [[[[12, 13, 14, 15], -// [16, 17, 18, 19], -// [20, 21, 22, 23] -// [[ 0, 1, 2, 3], -// [ 4, 5, 6, 7], -// [ 8, 9, 10, 11]]]] -// -// # 'dims' is [False, False, True, False] -// reverse(t, dims) ==> [[[[8, 9, 10, 11], -// [4, 5, 6, 7], -// [0, 1, 2, 3]] -// [[20, 21, 22, 23], -// [16, 17, 18, 19], -// [12, 13, 14, 15]]]] -// ``` -// -// Arguments: -// tensor: Up to 8-D. -// dims: 1-D. The dimensions to reverse. -// -// Returns The same shape as `tensor`. -func Reverse(scope *Scope, tensor tf.Output, dims tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Reverse", - Input: []tf.Input{ - tensor, dims, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Wraps an arbitrary MLIR computation expressed as a module with a main() function. -// -// This operation does not have an associated kernel and is not intended to be -// executed in a regular TensorFlow session. Instead it is intended to be used for -// testing or for special case where a user intends to pass custom MLIR computation -// through a TensorFlow graph with the intent of having custom tooling processing -// it downstream (when targeting a different environment, like TensorFlow lite for -// example). -// The MLIR module is expected to have a main() function that will be used as an -// entry point. The inputs to the operations will be passed as argument to the -// main() function and the returned values of the main function mapped to the -// outputs. 
-// Example usage: -// -// ``` -// import tensorflow as tf -// from tensorflow.compiler.mlir.tensorflow.gen_mlir_passthrough_op import mlir_passthrough_op -// -// mlir_module = '''python -// func @main(%arg0 : tensor<10xf32>, %arg1 : tensor<10xf32>) -> tensor<10x10xf32> { -// %add = "magic.op"(%arg0, %arg1) : (tensor<10xf32>, tensor<10xf32>) -> tensor<10x10xf32> -// return %ret : tensor<10x10xf32> -// } -// ''' -// -// @tf.function -// def foo(x, y): -// return mlir_passthrough_op([x, y], mlir_module, Toutputs=[tf.float32]) -// -// graph_def = foo.get_concrete_function(tf.TensorSpec([10], tf.float32), tf.TensorSpec([10], tf.float32)).graph.as_graph_def() -// ``` -func MlirPassthroughOp(scope *Scope, inputs []tf.Output, mlir_module string, Toutputs []tf.DataType) (outputs []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"mlir_module": mlir_module, "Toutputs": Toutputs} - opspec := tf.OpSpec{ - Type: "MlirPassthroughOp", - Input: []tf.Input{ - tf.OutputList(inputs), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { - scope.UpdateErr("MlirPassthroughOp", err) - return - } - return outputs -} - -// StringLowerAttr is an optional argument to StringLower. -type StringLowerAttr func(optionalAttr) - -// StringLowerEncoding sets the optional encoding attribute to value. -// If not specified, defaults to "" -func StringLowerEncoding(value string) StringLowerAttr { - return func(m optionalAttr) { - m["encoding"] = value - } -} - -// Converts all uppercase characters into their respective lowercase replacements. -// -// Example: -// -// >>> tf.strings.lower("CamelCase string and ALL CAPS") -// -// -func StringLower(scope *Scope, input tf.Output, optional ...StringLowerAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StringLower", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ParseSequenceExampleV2Attr is an optional argument to ParseSequenceExampleV2. -type ParseSequenceExampleV2Attr func(optionalAttr) - -// ParseSequenceExampleV2NcontextSparse sets the optional Ncontext_sparse attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func ParseSequenceExampleV2NcontextSparse(value int64) ParseSequenceExampleV2Attr { - return func(m optionalAttr) { - m["Ncontext_sparse"] = value - } -} - -// ParseSequenceExampleV2ContextSparseTypes sets the optional context_sparse_types attribute to value. -// -// value: A list of Ncontext_sparse types; the data types of data in -// each context Feature given in context_sparse_keys. -// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList), -// DT_INT64 (Int64List), and DT_STRING (BytesList). -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleV2ContextSparseTypes(value []tf.DataType) ParseSequenceExampleV2Attr { - return func(m optionalAttr) { - m["context_sparse_types"] = value - } -} - -// ParseSequenceExampleV2ContextRaggedValueTypes sets the optional context_ragged_value_types attribute to value. -// -// value: RaggedTensor.value dtypes for the ragged context features. 
-// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleV2ContextRaggedValueTypes(value []tf.DataType) ParseSequenceExampleV2Attr { - return func(m optionalAttr) { - m["context_ragged_value_types"] = value - } -} - -// ParseSequenceExampleV2ContextRaggedSplitTypes sets the optional context_ragged_split_types attribute to value. -// -// value: RaggedTensor.row_split dtypes for the ragged context features. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleV2ContextRaggedSplitTypes(value []tf.DataType) ParseSequenceExampleV2Attr { - return func(m optionalAttr) { - m["context_ragged_split_types"] = value - } -} - -// ParseSequenceExampleV2ContextDenseShapes sets the optional context_dense_shapes attribute to value. -// -// value: A list of Ncontext_dense shapes; the shapes of data in -// each context Feature given in context_dense_keys. -// The number of elements in the Feature corresponding to context_dense_key[j] -// must always equal context_dense_shapes[j].NumEntries(). -// The shape of context_dense_values[j] will match context_dense_shapes[j]. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleV2ContextDenseShapes(value []tf.Shape) ParseSequenceExampleV2Attr { - return func(m optionalAttr) { - m["context_dense_shapes"] = value - } -} - -// ParseSequenceExampleV2NfeatureListSparse sets the optional Nfeature_list_sparse attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func ParseSequenceExampleV2NfeatureListSparse(value int64) ParseSequenceExampleV2Attr { - return func(m optionalAttr) { - m["Nfeature_list_sparse"] = value - } -} - -// ParseSequenceExampleV2NfeatureListDense sets the optional Nfeature_list_dense attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func ParseSequenceExampleV2NfeatureListDense(value int64) ParseSequenceExampleV2Attr { - return func(m optionalAttr) { - m["Nfeature_list_dense"] = value - } -} - -// ParseSequenceExampleV2FeatureListDenseTypes sets the optional feature_list_dense_types attribute to value. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleV2FeatureListDenseTypes(value []tf.DataType) ParseSequenceExampleV2Attr { - return func(m optionalAttr) { - m["feature_list_dense_types"] = value - } -} - -// ParseSequenceExampleV2FeatureListSparseTypes sets the optional feature_list_sparse_types attribute to value. -// -// value: A list of Nfeature_list_sparse types; the data types -// of data in each FeatureList given in feature_list_sparse_keys. -// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList), -// DT_INT64 (Int64List), and DT_STRING (BytesList). -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleV2FeatureListSparseTypes(value []tf.DataType) ParseSequenceExampleV2Attr { - return func(m optionalAttr) { - m["feature_list_sparse_types"] = value - } -} - -// ParseSequenceExampleV2FeatureListRaggedValueTypes sets the optional feature_list_ragged_value_types attribute to value. -// -// value: RaggedTensor.value dtypes for the ragged FeatureList features. 
-// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleV2FeatureListRaggedValueTypes(value []tf.DataType) ParseSequenceExampleV2Attr { - return func(m optionalAttr) { - m["feature_list_ragged_value_types"] = value - } -} - -// ParseSequenceExampleV2FeatureListRaggedSplitTypes sets the optional feature_list_ragged_split_types attribute to value. -// -// value: RaggedTensor.row_split dtypes for the ragged FeatureList features. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleV2FeatureListRaggedSplitTypes(value []tf.DataType) ParseSequenceExampleV2Attr { - return func(m optionalAttr) { - m["feature_list_ragged_split_types"] = value - } -} - -// ParseSequenceExampleV2FeatureListDenseShapes sets the optional feature_list_dense_shapes attribute to value. -// -// value: A list of Nfeature_list_dense shapes; the shapes of -// data in each FeatureList given in feature_list_dense_keys. -// The shape of each Feature in the FeatureList corresponding to -// feature_list_dense_key[j] must always equal -// feature_list_dense_shapes[j].NumEntries(). -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleV2FeatureListDenseShapes(value []tf.Shape) ParseSequenceExampleV2Attr { - return func(m optionalAttr) { - m["feature_list_dense_shapes"] = value - } -} - -// Transforms a vector of tf.io.SequenceExample protos (as strings) into -// typed tensors. -// -// Arguments: -// serialized: A scalar or vector containing binary serialized SequenceExample protos. -// debug_name: A scalar or vector containing the names of the serialized protos. -// May contain, for example, table key (descriptive) name for the -// corresponding serialized proto. This is purely useful for debugging -// purposes, and the presence of values here has no effect on the output. -// May also be an empty vector if no name is available. -// context_sparse_keys: The keys expected in the Examples' features associated with context_sparse -// values. -// context_dense_keys: The keys expected in the SequenceExamples' context features associated with -// dense values. -// context_ragged_keys: The keys expected in the Examples' features associated with context_ragged -// values. -// feature_list_sparse_keys: The keys expected in the FeatureLists associated with sparse values. -// feature_list_dense_keys: The keys expected in the SequenceExamples' feature_lists associated -// with lists of dense values. -// feature_list_ragged_keys: The keys expected in the FeatureLists associated with ragged values. -// feature_list_dense_missing_assumed_empty: A vector corresponding 1:1 with feature_list_dense_keys, indicating which -// features may be missing from the SequenceExamples. If the associated -// FeatureList is missing, it is treated as empty. -// context_dense_defaults: A list of Ncontext_dense Tensors (some may be empty). -// context_dense_defaults[j] provides default values -// when the SequenceExample's context map lacks context_dense_key[j]. -// If an empty Tensor is provided for context_dense_defaults[j], -// then the Feature context_dense_keys[j] is required. -// The input type is inferred from context_dense_defaults[j], even when it's -// empty. If context_dense_defaults[j] is not empty, its shape must match -// context_dense_shapes[j]. 
-func ParseSequenceExampleV2(scope *Scope, serialized tf.Output, debug_name tf.Output, context_sparse_keys tf.Output, context_dense_keys tf.Output, context_ragged_keys tf.Output, feature_list_sparse_keys tf.Output, feature_list_dense_keys tf.Output, feature_list_ragged_keys tf.Output, feature_list_dense_missing_assumed_empty tf.Output, context_dense_defaults []tf.Output, optional ...ParseSequenceExampleV2Attr) (context_sparse_indices []tf.Output, context_sparse_values []tf.Output, context_sparse_shapes []tf.Output, context_dense_values []tf.Output, context_ragged_values []tf.Output, context_ragged_row_splits []tf.Output, feature_list_sparse_indices []tf.Output, feature_list_sparse_values []tf.Output, feature_list_sparse_shapes []tf.Output, feature_list_dense_values []tf.Output, feature_list_dense_lengths []tf.Output, feature_list_ragged_values []tf.Output, feature_list_ragged_outer_splits []tf.Output, feature_list_ragged_inner_splits []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ParseSequenceExampleV2", - Input: []tf.Input{ - serialized, debug_name, context_sparse_keys, context_dense_keys, context_ragged_keys, feature_list_sparse_keys, feature_list_dense_keys, feature_list_ragged_keys, feature_list_dense_missing_assumed_empty, tf.OutputList(context_dense_defaults), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if context_sparse_indices, idx, err = makeOutputList(op, idx, "context_sparse_indices"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) - return - } - if context_sparse_values, idx, err = makeOutputList(op, idx, "context_sparse_values"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) - return - } - if context_sparse_shapes, idx, err = makeOutputList(op, idx, "context_sparse_shapes"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) - return - } - if context_dense_values, idx, err = makeOutputList(op, idx, "context_dense_values"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) - return - } - if context_ragged_values, idx, err = makeOutputList(op, idx, "context_ragged_values"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) - return - } - if context_ragged_row_splits, idx, err = makeOutputList(op, idx, "context_ragged_row_splits"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) - return - } - if feature_list_sparse_indices, idx, err = makeOutputList(op, idx, "feature_list_sparse_indices"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) - return - } - if feature_list_sparse_values, idx, err = makeOutputList(op, idx, "feature_list_sparse_values"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) - return - } - if feature_list_sparse_shapes, idx, err = makeOutputList(op, idx, "feature_list_sparse_shapes"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) - return - } - if feature_list_dense_values, idx, err = makeOutputList(op, idx, "feature_list_dense_values"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) - return - } - if feature_list_dense_lengths, idx, err = makeOutputList(op, idx, "feature_list_dense_lengths"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) - return - } - if feature_list_ragged_values, idx, err = makeOutputList(op, idx, "feature_list_ragged_values"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", 
err) - return - } - if feature_list_ragged_outer_splits, idx, err = makeOutputList(op, idx, "feature_list_ragged_outer_splits"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) - return - } - if feature_list_ragged_inner_splits, idx, err = makeOutputList(op, idx, "feature_list_ragged_inner_splits"); err != nil { - scope.UpdateErr("ParseSequenceExampleV2", err) - return - } - return context_sparse_indices, context_sparse_values, context_sparse_shapes, context_dense_values, context_ragged_values, context_ragged_row_splits, feature_list_sparse_indices, feature_list_sparse_values, feature_list_sparse_shapes, feature_list_dense_values, feature_list_dense_lengths, feature_list_ragged_values, feature_list_ragged_outer_splits, feature_list_ragged_inner_splits -} - -// Gives a guarantee to the TF runtime that the input tensor is a constant. -// -// The runtime is then free to make optimizations based on this. -// -// Only accepts value typed tensors as inputs and rejects resource variable handles -// as input. -// -// Returns the input tensor without modification. -func GuaranteeConst(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "GuaranteeConst", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Transforms a tf.Example proto (as a string) into typed tensors. -// -// Arguments: -// serialized: A vector containing a batch of binary serialized Example protos. -// dense_defaults: A list of Tensors (some may be empty), whose length matches -// the length of `dense_keys`. dense_defaults[j] provides default values -// when the example's feature_map lacks dense_key[j]. If an empty Tensor is -// provided for dense_defaults[j], then the Feature dense_keys[j] is required. -// The input type is inferred from dense_defaults[j], even when it's empty. -// If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined, -// then the shape of dense_defaults[j] must match that of dense_shapes[j]. -// If dense_shapes[j] has an undefined major dimension (variable strides dense -// feature), dense_defaults[j] must contain a single element: -// the padding element. -// num_sparse: The number of sparse features to be parsed from the example. This -// must match the lengths of `sparse_keys` and `sparse_types`. -// sparse_keys: A list of `num_sparse` strings. -// The keys expected in the Examples' features associated with sparse values. -// dense_keys: The keys expected in the Examples' features associated with dense -// values. -// sparse_types: A list of `num_sparse` types; the data types of data in each -// Feature given in sparse_keys. -// Currently the ParseSingleExample op supports DT_FLOAT (FloatList), -// DT_INT64 (Int64List), and DT_STRING (BytesList). -// dense_shapes: The shapes of data in each Feature given in dense_keys. -// The length of this list must match the length of `dense_keys`. The -// number of elements in the Feature corresponding to dense_key[j] must -// always equal dense_shapes[j].NumEntries(). If dense_shapes[j] == -// (D0, D1, ..., DN) then the shape of output Tensor dense_values[j] -// will be (D0, D1, ..., DN): In the case dense_shapes[j] = (-1, D1, -// ..., DN), the shape of the output Tensor dense_values[j] will be (M, -// D1, .., DN), where M is the number of blocks of elements of length -// D1 * .... * DN, in the input. 
-func ParseSingleExample(scope *Scope, serialized tf.Output, dense_defaults []tf.Output, num_sparse int64, sparse_keys []string, dense_keys []string, sparse_types []tf.DataType, dense_shapes []tf.Shape) (sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shapes []tf.Output, dense_values []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_sparse": num_sparse, "sparse_keys": sparse_keys, "dense_keys": dense_keys, "sparse_types": sparse_types, "dense_shapes": dense_shapes} - opspec := tf.OpSpec{ - Type: "ParseSingleExample", - Input: []tf.Input{ - serialized, tf.OutputList(dense_defaults), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if sparse_indices, idx, err = makeOutputList(op, idx, "sparse_indices"); err != nil { - scope.UpdateErr("ParseSingleExample", err) - return - } - if sparse_values, idx, err = makeOutputList(op, idx, "sparse_values"); err != nil { - scope.UpdateErr("ParseSingleExample", err) - return - } - if sparse_shapes, idx, err = makeOutputList(op, idx, "sparse_shapes"); err != nil { - scope.UpdateErr("ParseSingleExample", err) - return - } - if dense_values, idx, err = makeOutputList(op, idx, "dense_values"); err != nil { - scope.UpdateErr("ParseSingleExample", err) - return - } - return sparse_indices, sparse_values, sparse_shapes, dense_values -} - -// Scatter `updates` into a new tensor according to `indices`. -// -// Creates a new tensor by applying sparse `updates` to individual values or -// slices within a tensor (initially zero for numeric, empty for string) of -// the given `shape` according to indices. This operator is the inverse of the -// `tf.gather_nd` operator which extracts values or slices from a given tensor. -// -// This operation is similar to tensor_scatter_add, except that the tensor is -// zero-initialized. Calling `tf.scatter_nd(indices, values, shape)` is identical -// to `tensor_scatter_add(tf.zeros(shape, values.dtype), indices, values)` -// -// If `indices` contains duplicates, then their updates are accumulated (summed). -// -// **WARNING**: The order in which updates are applied is nondeterministic, so the -// output will be nondeterministic if `indices` contains duplicates -- because -// of some numerical approximation issues, numbers summed in different order -// may yield different results. -// -// `indices` is an integer tensor containing indices into a new tensor of shape -// `shape`. The last dimension of `indices` can be at most the rank of `shape`: -// -// indices.shape[-1] <= shape.rank -// -// The last dimension of `indices` corresponds to indices into elements -// (if `indices.shape[-1] = shape.rank`) or slices -// (if `indices.shape[-1] < shape.rank`) along dimension `indices.shape[-1]` of -// `shape`. `updates` is a tensor with shape -// -// indices.shape[:-1] + shape[indices.shape[-1]:] -// -// The simplest form of scatter is to insert individual elements in a tensor by -// index. For example, say we want to insert 4 scattered elements in a rank-1 -// tensor with 8 elements. -// -//
-// (figure omitted: illustration of inserting individual scattered elements)
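The same rank-1 insertion can be sketched with the Go wrapper itself; this assumes the standard Go session API and simply mirrors the Python example that follows in these docs:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Insert 4 scattered elements into a rank-1 tensor with 8 elements.
	indices := op.Const(s, [][]int32{{4}, {3}, {1}, {7}})
	updates := op.Const(s, []int32{9, 10, 11, 12})
	shape := op.Const(s, []int32{8})
	scatter := op.ScatterNd(s, indices, updates, shape)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{scatter}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // [0 11 0 10 9 0 0 12]
}
```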
-// -// In Python, this scatter operation would look like this: -// -// ```python -// indices = tf.constant([[4], [3], [1], [7]]) -// updates = tf.constant([9, 10, 11, 12]) -// shape = tf.constant([8]) -// scatter = tf.scatter_nd(indices, updates, shape) -// print(scatter) -// ``` -// -// The resulting tensor would look like this: -// -// [0, 11, 0, 10, 9, 0, 0, 12] -// -// We can also, insert entire slices of a higher rank tensor all at once. For -// example, if we wanted to insert two slices in the first dimension of a -// rank-3 tensor with two matrices of new values. -// -//
-// (figure omitted: illustration of inserting whole slices)
-// -// In Python, this scatter operation would look like this: -// -// ```python -// indices = tf.constant([[0], [2]]) -// updates = tf.constant([[[5, 5, 5, 5], [6, 6, 6, 6], -// [7, 7, 7, 7], [8, 8, 8, 8]], -// [[5, 5, 5, 5], [6, 6, 6, 6], -// [7, 7, 7, 7], [8, 8, 8, 8]]]) -// shape = tf.constant([4, 4, 4]) -// scatter = tf.scatter_nd(indices, updates, shape) -// print(scatter) -// ``` -// -// The resulting tensor would look like this: -// -// [[[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]], -// [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], -// [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]], -// [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]] -// -// Note that on CPU, if an out of bound index is found, an error is returned. -// On GPU, if an out of bound index is found, the index is ignored. -// -// Arguments: -// indices: Index tensor. -// updates: Updates to scatter into output. -// shape: 1-D. The shape of the resulting tensor. -// -// Returns A new tensor with the given shape and updates applied according -// to the indices. -func ScatterNd(scope *Scope, indices tf.Output, updates tf.Output, shape tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ScatterNd", - Input: []tf.Input{ - indices, updates, shape, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// UniqueAttr is an optional argument to Unique. -type UniqueAttr func(optionalAttr) - -// UniqueOutIdx sets the optional out_idx attribute to value. -// If not specified, defaults to DT_INT32 -func UniqueOutIdx(value tf.DataType) UniqueAttr { - return func(m optionalAttr) { - m["out_idx"] = value - } -} - -// Finds unique elements in a 1-D tensor. -// -// This operation returns a tensor `y` containing all of the unique elements of `x` -// sorted in the same order that they occur in `x`; `x` does not need to be sorted. -// This operation also returns a tensor `idx` the same size as `x` that contains -// the index of each value of `x` in the unique output `y`. In other words: -// -// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` -// -// Examples: -// -// ``` -// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] -// y, idx = unique(x) -// y ==> [1, 2, 4, 7, 8] -// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] -// ``` -// -// ``` -// # tensor 'x' is [4, 5, 1, 2, 3, 3, 4, 5] -// y, idx = unique(x) -// y ==> [4, 5, 1, 2, 3] -// idx ==> [0, 1, 2, 3, 4, 4, 0, 1] -// ``` -// -// Arguments: -// x: 1-D. -// -// Returns: -// y: 1-D. -// idx: 1-D. -func Unique(scope *Scope, x tf.Output, optional ...UniqueAttr) (y tf.Output, idx tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Unique", - Input: []tf.Input{ - x, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Converts a `RaggedTensor` into a `SparseTensor` with the same values. -// -// input=ragged.from_nested_row_splits(rt_dense_values, rt_nested_splits) -// output=SparseTensor(indices=sparse_indices, values=sparse_values, -// dense_shape=sparse_dense_shape) -// -// Arguments: -// rt_nested_splits: The `row_splits` for the `RaggedTensor`. -// rt_dense_values: The `flat_values` for the `RaggedTensor`. -// -// Returns: -// sparse_indices: The indices for the `SparseTensor`. -// sparse_values: The values of the `SparseTensor`. -// sparse_dense_shape: `sparse_dense_shape` is a tight bounding box of the input `RaggedTensor`. 
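A minimal Go sketch of the `Unique` wrapper above, mirroring the first example in its documentation and assuming the standard Go session API:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Mirrors the docs: x = [1, 1, 2, 4, 4, 4, 7, 8, 8].
	x := op.Const(s, []int32{1, 1, 2, 4, 4, 4, 7, 8, 8})
	y, idx := op.Unique(s, x)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{y, idx}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // [1 2 4 7 8]
	fmt.Println(out[1].Value()) // [0 0 1 2 2 2 3 4 4]
}
```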
-func RaggedTensorToSparse(scope *Scope, rt_nested_splits []tf.Output, rt_dense_values tf.Output) (sparse_indices tf.Output, sparse_values tf.Output, sparse_dense_shape tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "RaggedTensorToSparse", - Input: []tf.Input{ - tf.OutputList(rt_nested_splits), rt_dense_values, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Returns the name of the device on which `resource` has been placed. -func ExperimentalIteratorGetDevice(scope *Scope, resource tf.Output) (device tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ExperimentalIteratorGetDevice", - Input: []tf.Input{ - resource, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Records the bytes size of each element of `input_dataset` in a StatsAggregator. -func ExperimentalBytesProducedStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "ExperimentalBytesProducedStatsDataset", - Input: []tf.Input{ - input_dataset, tag, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes exponential linear: `exp(features) - 1` if < 0, `features` otherwise. -// -// See [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs) -// ](http://arxiv.org/abs/1511.07289) -func Elu(scope *Scope, features tf.Output) (activations tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Elu", - Input: []tf.Input{ - features, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// AddSparseToTensorsMapAttr is an optional argument to AddSparseToTensorsMap. -type AddSparseToTensorsMapAttr func(optionalAttr) - -// AddSparseToTensorsMapContainer sets the optional container attribute to value. -// -// value: The container name for the `SparseTensorsMap` created by this op. -// If not specified, defaults to "" -func AddSparseToTensorsMapContainer(value string) AddSparseToTensorsMapAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// AddSparseToTensorsMapSharedName sets the optional shared_name attribute to value. -// -// value: The shared name for the `SparseTensorsMap` created by this op. -// If blank, the new Operation's unique name is used. -// If not specified, defaults to "" -func AddSparseToTensorsMapSharedName(value string) AddSparseToTensorsMapAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Add a `SparseTensor` to a `SparseTensorsMap` return its handle. -// -// A `SparseTensor` is represented by three tensors: `sparse_indices`, -// `sparse_values`, and `sparse_shape`. -// -// This operator takes the given `SparseTensor` and adds it to a container -// object (a `SparseTensorsMap`). A unique key within this container is generated -// in the form of an `int64`, and this is the value that is returned. -// -// The `SparseTensor` can then be read out as part of a minibatch by passing -// the key as a vector element to `TakeManySparseFromTensorsMap`. To ensure -// the correct `SparseTensorsMap` is accessed, ensure that the same -// `container` and `shared_name` are passed to that Op. 
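As a small aside, the `Elu` wrapper shown above (exp(x) - 1 for negative inputs, identity otherwise) is easy to exercise end to end; a hedged sketch assuming the standard Go session API:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Elu: exp(x) - 1 for x < 0, x otherwise.
	features := op.Const(s, []float32{-1.0, 0.0, 2.0})
	activations := op.Elu(s, features)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{activations}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // approximately [-0.632 0 2]
}
```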
If no `shared_name` -// is provided here, instead use the *name* of the Operation created by calling -// `AddSparseToTensorsMap` as the `shared_name` passed to -// `TakeManySparseFromTensorsMap`. Ensure the Operations are colocated. -// -// Arguments: -// sparse_indices: 2-D. The `indices` of the `SparseTensor`. -// sparse_values: 1-D. The `values` of the `SparseTensor`. -// sparse_shape: 1-D. The `shape` of the `SparseTensor`. -// -// Returns 0-D. The handle of the `SparseTensor` now stored in the -// `SparseTensorsMap`. -func AddSparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...AddSparseToTensorsMapAttr) (sparse_handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "AddSparseToTensorsMap", - Input: []tf.Input{ - sparse_indices, sparse_values, sparse_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Transforms a vector of tf.Example protos (as strings) into typed tensors. -// -// Arguments: -// serialized: A scalar or vector containing binary serialized Example protos. -// names: A tensor containing the names of the serialized protos. -// Corresponds 1:1 with the `serialized` tensor. -// May contain, for example, table key (descriptive) names for the -// corresponding serialized protos. These are purely useful for debugging -// purposes, and the presence of values here has no effect on the output. -// May also be an empty vector if no names are available. -// If non-empty, this tensor must have the same shape as "serialized". -// sparse_keys: Vector of strings. -// The keys expected in the Examples' features associated with sparse values. -// dense_keys: Vector of strings. -// The keys expected in the Examples' features associated with dense values. -// ragged_keys: Vector of strings. -// The keys expected in the Examples' features associated with ragged values. -// dense_defaults: A list of Tensors (some may be empty). Corresponds 1:1 with `dense_keys`. -// dense_defaults[j] provides default values -// when the example's feature_map lacks dense_key[j]. If an empty Tensor is -// provided for dense_defaults[j], then the Feature dense_keys[j] is required. -// The input type is inferred from dense_defaults[j], even when it's empty. -// If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined, -// then the shape of dense_defaults[j] must match that of dense_shapes[j]. -// If dense_shapes[j] has an undefined major dimension (variable strides dense -// feature), dense_defaults[j] must contain a single element: -// the padding element. -// num_sparse: The number of sparse keys. -// sparse_types: A list of `num_sparse` types; the data types of data in each Feature -// given in sparse_keys. -// Currently the ParseExample supports DT_FLOAT (FloatList), -// DT_INT64 (Int64List), and DT_STRING (BytesList). -// ragged_value_types: A list of `num_ragged` types; the data types of data in each Feature -// given in ragged_keys (where `num_ragged = sparse_keys.size()`). -// Currently the ParseExample supports DT_FLOAT (FloatList), -// DT_INT64 (Int64List), and DT_STRING (BytesList). -// ragged_split_types: A list of `num_ragged` types; the data types of row_splits in each Feature -// given in ragged_keys (where `num_ragged = sparse_keys.size()`). -// May be DT_INT32 or DT_INT64. 
-// dense_shapes: A list of `num_dense` shapes; the shapes of data in each Feature -// given in dense_keys (where `num_dense = dense_keys.size()`). -// The number of elements in the Feature corresponding to dense_key[j] -// must always equal dense_shapes[j].NumEntries(). -// If dense_shapes[j] == (D0, D1, ..., DN) then the shape of output -// Tensor dense_values[j] will be (|serialized|, D0, D1, ..., DN): -// The dense outputs are just the inputs row-stacked by batch. -// This works for dense_shapes[j] = (-1, D1, ..., DN). In this case -// the shape of the output Tensor dense_values[j] will be -// (|serialized|, M, D1, .., DN), where M is the maximum number of blocks -// of elements of length D1 * .... * DN, across all minibatch entries -// in the input. Any minibatch entry with less than M blocks of elements of -// length D1 * ... * DN will be padded with the corresponding default_value -// scalar element along the second dimension. -func ParseExampleV2(scope *Scope, serialized tf.Output, names tf.Output, sparse_keys tf.Output, dense_keys tf.Output, ragged_keys tf.Output, dense_defaults []tf.Output, num_sparse int64, sparse_types []tf.DataType, ragged_value_types []tf.DataType, ragged_split_types []tf.DataType, dense_shapes []tf.Shape) (sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shapes []tf.Output, dense_values []tf.Output, ragged_values []tf.Output, ragged_row_splits []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_sparse": num_sparse, "sparse_types": sparse_types, "ragged_value_types": ragged_value_types, "ragged_split_types": ragged_split_types, "dense_shapes": dense_shapes} - opspec := tf.OpSpec{ - Type: "ParseExampleV2", - Input: []tf.Input{ - serialized, names, sparse_keys, dense_keys, ragged_keys, tf.OutputList(dense_defaults), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if sparse_indices, idx, err = makeOutputList(op, idx, "sparse_indices"); err != nil { - scope.UpdateErr("ParseExampleV2", err) - return - } - if sparse_values, idx, err = makeOutputList(op, idx, "sparse_values"); err != nil { - scope.UpdateErr("ParseExampleV2", err) - return - } - if sparse_shapes, idx, err = makeOutputList(op, idx, "sparse_shapes"); err != nil { - scope.UpdateErr("ParseExampleV2", err) - return - } - if dense_values, idx, err = makeOutputList(op, idx, "dense_values"); err != nil { - scope.UpdateErr("ParseExampleV2", err) - return - } - if ragged_values, idx, err = makeOutputList(op, idx, "ragged_values"); err != nil { - scope.UpdateErr("ParseExampleV2", err) - return - } - if ragged_row_splits, idx, err = makeOutputList(op, idx, "ragged_row_splits"); err != nil { - scope.UpdateErr("ParseExampleV2", err) - return - } - return sparse_indices, sparse_values, sparse_shapes, dense_values, ragged_values, ragged_row_splits -} - -// Saves input tensors slices to disk. -// -// This is like `Save` except that tensors can be listed in the saved file as being -// a slice of a larger tensor. `shapes_and_slices` specifies the shape of the -// larger tensor and the slice that this tensor covers. `shapes_and_slices` must -// have as many elements as `tensor_names`. -// -// Elements of the `shapes_and_slices` input must either be: -// -// * The empty string, in which case the corresponding tensor is -// saved normally. -// * A string of the form `dim0 dim1 ... 
dimN-1 slice-spec` where the -// `dimI` are the dimensions of the larger tensor and `slice-spec` -// specifies what part is covered by the tensor to save. -// -// `slice-spec` itself is a `:`-separated list: `slice0:slice1:...:sliceN-1` -// where each `sliceI` is either: -// -// * The string `-` meaning that the slice covers all indices of this dimension -// * `start,length` where `start` and `length` are integers. In that -// case the slice covers `length` indices starting at `start`. -// -// See also `Save`. -// -// Arguments: -// filename: Must have a single element. The name of the file to which we write the -// tensor. -// tensor_names: Shape `[N]`. The names of the tensors to be saved. -// shapes_and_slices: Shape `[N]`. The shapes and slice specifications to use when -// saving the tensors. -// data: `N` tensors to save. -// -// Returns the created operation. -func SaveSlices(scope *Scope, filename tf.Output, tensor_names tf.Output, shapes_and_slices tf.Output, data []tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SaveSlices", - Input: []tf.Input{ - filename, tensor_names, shapes_and_slices, tf.OutputList(data), - }, - } - return scope.AddOperation(opspec) -} - -// AvgPool3DAttr is an optional argument to AvgPool3D. -type AvgPool3DAttr func(optionalAttr) - -// AvgPool3DDataFormat sets the optional data_format attribute to value. -// -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func AvgPool3DDataFormat(value string) AvgPool3DAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Performs 3D average pooling on the input. -// -// Arguments: -// input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over. -// ksize: 1-D tensor of length 5. The size of the window for each dimension of -// the input tensor. Must have `ksize[0] = ksize[4] = 1`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. -// -// Returns The average pooled output tensor. -func AvgPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPool3DAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "AvgPool3D", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// FusedBatchNormGradV3Attr is an optional argument to FusedBatchNormGradV3. -type FusedBatchNormGradV3Attr func(optionalAttr) - -// FusedBatchNormGradV3Epsilon sets the optional epsilon attribute to value. -// -// value: A small float number added to the variance of x. -// If not specified, defaults to 0.0001 -func FusedBatchNormGradV3Epsilon(value float32) FusedBatchNormGradV3Attr { - return func(m optionalAttr) { - m["epsilon"] = value - } -} - -// FusedBatchNormGradV3DataFormat sets the optional data_format attribute to value. -// -// value: The data format for y_backprop, x, x_backprop. -// Either "NHWC" (default) or "NCHW". 
-// If not specified, defaults to "NHWC" -func FusedBatchNormGradV3DataFormat(value string) FusedBatchNormGradV3Attr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// FusedBatchNormGradV3IsTraining sets the optional is_training attribute to value. -// -// value: A bool value to indicate the operation is for training (default) -// or inference. -// If not specified, defaults to true -func FusedBatchNormGradV3IsTraining(value bool) FusedBatchNormGradV3Attr { - return func(m optionalAttr) { - m["is_training"] = value - } -} - -// Gradient for batch normalization. -// -// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". -// The size of 1D Tensors matches the dimension C of the 4D Tensors. -// -// Arguments: -// y_backprop: A 4D Tensor for the gradient with respect to y. -// x: A 4D Tensor for input data. -// scale: A 1D Tensor for scaling factor, to scale the normalized x. -// reserve_space_1: When is_training is True, a 1D Tensor for the computed batch -// mean to be reused in gradient computation. When is_training is -// False, a 1D Tensor for the population mean to be reused in both -// 1st and 2nd order gradient computation. -// reserve_space_2: When is_training is True, a 1D Tensor for the computed batch -// variance (inverted variance in the cuDNN case) to be reused in -// gradient computation. When is_training is False, a 1D Tensor -// for the population variance to be reused in both 1st and 2nd -// order gradient computation. -// reserve_space_3: When is_training is True, a 1D Tensor for some intermediate results to be reused -// in gradient computation. When is_training is False, a dummy empty Tensor will be -// created. -// -// Returns: -// x_backprop: A 4D Tensor for the gradient with respect to x. -// scale_backprop: A 1D Tensor for the gradient with respect to scale. -// offset_backprop: A 1D Tensor for the gradient with respect to offset. -// reserve_space_4: Unused placeholder to match the mean input in FusedBatchNorm. -// reserve_space_5: Unused placeholder to match the variance input -// in FusedBatchNorm. -func FusedBatchNormGradV3(scope *Scope, y_backprop tf.Output, x tf.Output, scale tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output, reserve_space_3 tf.Output, optional ...FusedBatchNormGradV3Attr) (x_backprop tf.Output, scale_backprop tf.Output, offset_backprop tf.Output, reserve_space_4 tf.Output, reserve_space_5 tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FusedBatchNormGradV3", - Input: []tf.Input{ - y_backprop, x, scale, reserve_space_1, reserve_space_2, reserve_space_3, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) -} - -// Returns the number of records this Reader has produced. -// -// This is the same as the number of ReaderRead executions that have -// succeeded. -// -// Arguments: -// reader_handle: Handle to a Reader. -func ReaderNumRecordsProducedV2(scope *Scope, reader_handle tf.Output) (records_produced tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReaderNumRecordsProducedV2", - Input: []tf.Input{ - reader_handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// DecodeRawAttr is an optional argument to DecodeRaw. 
-type DecodeRawAttr func(optionalAttr) - -// DecodeRawLittleEndian sets the optional little_endian attribute to value. -// -// value: Whether the input `bytes` are in little-endian order. -// Ignored for `out_type` values that are stored in a single byte like -// `uint8`. -// If not specified, defaults to true -func DecodeRawLittleEndian(value bool) DecodeRawAttr { - return func(m optionalAttr) { - m["little_endian"] = value - } -} - -// Reinterpret the bytes of a string as a vector of numbers. -// -// Arguments: -// bytes: All the elements must have the same length. -// -// -// Returns A Tensor with one more dimension than the input `bytes`. The -// added dimension will have size equal to the length of the elements -// of `bytes` divided by the number of bytes to represent `out_type`. -func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ...DecodeRawAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"out_type": out_type} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DecodeRaw", - Input: []tf.Input{ - bytes, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Gather ragged slices from `params` axis `0` according to `indices`. -// -// Outputs a `RaggedTensor` output composed from `output_dense_values` and -// `output_nested_splits`, such that: -// -// ```python -// output.shape = indices.shape + params.shape[1:] -// output.ragged_rank = indices.shape.ndims + params.ragged_rank -// output[i...j, d0...dn] = params[indices[i...j], d0...dn] -// ``` -// -// where -// -// * `params = -// ragged.from_nested_row_splits(params_dense_values, params_nested_splits)` -// provides the values that should be gathered. -// * `indices` ia a dense tensor with dtype `int32` or `int64`, indicating which -// values should be gathered. -// * `output = -// ragged.from_nested_row_splits(output_dense_values, output_nested_splits)` -// is the output tensor. -// -// (Note: This c++ op is used to implement the higher-level python -// `tf.ragged.gather` op, which also supports ragged indices.) -// -// -// Arguments: -// params_nested_splits: The `nested_row_splits` tensors that define the row-partitioning for the -// `params` RaggedTensor input. -// params_dense_values: The `flat_values` for the `params` RaggedTensor. There was a terminology change -// at the python level from dense_values to flat_values, so dense_values is the -// deprecated name. -// indices: Indices in the outermost dimension of `params` of the values that should be -// gathered. -// OUTPUT_RAGGED_RANK: The ragged rank of the output RaggedTensor. `output_nested_splits` will contain -// this number of `row_splits` tensors. This value should equal -// `indices.shape.ndims + params.ragged_rank - 1`. -// -// Returns: -// output_nested_splits: The `nested_row_splits` tensors that define the row-partitioning for the -// returned RaggedTensor. -// output_dense_values: The `flat_values` for the returned RaggedTensor. 
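A minimal Go sketch of `DecodeRaw` as documented above, assuming the standard Go session API; decoding a four-byte string as `uint8` sidesteps any endianness concerns:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Reinterpret the bytes of a scalar string as a vector of uint8 values.
	raw := op.Const(s, "ABCD")
	decoded := op.DecodeRaw(s, raw, tf.Uint8)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{decoded}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // [65 66 67 68]
}
```

As the documentation notes, the output has one more dimension than the input, so the scalar string yields a length-4 vector here.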
-func RaggedGather(scope *Scope, params_nested_splits []tf.Output, params_dense_values tf.Output, indices tf.Output, OUTPUT_RAGGED_RANK int64) (output_nested_splits []tf.Output, output_dense_values tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"OUTPUT_RAGGED_RANK": OUTPUT_RAGGED_RANK} - opspec := tf.OpSpec{ - Type: "RaggedGather", - Input: []tf.Input{ - tf.OutputList(params_nested_splits), params_dense_values, indices, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if output_nested_splits, idx, err = makeOutputList(op, idx, "output_nested_splits"); err != nil { - scope.UpdateErr("RaggedGather", err) - return - } - output_dense_values = op.Output(idx) - return output_nested_splits, output_dense_values -} - -// QuantizeV2Attr is an optional argument to QuantizeV2. -type QuantizeV2Attr func(optionalAttr) - -// QuantizeV2Mode sets the optional mode attribute to value. -// If not specified, defaults to "MIN_COMBINED" -func QuantizeV2Mode(value string) QuantizeV2Attr { - return func(m optionalAttr) { - m["mode"] = value - } -} - -// QuantizeV2RoundMode sets the optional round_mode attribute to value. -// If not specified, defaults to "HALF_AWAY_FROM_ZERO" -func QuantizeV2RoundMode(value string) QuantizeV2Attr { - return func(m optionalAttr) { - m["round_mode"] = value - } -} - -// QuantizeV2NarrowRange sets the optional narrow_range attribute to value. -// If not specified, defaults to false -func QuantizeV2NarrowRange(value bool) QuantizeV2Attr { - return func(m optionalAttr) { - m["narrow_range"] = value - } -} - -// QuantizeV2Axis sets the optional axis attribute to value. -// If not specified, defaults to -1 -func QuantizeV2Axis(value int64) QuantizeV2Attr { - return func(m optionalAttr) { - m["axis"] = value - } -} - -// QuantizeV2EnsureMinimumRange sets the optional ensure_minimum_range attribute to value. -// If not specified, defaults to 0.01 -func QuantizeV2EnsureMinimumRange(value float32) QuantizeV2Attr { - return func(m optionalAttr) { - m["ensure_minimum_range"] = value - } -} - -// Quantize the 'input' tensor of type float to 'output' tensor of type 'T'. -// -// [min_range, max_range] are scalar floats that specify the range for -// the 'input' data. The 'mode' attribute controls exactly which calculations are -// used to convert the float values to their quantized equivalents. The -// 'round_mode' attribute controls which rounding tie-breaking algorithm is used -// when rounding float values to their quantized equivalents. -// -// In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: -// -// ``` -// out[i] = (in[i] - min_range) * range(T) / (max_range - min_range) -// if T == qint8: out[i] -= (range(T) + 1) / 2.0 -// ``` -// -// here `range(T) = numeric_limits::max() - numeric_limits::min()` -// -// *MIN_COMBINED Mode Example* -// -// Assume the input is type float and has a possible range of [0.0, 6.0] and the -// output type is quint8 ([0, 255]). The min_range and max_range values should be -// specified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each -// value of the input by 255/6 and cast to quint8. -// -// If the output type was qint8 ([-128, 127]), the operation will additionally -// subtract each value by 128 prior to casting, so that the range of values aligns -// with the range of qint8. 
-// -// If the mode is 'MIN_FIRST', then this approach is used: -// -// ``` -// num_discrete_values = 1 << (# of bits in T) -// range_adjust = num_discrete_values / (num_discrete_values - 1) -// range = (range_max - range_min) * range_adjust -// range_scale = num_discrete_values / range -// quantized = round(input * range_scale) - round(range_min * range_scale) + -// numeric_limits::min() -// quantized = max(quantized, numeric_limits::min()) -// quantized = min(quantized, numeric_limits::max()) -// ``` -// -// The biggest difference between this and MIN_COMBINED is that the minimum range -// is rounded first, before it's subtracted from the rounded value. With -// MIN_COMBINED, a small bias is introduced where repeated iterations of quantizing -// and dequantizing will introduce a larger and larger error. -// -// *SCALED mode Example* -// -// `SCALED` mode matches the quantization approach used in -// `QuantizeAndDequantize{V2|V3}`. -// -// If the mode is `SCALED`, the quantization is performed by multiplying each -// input value by a scaling_factor. -// The scaling_factor is determined from `min_range` and `max_range` to be as large -// as possible such that the range from `min_range` to `max_range` is representable -// within values of type T. -// -// ```c++ -// -// const int min_T = std::numeric_limits::min(); -// const int max_T = std::numeric_limits::max(); -// const float max_float = std::numeric_limits::max(); -// -// const float scale_factor_from_min_side = -// (min_T * min_range > 0) ? min_T / min_range : max_float; -// const float scale_factor_from_max_side = -// (max_T * max_range > 0) ? max_T / max_range : max_float; -// -// const float scale_factor = std::min(scale_factor_from_min_side, -// scale_factor_from_max_side); -// ``` -// -// We next use the scale_factor to adjust min_range and max_range as follows: -// -// ```c++ -// min_range = min_T / scale_factor; -// max_range = max_T / scale_factor; -// ``` -// -// -// e.g. if T = qint8, and initially min_range = -10, and max_range = 9, we would -// compare -128/-10.0 = 12.8 to 127/9.0 = 14.11, and set scaling_factor = 12.8 -// In this case, min_range would remain -10, but max_range would be adjusted to -// 127 / 12.8 = 9.921875 -// -// So we will quantize input values in the range (-10, 9.921875) to (-128, 127). -// -// The input tensor can now be quantized by clipping values to the range -// `min_range` to `max_range`, then multiplying by scale_factor as follows: -// -// ```c++ -// result = round(min(max_range, max(min_range, input)) * scale_factor) -// ``` -// -// The adjusted `min_range` and `max_range` are returned as outputs 2 and 3 of -// this operation. These outputs should be used as the range for any further -// calculations. -// -// -// *narrow_range (bool) attribute* -// -// If true, we do not use the minimum quantized value. -// i.e. for int8 the quantized output, it would be restricted to the range -// -127..127 instead of the full -128..127 range. -// This is provided for compatibility with certain inference backends. -// (Only applies to SCALED mode) -// -// -// *axis (int) attribute* -// -// An optional `axis` attribute can specify a dimension index of the input tensor, -// such that quantization ranges will be calculated and applied separately for each -// slice of the tensor along that dimension. This is useful for per-channel -// quantization. -// -// If axis is specified, min_range and max_range -// -// if `axis`=None, per-tensor quantization is performed as normal. 
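The SCALED-mode worked example above can be checked with a few lines of plain Go; this only mirrors the scale-factor arithmetic quoted from the docs, not the op itself:

```go
package main

import (
	"fmt"
	"math"
)

// Mirrors the SCALED-mode scale_factor computation for T = qint8,
// min_range = -10, max_range = 9, as in the example above.
func main() {
	const minT, maxT = -128.0, 127.0
	minRange, maxRange := -10.0, 9.0

	scaleFromMin := math.MaxFloat64
	if minT*minRange > 0 {
		scaleFromMin = minT / minRange // -128 / -10 = 12.8
	}
	scaleFromMax := math.MaxFloat64
	if maxT*maxRange > 0 {
		scaleFromMax = maxT / maxRange // 127 / 9 is roughly 14.11
	}
	scale := math.Min(scaleFromMin, scaleFromMax) // 12.8

	// The quantization range is then re-derived from the chosen scale factor.
	minRange, maxRange = minT/scale, maxT/scale
	fmt.Println(scale, minRange, maxRange) // 12.8 -10 9.921875
}
```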
-// -// -// *ensure_minimum_range (float) attribute* -// -// Ensures the minimum quantization range is at least this value. -// The legacy default value for this is 0.01, but it is strongly suggested to -// set it to 0 for new uses. -// -// -// Arguments: -// -// min_range: The minimum value of the quantization range. This value may be adjusted by the -// op depending on other parameters. The adjusted value is written to `output_min`. -// If the `axis` attribute is specified, this must be a 1-D tensor whose size -// matches the `axis` dimension of the input and output tensors. -// max_range: The maximum value of the quantization range. This value may be adjusted by the -// op depending on other parameters. The adjusted value is written to `output_max`. -// If the `axis` attribute is specified, this must be a 1-D tensor whose size -// matches the `axis` dimension of the input and output tensors. -// -// -// Returns: -// output: The quantized data produced from the float input. -// output_min: The final quantization range minimum, used to clip input values before scaling -// and rounding them to quantized values. -// If the `axis` attribute is specified, this will be a 1-D tensor whose size -// matches the `axis` dimension of the input and output tensors. -// output_max: The final quantization range maximum, used to clip input values before scaling -// and rounding them to quantized values. -// If the `axis` attribute is specified, this will be a 1-D tensor whose size -// matches the `axis` dimension of the input and output tensors. -func QuantizeV2(scope *Scope, input tf.Output, min_range tf.Output, max_range tf.Output, T tf.DataType, optional ...QuantizeV2Attr) (output tf.Output, output_min tf.Output, output_max tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"T": T} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizeV2", - Input: []tf.Input{ - input, min_range, max_range, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Returns the truth value of (x >= y) element-wise. -// -// *NOTE*: `GreaterEqual` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -// -// Example: -// -// ```python -// x = tf.constant([5, 4, 6, 7]) -// y = tf.constant([5, 2, 5, 10]) -// tf.math.greater_equal(x, y) ==> [True, True, True, False] -// -// x = tf.constant([5, 4, 6, 7]) -// y = tf.constant([5]) -// tf.math.greater_equal(x, y) ==> [True, False, True, True] -// ``` -func GreaterEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "GreaterEqual", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// BatchAttr is an optional argument to Batch. -type BatchAttr func(optionalAttr) - -// BatchMaxEnqueuedBatches sets the optional max_enqueued_batches attribute to value. -// If not specified, defaults to 10 -func BatchMaxEnqueuedBatches(value int64) BatchAttr { - return func(m optionalAttr) { - m["max_enqueued_batches"] = value - } -} - -// BatchAllowedBatchSizes sets the optional allowed_batch_sizes attribute to value. -// If not specified, defaults to <> -func BatchAllowedBatchSizes(value []int64) BatchAttr { - return func(m optionalAttr) { - m["allowed_batch_sizes"] = value - } -} - -// BatchContainer sets the optional container attribute to value. 
-// If not specified, defaults to "" -func BatchContainer(value string) BatchAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// BatchSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func BatchSharedName(value string) BatchAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// BatchBatchingQueue sets the optional batching_queue attribute to value. -// If not specified, defaults to "" -func BatchBatchingQueue(value string) BatchAttr { - return func(m optionalAttr) { - m["batching_queue"] = value - } -} - -// Batches all input tensors nondeterministically. -// -// When many instances of this Op are being run concurrently with the same -// container/shared_name in the same device, some will output zero-shaped Tensors -// and others will output Tensors of size up to max_batch_size. -// -// All Tensors in in_tensors are batched together (so, for example, labels and -// features should be batched with a single instance of this operation. -// -// Each invocation of batch emits an `id` scalar which will be used to identify -// this particular invocation when doing unbatch or its gradient. -// -// Each op which emits a non-empty batch will also emit a non-empty batch_index -// Tensor, which, is a [K, 3] matrix where each row contains the invocation's id, -// start, and length of elements of each set of Tensors present in batched_tensors. -// -// Batched tensors are concatenated along the first dimension, and all tensors in -// in_tensors must have the first dimension of the same size. -// -// in_tensors: The tensors to be batched. -// num_batch_threads: Number of scheduling threads for processing batches of work. -// Determines the number of batches processed in parallel. -// max_batch_size: Batch sizes will never be bigger than this. -// batch_timeout_micros: Maximum number of microseconds to wait before outputting -// an incomplete batch. -// allowed_batch_sizes: Optional list of allowed batch sizes. If left empty, does -// nothing. Otherwise, supplies a list of batch sizes, causing the op to pad -// batches up to one of those sizes. The entries must increase monotonically, and -// the final entry must equal max_batch_size. -// grad_timeout_micros: The timeout to use for the gradient. See Unbatch. -// batched_tensors: Either empty tensors or a batch of concatenated Tensors. -// batch_index: If out_tensors is non-empty, has information to invert it. -// container: Controls the scope of sharing of this batch. -// id: always contains a scalar with a unique ID for this invocation of Batch. -// shared_name: Concurrently running instances of batch in the same device with the -// same container and shared_name will batch their elements together. If left -// empty, the op name will be used as the shared name. -// T: the types of tensors to be batched. 
-func Batch(scope *Scope, in_tensors []tf.Output, num_batch_threads int64, max_batch_size int64, batch_timeout_micros int64, grad_timeout_micros int64, optional ...BatchAttr) (batched_tensors []tf.Output, batch_index tf.Output, id tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_batch_threads": num_batch_threads, "max_batch_size": max_batch_size, "batch_timeout_micros": batch_timeout_micros, "grad_timeout_micros": grad_timeout_micros} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Batch", - Input: []tf.Input{ - tf.OutputList(in_tensors), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if batched_tensors, idx, err = makeOutputList(op, idx, "batched_tensors"); err != nil { - scope.UpdateErr("Batch", err) - return - } - batch_index = op.Output(idx) - id = op.Output(idx) - return batched_tensors, batch_index, id -} - -// UnicodeDecodeAttr is an optional argument to UnicodeDecode. -type UnicodeDecodeAttr func(optionalAttr) - -// UnicodeDecodeErrors sets the optional errors attribute to value. -// -// value: Error handling policy when there is invalid formatting found in the input. -// The value of 'strict' will cause the operation to produce a InvalidArgument -// error on any invalid input formatting. A value of 'replace' (the default) will -// cause the operation to replace any invalid formatting in the input with the -// `replacement_char` codepoint. A value of 'ignore' will cause the operation to -// skip any invalid formatting in the input and produce no corresponding output -// character. -// If not specified, defaults to "replace" -func UnicodeDecodeErrors(value string) UnicodeDecodeAttr { - return func(m optionalAttr) { - m["errors"] = value - } -} - -// UnicodeDecodeReplacementChar sets the optional replacement_char attribute to value. -// -// value: The replacement character codepoint to be used in place of any invalid -// formatting in the input when `errors='replace'`. Any valid unicode codepoint may -// be used. The default value is the default unicode replacement character is -// 0xFFFD or U+65533.) -// If not specified, defaults to 65533 -func UnicodeDecodeReplacementChar(value int64) UnicodeDecodeAttr { - return func(m optionalAttr) { - m["replacement_char"] = value - } -} - -// UnicodeDecodeReplaceControlCharacters sets the optional replace_control_characters attribute to value. -// -// value: Whether to replace the C0 control characters (00-1F) with the -// `replacement_char`. Default is false. -// If not specified, defaults to false -func UnicodeDecodeReplaceControlCharacters(value bool) UnicodeDecodeAttr { - return func(m optionalAttr) { - m["replace_control_characters"] = value - } -} - -// UnicodeDecodeTsplits sets the optional Tsplits attribute to value. -// If not specified, defaults to DT_INT64 -func UnicodeDecodeTsplits(value tf.DataType) UnicodeDecodeAttr { - return func(m optionalAttr) { - m["Tsplits"] = value - } -} - -// Decodes each string in `input` into a sequence of Unicode code points. -// -// The character codepoints for all strings are returned using a single vector -// `char_values`, with strings expanded to characters in row-major order. -// -// The `row_splits` tensor indicates where the codepoints for -// each input string begin and end within the `char_values` tensor. 
-// In particular, the values for the `i`th -// string (in row-major order) are stored in the slice -// `[row_splits[i]:row_splits[i+1]]`. Thus: -// -// * `char_values[row_splits[i]+j]` is the Unicode codepoint for the `j`th -// character in the `i`th string (in row-major order). -// * `row_splits[i+1] - row_splits[i]` is the number of characters in the `i`th -// string (in row-major order). -// -// Arguments: -// input: The text to be decoded. Can have any shape. Note that the output is flattened -// to a vector of char values. -// input_encoding: Text encoding of the input strings. This is any of the encodings supported -// by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`. -// -// Returns: -// row_splits: A 1D int32 tensor containing the row splits. -// char_values: A 1D int32 Tensor containing the decoded codepoints. -func UnicodeDecode(scope *Scope, input tf.Output, input_encoding string, optional ...UnicodeDecodeAttr) (row_splits tf.Output, char_values tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"input_encoding": input_encoding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "UnicodeDecode", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Create a dense tensor from a ragged tensor, possibly altering its shape. -// -// The `ragged_to_dense` op creates a dense tensor from a list of row partition -// tensors, a value vector, and default values. If the shape is unspecified, the -// minimal shape required to contain all the elements in the ragged tensor (the -// natural shape) will be used. If some dimensions are left unspecified, then the -// size of the natural shape is used in that dimension. -// -// The default_value will be broadcast to the output shape. After that, the values -// from the ragged tensor overwrite the default values. Note that the default_value -// must have less dimensions than the value. -// -// The row partition tensors are in the order of the dimensions. -// At present, the types can be: -// * "ROW_SPLITS": the row_splits tensor from the ragged tensor. -// * "VALUE_ROWIDS": the value_rowids tensor from the ragged tensor. -// * "FIRST_DIM_SIZE": if value_rowids is used for the first dimension, then it -// is preceded by "FIRST_DIM_SIZE". -// -// Arguments: -// shape: The desired shape of the the output tensor. If left unspecified (empty), -// the minimal shape required to contain all the elements in the ragged tensor -// (the natural shape) will be used. If some dimensions are left unspecified, then -// the size of the natural shape is used in that dimension. -// -// Note that dense dimensions cannot be modified by the shape argument. Trying to -// change the size of a dense dimension will cause the op to fail. -// Examples: -// natural shape: [4, 5, 6] -// shape: -1 -// output shape: [4, 5, 6] -// -// natural shape: [4, 5, 6] -// shape: [3, -1, 2] -// output shape: [3, 5, 2] -// -// natural shape: [4, 5, 6] -// shape: [3, 7, 2] -// output shape: [3, 7, 2] -// -// values: A 1D tensor representing the values of the ragged tensor. -// default_value: The default_value when the shape is larger than the ragged tensor. The -// default_value is broadcast until it is the shape of the output tensor, and -// then overwritten by values in the ragged tensor. The default value must be -// compatible with this broadcast operation, and must have fewer dimensions than -// the value tensor. 
-// -// row_partition_types: The types of the row partition tensors. At present, these can be: -// * "ROW_SPLITS": the row_splits tensor from the ragged tensor. -// * "VALUE_ROWIDS": the value_rowids tensor from the ragged tensor. -// * "FIRST_DIM_SIZE": if value_rowids is used for the first dimension, then it -// is preceeded by "FIRST_DIM_SIZE". -// The tensors are in the order of the dimensions. -// -// Returns The resulting dense tensor. -func RaggedTensorToTensor(scope *Scope, shape tf.Output, values tf.Output, default_value tf.Output, row_partition_tensors []tf.Output, row_partition_types []string) (result tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"row_partition_types": row_partition_types} - opspec := tf.OpSpec{ - Type: "RaggedTensorToTensor", - Input: []tf.Input{ - shape, values, default_value, tf.OutputList(row_partition_tensors), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// BatchMatMulAttr is an optional argument to BatchMatMul. -type BatchMatMulAttr func(optionalAttr) - -// BatchMatMulAdjX sets the optional adj_x attribute to value. -// -// value: If `True`, adjoint the slices of `x`. Defaults to `False`. -// If not specified, defaults to false -func BatchMatMulAdjX(value bool) BatchMatMulAttr { - return func(m optionalAttr) { - m["adj_x"] = value - } -} - -// BatchMatMulAdjY sets the optional adj_y attribute to value. -// -// value: If `True`, adjoint the slices of `y`. Defaults to `False`. -// If not specified, defaults to false -func BatchMatMulAdjY(value bool) BatchMatMulAttr { - return func(m optionalAttr) { - m["adj_y"] = value - } -} - -// Multiplies slices of two tensors in batches. -// -// Multiplies all slices of `Tensor` `x` and `y` (each slice can be -// viewed as an element of a batch), and arranges the individual results -// in a single output tensor of the same batch size. Each of the -// individual slices can optionally be adjointed (to adjoint a matrix -// means to transpose and conjugate it) before multiplication by setting -// the `adj_x` or `adj_y` flag to `True`, which are by default `False`. -// -// The input tensors `x` and `y` are 2-D or higher with shape `[..., r_x, c_x]` -// and `[..., r_y, c_y]`. -// -// The output tensor is 2-D or higher with shape `[..., r_o, c_o]`, where: -// -// r_o = c_x if adj_x else r_x -// c_o = r_y if adj_y else c_y -// -// It is computed as: -// -// output[..., :, :] = matrix(x[..., :, :]) * matrix(y[..., :, :]) -// -// Arguments: -// x: 2-D or higher with shape `[..., r_x, c_x]`. -// y: 2-D or higher with shape `[..., r_y, c_y]`. -// -// Returns 3-D or higher with shape `[..., r_o, c_o]` -func BatchMatMul(scope *Scope, x tf.Output, y tf.Output, optional ...BatchMatMulAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "BatchMatMul", - Input: []tf.Input{ - x, y, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RaggedTensorFromVariantAttr is an optional argument to RaggedTensorFromVariant. -type RaggedTensorFromVariantAttr func(optionalAttr) - -// RaggedTensorFromVariantTsplits sets the optional Tsplits attribute to value. -// If not specified, defaults to DT_INT64 -func RaggedTensorFromVariantTsplits(value tf.DataType) RaggedTensorFromVariantAttr { - return func(m optionalAttr) { - m["Tsplits"] = value - } -} - -// Decodes a `variant` Tensor into a `RaggedTensor`. 
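The `r_o`/`c_o` rule for `BatchMatMul` above is easier to see with concrete shapes. A minimal sketch, assuming the standard Go bindings and a working TensorFlow C library; the input values are made up for illustration.

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// x: [batch=1, r_x=2, c_x=3], y: [batch=1, r_y=2, c_y=3].
	x := op.Const(s, [][][]float32{{{1, 2, 3}, {4, 5, 6}}})
	y := op.Const(s, [][][]float32{{{1, 0, 0}, {0, 1, 0}}})
	// With adj_x=false and adj_y=true: r_o = r_x = 2 and c_o = r_y = 2.
	z := op.BatchMatMul(s, x, y, op.BatchMatMulAdjY(true))

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	out, err := sess.Run(nil, []tf.Output{z}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Shape()) // [1 2 2]
}
```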
-// -// Decodes the given `variant` Tensor and returns a `RaggedTensor`. The input -// could be a scalar, meaning it encodes a single `RaggedTensor` with ragged_rank -// `output_ragged_rank`. It could also have an arbitrary rank, in which case each -// element is decoded into a `RaggedTensor` with ragged_rank `input_ragged_rank` -// and these are then stacked according to the input shape to output a single -// `RaggedTensor` with ragged_rank `output_ragged_rank`. Each `variant` element in -// the input Tensor is decoded by retrieving from the element a 1-D `variant` -// Tensor with `input_ragged_rank + 1` Tensors, corresponding to the splits and -// values of the decoded `RaggedTensor`. If `input_ragged_rank` is -1, then it is -// inferred as `output_ragged_rank` - `rank(encoded_ragged)`. See -// `RaggedTensorToVariant` for the corresponding encoding logic. -// -// -// Arguments: -// encoded_ragged: A `variant` Tensor containing encoded `RaggedTensor`s. -// input_ragged_rank: The ragged rank of each encoded `RaggedTensor` component in the input. If set to -// -1, this is inferred as `output_ragged_rank` - `rank(encoded_ragged)` -// output_ragged_rank: The expected ragged rank of the output `RaggedTensor`. The following must hold: -// `output_ragged_rank = rank(encoded_ragged) + input_ragged_rank`. -// -// -// Returns: -// output_nested_splits: A list of one or more Tensors representing the splits of the output -// `RaggedTensor`. -// output_dense_values: A Tensor representing the values of the output `RaggedTensor`. -func RaggedTensorFromVariant(scope *Scope, encoded_ragged tf.Output, input_ragged_rank int64, output_ragged_rank int64, Tvalues tf.DataType, optional ...RaggedTensorFromVariantAttr) (output_nested_splits []tf.Output, output_dense_values tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"input_ragged_rank": input_ragged_rank, "output_ragged_rank": output_ragged_rank, "Tvalues": Tvalues} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RaggedTensorFromVariant", - Input: []tf.Input{ - encoded_ragged, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if output_nested_splits, idx, err = makeOutputList(op, idx, "output_nested_splits"); err != nil { - scope.UpdateErr("RaggedTensorFromVariant", err) - return - } - output_dense_values = op.Output(idx) - return output_nested_splits, output_dense_values -} - -// RandomPoissonV2Attr is an optional argument to RandomPoissonV2. -type RandomPoissonV2Attr func(optionalAttr) - -// RandomPoissonV2Seed sets the optional seed attribute to value. -// -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func RandomPoissonV2Seed(value int64) RandomPoissonV2Attr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// RandomPoissonV2Seed2 sets the optional seed2 attribute to value. -// -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomPoissonV2Seed2(value int64) RandomPoissonV2Attr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// RandomPoissonV2Dtype sets the optional dtype attribute to value. 
-// If not specified, defaults to DT_INT64 -func RandomPoissonV2Dtype(value tf.DataType) RandomPoissonV2Attr { - return func(m optionalAttr) { - m["dtype"] = value - } -} - -// Outputs random values from the Poisson distribution(s) described by rate. -// -// This op uses two algorithms, depending on rate. If rate >= 10, then -// the algorithm by Hormann is used to acquire samples via -// transformation-rejection. -// See http://www.sciencedirect.com/science/article/pii/0167668793909974. -// -// Otherwise, Knuth's algorithm is used to acquire samples via multiplying uniform -// random variables. -// See Donald E. Knuth (1969). Seminumerical Algorithms. The Art of Computer -// Programming, Volume 2. Addison Wesley -// -// Arguments: -// shape: 1-D integer tensor. Shape of independent samples to draw from each -// distribution described by the shape parameters given in rate. -// rate: A tensor in which each scalar is a "rate" parameter describing the -// associated poisson distribution. -// -// Returns A tensor with shape `shape + shape(rate)`. Each slice -// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for -// `rate[i0, i1, ...iN]`. -func RandomPoissonV2(scope *Scope, shape tf.Output, rate tf.Output, optional ...RandomPoissonV2Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RandomPoissonV2", - Input: []tf.Input{ - shape, rate, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that takes a Bernoulli sample of the contents of another dataset. -// -// There is no transformation in the `tf.data` Python API for creating this dataset. -// Instead, it is created as a result of the `filter_with_random_uniform_fusion` -// static optimization. Whether this optimization is performed is determined by the -// `experimental_optimization.filter_with_random_uniform_fusion` option of -// `tf.data.Options`. -// -// Arguments: -// -// rate: A scalar representing the sample rate. Each element of `input_dataset` is -// retained with this probability, independent of all other elements. -// seed: A scalar representing seed of random number generator. -// seed2: A scalar representing seed2 of random number generator. -// -// -func SamplingDataset(scope *Scope, input_dataset tf.Output, rate tf.Output, seed tf.Output, seed2 tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "SamplingDataset", - Input: []tf.Input{ - input_dataset, rate, seed, seed2, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Reads and outputs the entire contents of the input filename. -func ReadFile(scope *Scope, filename tf.Output) (contents tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReadFile", - Input: []tf.Input{ - filename, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes requantization range per channel. -// -// Arguments: -// input: The original input tensor. -// input_min: The minimum value of the input tensor -// input_max: The maximum value of the input tensor. -// clip_value_max: The maximum value of the output that needs to be clipped. -// Example: set this to 6 for Relu6. 
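A short sketch of the `shape + shape(rate)` output-shape rule for `RandomPoissonV2` above; the concrete shapes, rates, and seed are illustrative assumptions.

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Draw 3 independent samples from each of 2 Poisson distributions.
	shape := op.Const(s, []int32{3})
	rate := op.Const(s, []float32{1.0, 10.0})
	samples := op.RandomPoissonV2(s, shape, rate, op.RandomPoissonV2Seed(7))

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	out, err := sess.Run(nil, []tf.Output{samples}, nil)
	if err != nil {
		panic(err)
	}
	// Output shape is shape + shape(rate) = [3] + [2], and samples[:, i]
	// holds the draws for rate[i].
	fmt.Println(out[0].Shape()) // [3 2]
}
```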
-// -// Returns: -// output_min: The minimum value of the final output tensor -// output_max: The maximum value of the final output tensor. -func RequantizationRangePerChannel(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, clip_value_max float32) (output_min tf.Output, output_max tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"clip_value_max": clip_value_max} - opspec := tf.OpSpec{ - Type: "RequantizationRangePerChannel", - Input: []tf.Input{ - input, input_min, input_max, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// TruncatedNormalAttr is an optional argument to TruncatedNormal. -type TruncatedNormalAttr func(optionalAttr) - -// TruncatedNormalSeed sets the optional seed attribute to value. -// -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func TruncatedNormalSeed(value int64) TruncatedNormalAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// TruncatedNormalSeed2 sets the optional seed2 attribute to value. -// -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func TruncatedNormalSeed2(value int64) TruncatedNormalAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Outputs random values from a truncated normal distribution. -// -// The generated values follow a normal distribution with mean 0 and standard -// deviation 1, except that values whose magnitude is more than 2 standard -// deviations from the mean are dropped and re-picked. -// -// Arguments: -// shape: The shape of the output tensor. -// dtype: The type of the output. -// -// Returns A tensor of the specified shape filled with random truncated normal -// values. -func TruncatedNormal(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...TruncatedNormalAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TruncatedNormal", - Input: []tf.Input{ - shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ParameterizedTruncatedNormalAttr is an optional argument to ParameterizedTruncatedNormal. -type ParameterizedTruncatedNormalAttr func(optionalAttr) - -// ParameterizedTruncatedNormalSeed sets the optional seed attribute to value. -// -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func ParameterizedTruncatedNormalSeed(value int64) ParameterizedTruncatedNormalAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// ParameterizedTruncatedNormalSeed2 sets the optional seed2 attribute to value. -// -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func ParameterizedTruncatedNormalSeed2(value int64) ParameterizedTruncatedNormalAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Outputs random values from a normal distribution. The parameters may each be a -// -// scalar which applies to the entire output, or a vector of length shape[0] which -// stores the parameters for each batch. -// -// Arguments: -// shape: The shape of the output tensor. 
Batches are indexed by the 0th dimension. -// means: The mean parameter of each batch. -// stdevs: The standard deviation parameter of each batch. Must be greater than 0. -// minvals: The minimum cutoff. May be -infinity. -// maxvals: The maximum cutoff. May be +infinity, and must be more than the minval -// for each batch. -// -// Returns A matrix of shape num_batches x samples_per_batch, filled with random -// truncated normal values using the parameters for each row. -func ParameterizedTruncatedNormal(scope *Scope, shape tf.Output, means tf.Output, stdevs tf.Output, minvals tf.Output, maxvals tf.Output, optional ...ParameterizedTruncatedNormalAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ParameterizedTruncatedNormal", - Input: []tf.Input{ - shape, means, stdevs, minvals, maxvals, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// QuantizedMatMulAttr is an optional argument to QuantizedMatMul. -type QuantizedMatMulAttr func(optionalAttr) - -// QuantizedMatMulToutput sets the optional Toutput attribute to value. -// If not specified, defaults to DT_QINT32 -func QuantizedMatMulToutput(value tf.DataType) QuantizedMatMulAttr { - return func(m optionalAttr) { - m["Toutput"] = value - } -} - -// QuantizedMatMulTransposeA sets the optional transpose_a attribute to value. -// -// value: If true, `a` is transposed before multiplication. -// If not specified, defaults to false -func QuantizedMatMulTransposeA(value bool) QuantizedMatMulAttr { - return func(m optionalAttr) { - m["transpose_a"] = value - } -} - -// QuantizedMatMulTransposeB sets the optional transpose_b attribute to value. -// -// value: If true, `b` is transposed before multiplication. -// If not specified, defaults to false -func QuantizedMatMulTransposeB(value bool) QuantizedMatMulAttr { - return func(m optionalAttr) { - m["transpose_b"] = value - } -} - -// QuantizedMatMulTactivation sets the optional Tactivation attribute to value. -// -// value: The type of output produced by activation function -// following this operation. -// If not specified, defaults to DT_QUINT8 -func QuantizedMatMulTactivation(value tf.DataType) QuantizedMatMulAttr { - return func(m optionalAttr) { - m["Tactivation"] = value - } -} - -// Perform a quantized matrix multiplication of `a` by the matrix `b`. -// -// The inputs must be two-dimensional matrices and the inner dimension of -// `a` (after being transposed if `transpose_a` is non-zero) must match the -// outer dimension of `b` (after being transposed if `transposed_b` is -// non-zero). -// -// Arguments: -// a: Must be a two-dimensional tensor. -// b: Must be a two-dimensional tensor. -// min_a: The float value that the lowest quantized `a` value represents. -// max_a: The float value that the highest quantized `a` value represents. -// min_b: The float value that the lowest quantized `b` value represents. -// max_b: The float value that the highest quantized `b` value represents. -// -// Returns: -// out -// min_out: The float value that the lowest quantized output value represents. -// max_out: The float value that the highest quantized output value represents. 
-func QuantizedMatMul(scope *Scope, a tf.Output, b tf.Output, min_a tf.Output, max_a tf.Output, min_b tf.Output, max_b tf.Output, optional ...QuantizedMatMulAttr) (out tf.Output, min_out tf.Output, max_out tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizedMatMul", - Input: []tf.Input{ - a, b, min_a, max_a, min_b, max_b, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Extract `patches` from `images` and put them in the "depth" output dimension. -// -// Arguments: -// images: 4-D Tensor with shape `[batch, in_rows, in_cols, depth]`. -// ksizes: The size of the sliding window for each dimension of `images`. -// strides: How far the centers of two consecutive patches are in -// the images. Must be: `[1, stride_rows, stride_cols, 1]`. -// rates: Must be: `[1, rate_rows, rate_cols, 1]`. This is the -// input stride, specifying how far two consecutive patch samples are in the -// input. Equivalent to extracting patches with -// `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by -// subsampling them spatially by a factor of `rates`. This is equivalent to -// `rate` in dilated (a.k.a. Atrous) convolutions. -// padding: The type of padding algorithm to use. -// -// Returns 4-D Tensor with shape `[batch, out_rows, out_cols, ksize_rows * -// ksize_cols * depth]` containing image patches with size -// `ksize_rows x ksize_cols x depth` vectorized in the "depth" dimension. Note -// `out_rows` and `out_cols` are the dimensions of the output patches. -func ExtractImagePatches(scope *Scope, images tf.Output, ksizes []int64, strides []int64, rates []int64, padding string) (patches tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksizes": ksizes, "strides": strides, "rates": rates, "padding": padding} - opspec := tf.OpSpec{ - Type: "ExtractImagePatches", - Input: []tf.Input{ - images, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Forwards the value of an available tensor from `inputs` to `output`. -// -// `Merge` waits for at least one of the tensors in `inputs` to become available. -// It is usually combined with `Switch` to implement branching. -// -// `Merge` forwards the first tensor to become available to `output`, and sets -// `value_index` to its index in `inputs`. -// -// Arguments: -// inputs: The input tensors, exactly one of which will become available. -// -// Returns: -// output: Will be set to the available input tensor. -// value_index: The index of the chosen input tensor in `inputs`. -func Merge(scope *Scope, inputs []tf.Output) (output tf.Output, value_index tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Merge", - Input: []tf.Input{ - tf.OutputList(inputs), - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// PaddedBatchDatasetV2Attr is an optional argument to PaddedBatchDatasetV2. -type PaddedBatchDatasetV2Attr func(optionalAttr) - -// PaddedBatchDatasetV2ParallelCopy sets the optional parallel_copy attribute to value. -// If not specified, defaults to false -func PaddedBatchDatasetV2ParallelCopy(value bool) PaddedBatchDatasetV2Attr { - return func(m optionalAttr) { - m["parallel_copy"] = value - } -} - -// Creates a dataset that batches and pads `batch_size` elements from the input. 
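One concrete instance of the effective-patch-size formula in the `ExtractImagePatches` comment above, `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`: ksize 3 with rate 2 gives an effective extent of 5. The 8x8 input and session plumbing are illustrative assumptions.

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()

	// Build a [1, 8, 8, 1] float image so the output shape is easy to predict.
	img := make([][][][]float32, 1)
	img[0] = make([][][]float32, 8)
	for r := range img[0] {
		img[0][r] = make([][]float32, 8)
		for c := range img[0][r] {
			img[0][r][c] = []float32{float32(r*8 + c)}
		}
	}
	images := op.Const(s, img)

	patches := op.ExtractImagePatches(s, images,
		[]int64{1, 3, 3, 1}, // ksizes
		[]int64{1, 1, 1, 1}, // strides
		[]int64{1, 2, 2, 1}, // rates: effective extent 3 + (3-1)*(2-1) = 5
		"VALID",
	)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	out, err := sess.Run(nil, []tf.Output{patches}, nil)
	if err != nil {
		panic(err)
	}
	// With VALID padding: spatial size 8 - 5 + 1 = 4, depth 3*3*1 = 9.
	fmt.Println(out[0].Shape()) // [1 4 4 9]
}
```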
-// -// Arguments: -// -// batch_size: A scalar representing the number of elements to accumulate in a -// batch. -// padded_shapes: A list of int64 tensors representing the desired padded shapes -// of the corresponding output components. These shapes may be partially -// specified, using `-1` to indicate that a particular dimension should be -// padded to the maximum size of all batch elements. -// padding_values: A list of scalars containing the padding value to use for -// each of the outputs. -// drop_remainder: A scalar representing whether the last batch should be dropped in case its size -// is smaller than desired. -// -func PaddedBatchDatasetV2(scope *Scope, input_dataset tf.Output, batch_size tf.Output, padded_shapes []tf.Output, padding_values []tf.Output, drop_remainder tf.Output, output_shapes []tf.Shape, optional ...PaddedBatchDatasetV2Attr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_shapes": output_shapes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "PaddedBatchDatasetV2", - Input: []tf.Input{ - input_dataset, batch_size, tf.OutputList(padded_shapes), tf.OutputList(padding_values), drop_remainder, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// BlockLSTMV2Attr is an optional argument to BlockLSTMV2. -type BlockLSTMV2Attr func(optionalAttr) - -// BlockLSTMV2CellClip sets the optional cell_clip attribute to value. -// -// value: Value to clip the 'cs' value to. -// If not specified, defaults to 0 -func BlockLSTMV2CellClip(value float32) BlockLSTMV2Attr { - return func(m optionalAttr) { - m["cell_clip"] = value - } -} - -// BlockLSTMV2UsePeephole sets the optional use_peephole attribute to value. -// -// value: Whether to use peephole weights. -// If not specified, defaults to false -func BlockLSTMV2UsePeephole(value bool) BlockLSTMV2Attr { - return func(m optionalAttr) { - m["use_peephole"] = value - } -} - -// Computes the LSTM cell forward propagation for all the time steps. -// -// This is equivalent to applying LSTMBlockCell in a loop, like so: -// -// ```python -// for x1 in unpack(x): -// i1, cs1, f1, o1, ci1, co1, h1 = LSTMBlock( -// x1, cs_prev, h_prev, w, wci, wcf, wco, b) -// cs_prev = cs1 -// h_prev = h1 -// i.append(i1) -// cs.append(cs1) -// f.append(f1) -// o.append(o1) -// ci.append(ci1) -// co.append(co1) -// h.append(h1) -// return pack(i), pack(cs), pack(f), pack(o), pack(ci), pack(ch), pack(h) -// -// Note that unlike LSTMBlockCell (and BlockLSTM) which uses ICFO gate layout, -// this op uses IFCO. So in order for the following snippet to be equivalent -// all gate-related outputs should be reordered. -// ``` -// -// Arguments: -// seq_len_max: Maximum time length actually used by this input. Outputs are padded -// with zeros beyond this length. -// x: The sequence input to the LSTM, shape (timelen, batch_size, num_inputs). -// cs_prev: Value of the initial cell state. -// h_prev: Initial output of cell (to be used for peephole). -// w: The weight matrix. -// wci: The weight matrix for input gate peephole connection. -// wcf: The weight matrix for forget gate peephole connection. -// wco: The weight matrix for output gate peephole connection. -// b: The bias vector. -// -// Returns: -// i: The input gate over the whole time sequence. -// cs: The cell state before the tanh over the whole time sequence. -// f: The forget gate over the whole time sequence. -// o: The output gate over the whole time sequence. 
-// ci: The cell input over the whole time sequence. -// co: The cell after the tanh over the whole time sequence. -// h: The output h vector over the whole time sequence. -func BlockLSTMV2(scope *Scope, seq_len_max tf.Output, x tf.Output, cs_prev tf.Output, h_prev tf.Output, w tf.Output, wci tf.Output, wcf tf.Output, wco tf.Output, b tf.Output, optional ...BlockLSTMV2Attr) (i tf.Output, cs tf.Output, f tf.Output, o tf.Output, ci tf.Output, co tf.Output, h tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "BlockLSTMV2", - Input: []tf.Input{ - seq_len_max, x, cs_prev, h_prev, w, wci, wcf, wco, b, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6) -} - -// Return a tensor with the same shape and contents as the input tensor or value. -func Identity(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Identity", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Outputs a `Summary` protocol buffer with scalar values. -// -// The input `tags` and `values` must have the same shape. The generated summary -// has a summary value for each tag-value pair in `tags` and `values`. -// -// Arguments: -// tags: Tags for the summary. -// values: Same shape as `tags. Values for the summary. -// -// Returns Scalar. Serialized `Summary` protocol buffer. -func ScalarSummary(scope *Scope, tags tf.Output, values tf.Output) (summary tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ScalarSummary", - Input: []tf.Input{ - tags, values, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceSparseApplyProximalAdagradAttr is an optional argument to ResourceSparseApplyProximalAdagrad. -type ResourceSparseApplyProximalAdagradAttr func(optionalAttr) - -// ResourceSparseApplyProximalAdagradUseLocking sets the optional use_locking attribute to value. -// -// value: If True, updating of the var and accum tensors will be protected by -// a lock; otherwise the behavior is undefined, but may exhibit less contention. -// If not specified, defaults to false -func ResourceSparseApplyProximalAdagradUseLocking(value bool) ResourceSparseApplyProximalAdagradAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Sparse update entries in '*var' and '*accum' according to FOBOS algorithm. -// -// That is for rows we have grad for, we update var and accum as follows: -// accum += grad * grad -// prox_v = var -// prox_v -= lr * grad * (1 / sqrt(accum)) -// var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0} -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Learning rate. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 regularization. Must be a scalar. -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. -// -// Returns the created operation. 
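A plain-Go walk-through of one scalar entry of the FOBOS update quoted above, just to make the order of operations explicit; the numeric values are made up and this does not use the op itself.

```go
package main

import (
	"fmt"
	"math"
)

func main() {
	v, accum := 1.0, 0.1 // current var and accum entries (made-up values)
	grad := 0.5
	lr, l1, l2 := 0.1, 0.01, 0.001

	accum += grad * grad                         // accum += grad * grad
	proxV := v - lr*grad*(1/math.Sqrt(accum))    // prox_v = var - lr * grad * (1 / sqrt(accum))
	shrink := math.Max(math.Abs(proxV)-lr*l1, 0) // max{|prox_v| - lr*l1, 0}
	sign := 1.0
	if proxV < 0 {
		sign = -1.0
	}
	v = sign / (1 + lr*l2) * shrink // var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1, 0}

	fmt.Printf("var=%.6f accum=%.6f\n", v, accum)
}
```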
-func ResourceSparseApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyProximalAdagradAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceSparseApplyProximalAdagrad", - Input: []tf.Input{ - var_, accum, lr, l1, l2, grad, indices, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Computes numerical negative value element-wise. -// -// I.e., \\(y = -x\\). -func Neg(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Neg", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Calculates the gradient of the SparseMatrixSoftmax op. -// -// Arguments: -// softmax: A CSRSparseMatrix. -// grad_softmax: The gradient of `softmax`. -// -// -// Returns The output gradient. -func SparseMatrixSoftmaxGrad(scope *Scope, softmax tf.Output, grad_softmax tf.Output, type_ tf.DataType) (gradient tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"type": type_} - opspec := tf.OpSpec{ - Type: "SparseMatrixSoftmaxGrad", - Input: []tf.Input{ - softmax, grad_softmax, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the GRU cell back-propagation for 1 time step. -// -// Args -// x: Input to the GRU cell. -// h_prev: State input from the previous GRU cell. -// w_ru: Weight matrix for the reset and update gate. -// w_c: Weight matrix for the cell connection gate. -// b_ru: Bias vector for the reset and update gate. -// b_c: Bias vector for the cell connection gate. -// r: Output of the reset gate. -// u: Output of the update gate. -// c: Output of the cell connection gate. -// d_h: Gradients of the h_new wrt to objective function. -// -// Returns -// d_x: Gradients of the x wrt to objective function. -// d_h_prev: Gradients of the h wrt to objective function. -// d_c_bar Gradients of the c_bar wrt to objective function. -// d_r_bar_u_bar Gradients of the r_bar & u_bar wrt to objective function. -// -// This kernel op implements the following mathematical equations: -// -// Note on notation of the variables: -// -// Concatenation of a and b is represented by a_b -// Element-wise dot product of a and b is represented by ab -// Element-wise dot product is represented by \circ -// Matrix multiplication is represented by * -// -// Additional notes for clarity: -// -// `w_ru` can be segmented into 4 different matrices. -// ``` -// w_ru = [w_r_x w_u_x -// w_r_h_prev w_u_h_prev] -// ``` -// Similarly, `w_c` can be segmented into 2 different matrices. -// ``` -// w_c = [w_c_x w_c_h_prevr] -// ``` -// Same goes for biases. 
-// ``` -// b_ru = [b_ru_x b_ru_h] -// b_c = [b_c_x b_c_h] -// ``` -// Another note on notation: -// ``` -// d_x = d_x_component_1 + d_x_component_2 -// -// where d_x_component_1 = d_r_bar * w_r_x^T + d_u_bar * w_r_x^T -// and d_x_component_2 = d_c_bar * w_c_x^T -// -// d_h_prev = d_h_prev_component_1 + d_h_prevr \circ r + d_h \circ u -// where d_h_prev_componenet_1 = d_r_bar * w_r_h_prev^T + d_u_bar * w_r_h_prev^T -// ``` -// -// Mathematics behind the Gradients below: -// ``` -// d_c_bar = d_h \circ (1-u) \circ (1-c \circ c) -// d_u_bar = d_h \circ (h-c) \circ u \circ (1-u) -// -// d_r_bar_u_bar = [d_r_bar d_u_bar] -// -// [d_x_component_1 d_h_prev_component_1] = d_r_bar_u_bar * w_ru^T -// -// [d_x_component_2 d_h_prevr] = d_c_bar * w_c^T -// -// d_x = d_x_component_1 + d_x_component_2 -// -// d_h_prev = d_h_prev_component_1 + d_h_prevr \circ r + u -// ``` -// Below calculation is performed in the python wrapper for the Gradients -// (not in the gradient kernel.) -// ``` -// d_w_ru = x_h_prevr^T * d_c_bar -// -// d_w_c = x_h_prev^T * d_r_bar_u_bar -// -// d_b_ru = sum of d_r_bar_u_bar along axis = 0 -// -// d_b_c = sum of d_c_bar along axis = 0 -// ``` -func GRUBlockCellGrad(scope *Scope, x tf.Output, h_prev tf.Output, w_ru tf.Output, w_c tf.Output, b_ru tf.Output, b_c tf.Output, r tf.Output, u tf.Output, c tf.Output, d_h tf.Output) (d_x tf.Output, d_h_prev tf.Output, d_c_bar tf.Output, d_r_bar_u_bar tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "GRUBlockCellGrad", - Input: []tf.Input{ - x, h_prev, w_ru, w_c, b_ru, b_c, r, u, c, d_h, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3) -} - -// TextLineReaderV2Attr is an optional argument to TextLineReaderV2. -type TextLineReaderV2Attr func(optionalAttr) - -// TextLineReaderV2SkipHeaderLines sets the optional skip_header_lines attribute to value. -// -// value: Number of lines to skip from the beginning of every file. -// If not specified, defaults to 0 -func TextLineReaderV2SkipHeaderLines(value int64) TextLineReaderV2Attr { - return func(m optionalAttr) { - m["skip_header_lines"] = value - } -} - -// TextLineReaderV2Container sets the optional container attribute to value. -// -// value: If non-empty, this reader is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func TextLineReaderV2Container(value string) TextLineReaderV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// TextLineReaderV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this reader is named in the given bucket -// with this shared_name. Otherwise, the node name is used instead. -// If not specified, defaults to "" -func TextLineReaderV2SharedName(value string) TextLineReaderV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// A Reader that outputs the lines of a file delimited by '\n'. -// -// Returns The handle to reference the Reader. -func TextLineReaderV2(scope *Scope, optional ...TextLineReaderV2Attr) (reader_handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TextLineReaderV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Encode audio data using the WAV file format. 
-// -// This operation will generate a string suitable to be saved out to create a .wav -// audio file. It will be encoded in the 16-bit PCM format. It takes in float -// values in the range -1.0f to 1.0f, and any outside that value will be clamped to -// that range. -// -// `audio` is a 2-D float Tensor of shape `[length, channels]`. -// `sample_rate` is a scalar Tensor holding the rate to use (e.g. 44100). -// -// Arguments: -// audio: 2-D with shape `[length, channels]`. -// sample_rate: Scalar containing the sample frequency. -// -// Returns 0-D. WAV-encoded file contents. -func EncodeWav(scope *Scope, audio tf.Output, sample_rate tf.Output) (contents tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "EncodeWav", - Input: []tf.Input{ - audio, sample_rate, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// EuclideanNormAttr is an optional argument to EuclideanNorm. -type EuclideanNormAttr func(optionalAttr) - -// EuclideanNormKeepDims sets the optional keep_dims attribute to value. -// -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func EuclideanNormKeepDims(value bool) EuclideanNormAttr { - return func(m optionalAttr) { - m["keep_dims"] = value - } -} - -// Computes the euclidean norm of elements across dimensions of a tensor. -// -// Reduces `input` along the dimensions given in `axis`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `axis`. If `keep_dims` is true, the reduced dimensions are -// retained with length 1. -// -// Arguments: -// input: The tensor to reduce. -// axis: The dimensions to reduce. Must be in the range -// `[-rank(input), rank(input))`. -// -// Returns The reduced tensor. -func EuclideanNorm(scope *Scope, input tf.Output, axis tf.Output, optional ...EuclideanNormAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "EuclideanNorm", - Input: []tf.Input{ - input, axis, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// IRFFT3DAttr is an optional argument to IRFFT3D. -type IRFFT3DAttr func(optionalAttr) - -// IRFFT3DTreal sets the optional Treal attribute to value. -// If not specified, defaults to DT_FLOAT -func IRFFT3DTreal(value tf.DataType) IRFFT3DAttr { - return func(m optionalAttr) { - m["Treal"] = value - } -} - -// Inverse 3D real-valued fast Fourier transform. -// -// Computes the inverse 3-dimensional discrete Fourier transform of a real-valued -// signal over the inner-most 3 dimensions of `input`. -// -// The inner-most 3 dimensions of `input` are assumed to be the result of `RFFT3D`: -// The inner-most dimension contains the `fft_length / 2 + 1` unique components of -// the DFT of a real-valued signal. If `fft_length` is not provided, it is computed -// from the size of the inner-most 3 dimensions of `input`. If the FFT length used -// to compute `input` is odd, it should be provided since it cannot be inferred -// properly. -// -// Along each axis `IRFFT3D` is computed on, if `fft_length` (or -// `fft_length / 2 + 1` for the inner-most dimension) is smaller than the -// corresponding dimension of `input`, the dimension is cropped. If it is larger, -// the dimension is padded with zeros. -// -// Arguments: -// input: A complex tensor. -// fft_length: An int32 tensor of shape [3]. The FFT length for each dimension. 
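Coming back to `EncodeWav` above: a small sketch of producing WAV bytes from a float tensor. The audio samples, sample rate, and session plumbing are illustrative assumptions; as described, the op clamps samples to [-1.0, 1.0] and emits 16-bit PCM.

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	audio := op.Const(s, [][]float32{{0.0}, {0.5}, {-0.5}, {1.0}}) // [length, channels]
	sampleRate := op.Const(s, int32(16000))
	wav := op.EncodeWav(s, audio, sampleRate)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	out, err := sess.Run(nil, []tf.Output{wav}, nil)
	if err != nil {
		panic(err)
	}
	// out[0] is a scalar DT_STRING tensor holding the encoded file contents.
	fmt.Println(len(out[0].Value().(string)), "bytes of WAV data")
}
```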
-// -// Returns A float32 tensor of the same rank as `input`. The inner-most 3 -// dimensions of `input` are replaced with the `fft_length` samples of their -// inverse 3D real Fourier transform. -// -// @compatibility(numpy) -// Equivalent to np.irfftn with 3 dimensions. -// @end_compatibility -func IRFFT3D(scope *Scope, input tf.Output, fft_length tf.Output, optional ...IRFFT3DAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "IRFFT3D", - Input: []tf.Input{ - input, fft_length, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the element-wise min of two SparseTensors. -// -// Assumes the two SparseTensors have the same shape, i.e., no broadcasting. -// -// Arguments: -// a_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, in the canonical lexicographic ordering. -// a_values: 1-D. `N` non-empty values corresponding to `a_indices`. -// a_shape: 1-D. Shape of the input SparseTensor. -// b_indices: counterpart to `a_indices` for the other operand. -// b_values: counterpart to `a_values` for the other operand; must be of the same dtype. -// b_shape: counterpart to `a_shape` for the other operand; the two shapes must be equal. -// -// Returns: -// output_indices: 2-D. The indices of the output SparseTensor. -// output_values: 1-D. The values of the output SparseTensor. -func SparseSparseMinimum(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output) (output_indices tf.Output, output_values tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSparseMinimum", - Input: []tf.Input{ - a_indices, a_values, a_shape, b_indices, b_values, b_shape, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// StatefulStandardNormalV2Attr is an optional argument to StatefulStandardNormalV2. -type StatefulStandardNormalV2Attr func(optionalAttr) - -// StatefulStandardNormalV2Dtype sets the optional dtype attribute to value. -// -// value: The type of the output. -// If not specified, defaults to DT_FLOAT -func StatefulStandardNormalV2Dtype(value tf.DataType) StatefulStandardNormalV2Attr { - return func(m optionalAttr) { - m["dtype"] = value - } -} - -// Outputs random values from a normal distribution. -// -// The generated values will have mean 0 and standard deviation 1. -// -// Arguments: -// resource: The handle of the resource variable that stores the state of the RNG. -// algorithm: The RNG algorithm. -// shape: The shape of the output tensor. -// -// Returns A tensor of the specified shape filled with random normal values. -func StatefulStandardNormalV2(scope *Scope, resource tf.Output, algorithm tf.Output, shape tf.Output, optional ...StatefulStandardNormalV2Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StatefulStandardNormalV2", - Input: []tf.Input{ - resource, algorithm, shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceSparseApplyFtrlAttr is an optional argument to ResourceSparseApplyFtrl. -type ResourceSparseApplyFtrlAttr func(optionalAttr) - -// ResourceSparseApplyFtrlUseLocking sets the optional use_locking attribute to value. 
-// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceSparseApplyFtrlUseLocking(value bool) ResourceSparseApplyFtrlAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// ResourceSparseApplyFtrlMultiplyLinearByLr sets the optional multiply_linear_by_lr attribute to value. -// If not specified, defaults to false -func ResourceSparseApplyFtrlMultiplyLinearByLr(value bool) ResourceSparseApplyFtrlAttr { - return func(m optionalAttr) { - m["multiply_linear_by_lr"] = value - } -} - -// Update relevant entries in '*var' according to the Ftrl-proximal scheme. -// -// That is for rows we have grad for, we update var, accum and linear as follows: -// accum_new = accum + grad * grad -// linear += grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var -// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 -// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 -// accum = accum_new -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// linear: Should be from a Variable(). -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. -// lr: Scaling factor. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 regularization. Must be a scalar. -// lr_power: Scaling factor. Must be a scalar. -// -// Returns the created operation. -func ResourceSparseApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, lr_power tf.Output, optional ...ResourceSparseApplyFtrlAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceSparseApplyFtrl", - Input: []tf.Input{ - var_, accum, linear, grad, indices, lr, l1, l2, lr_power, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Debugging/model interpretability outputs for each example. -// -// It traverses all the trees and computes debug metrics for individual examples, -// such as getting split feature ids and logits after each split along the decision -// path used to compute directional feature contributions. -// -// Arguments: -// -// bucketized_features: A list of rank 1 Tensors containing bucket id for each -// feature. -// logits_dimension: scalar, dimension of the logits, to be used for constructing the protos in -// examples_debug_outputs_serialized. -// -// Returns Output rank 1 Tensor containing a proto serialized as a string for each example. -func BoostedTreesExampleDebugOutputs(scope *Scope, tree_ensemble_handle tf.Output, bucketized_features []tf.Output, logits_dimension int64) (examples_debug_outputs_serialized tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"logits_dimension": logits_dimension} - opspec := tf.OpSpec{ - Type: "BoostedTreesExampleDebugOutputs", - Input: []tf.Input{ - tree_ensemble_handle, tf.OutputList(bucketized_features), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// StatefulUniformAttr is an optional argument to StatefulUniform. -type StatefulUniformAttr func(optionalAttr) - -// StatefulUniformDtype sets the optional dtype attribute to value. -// -// value: The type of the output. 
-// If not specified, defaults to DT_FLOAT -func StatefulUniformDtype(value tf.DataType) StatefulUniformAttr { - return func(m optionalAttr) { - m["dtype"] = value - } -} - -// Outputs random values from a uniform distribution. -// -// The generated values follow a uniform distribution in the range `[0, 1)`. The -// lower bound 0 is included in the range, while the upper bound 1 is excluded. -// -// Arguments: -// resource: The handle of the resource variable that stores the state of the RNG. -// algorithm: The RNG algorithm. -// shape: The shape of the output tensor. -// -// Returns Random values with specified shape. -func StatefulUniform(scope *Scope, resource tf.Output, algorithm tf.Output, shape tf.Output, optional ...StatefulUniformAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StatefulUniform", - Input: []tf.Input{ - resource, algorithm, shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates ngrams from ragged string data. -// -// This op accepts a ragged tensor with 1 ragged dimension containing only -// strings and outputs a ragged tensor with 1 ragged dimension containing ngrams -// of that string, joined along the innermost axis. -// -// Arguments: -// data: The values tensor of the ragged string tensor to make ngrams out of. Must be a -// 1D string tensor. -// data_splits: The splits tensor of the ragged string tensor to make ngrams out of. -// separator: The string to append between elements of the token. Use "" for no separator. -// ngram_widths: The sizes of the ngrams to create. -// left_pad: The string to use to pad the left side of the ngram sequence. Only used if -// pad_width != 0. -// right_pad: The string to use to pad the right side of the ngram sequence. Only used if -// pad_width != 0. -// pad_width: The number of padding elements to add to each side of each -// sequence. Note that padding will never be greater than 'ngram_widths'-1 -// regardless of this value. If `pad_width=-1`, then add `max(ngram_widths)-1` -// elements. -// -// -// Returns: -// ngrams: The values tensor of the output ngrams ragged tensor. -// ngrams_splits: The splits tensor of the output ngrams ragged tensor. -func StringNGrams(scope *Scope, data tf.Output, data_splits tf.Output, separator string, ngram_widths []int64, left_pad string, right_pad string, pad_width int64, preserve_short_sequences bool) (ngrams tf.Output, ngrams_splits tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"separator": separator, "ngram_widths": ngram_widths, "left_pad": left_pad, "right_pad": right_pad, "pad_width": pad_width, "preserve_short_sequences": preserve_short_sequences} - opspec := tf.OpSpec{ - Type: "StringNGrams", - Input: []tf.Input{ - data, data_splits, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Reduces sparse updates into the variable referenced by `resource` using the `min` operation. -// -// This operation computes -// -// # Scalar indices -// ref[indices, ...] = min(ref[indices, ...], updates[...]) -// -// # Vector indices (for each i) -// ref[indices[i], ...] = min(ref[indices[i], ...], updates[i, ...]) -// -// # High rank indices (for each i, ..., j) -// ref[indices[i, ..., j], ...] 
= min(ref[indices[i, ..., j], ...], updates[i, ..., j, ...]) -// -// Duplicate entries are handled correctly: if multiple `indices` reference -// the same location, their contributions are combined. -// -// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`. -// -//
-// -// Arguments: -// resource: Should be from a `Variable` node. -// indices: A tensor of indices into the first dimension of `ref`. -// updates: A tensor of updated values to add to `ref`. -// -// Returns the created operation. -func ResourceScatterMin(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ResourceScatterMin", - Input: []tf.Input{ - resource, indices, updates, - }, - } - return scope.AddOperation(opspec) -} - -// Multiplies sparse updates into the variable referenced by `resource`. -// -// This operation computes -// -// # Scalar indices -// ref[indices, ...] *= updates[...] -// -// # Vector indices (for each i) -// ref[indices[i], ...] *= updates[i, ...] -// -// # High rank indices (for each i, ..., j) -// ref[indices[i, ..., j], ...] *= updates[i, ..., j, ...] -// -// Duplicate entries are handled correctly: if multiple `indices` reference -// the same location, their contributions multiply. -// -// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`. -// -//
-// -// Arguments: -// resource: Should be from a `Variable` node. -// indices: A tensor of indices into the first dimension of `ref`. -// updates: A tensor of updated values to add to `ref`. -// -// Returns the created operation. -func ResourceScatterMul(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ResourceScatterMul", - Input: []tf.Input{ - resource, indices, updates, - }, - } - return scope.AddOperation(opspec) -} - -// Compresses a dataset element. -func CompressElement(scope *Scope, components []tf.Output) (compressed tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "CompressElement", - Input: []tf.Input{ - tf.OutputList(components), - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MatMulAttr is an optional argument to MatMul. -type MatMulAttr func(optionalAttr) - -// MatMulTransposeA sets the optional transpose_a attribute to value. -// -// value: If true, "a" is transposed before multiplication. -// If not specified, defaults to false -func MatMulTransposeA(value bool) MatMulAttr { - return func(m optionalAttr) { - m["transpose_a"] = value - } -} - -// MatMulTransposeB sets the optional transpose_b attribute to value. -// -// value: If true, "b" is transposed before multiplication. -// If not specified, defaults to false -func MatMulTransposeB(value bool) MatMulAttr { - return func(m optionalAttr) { - m["transpose_b"] = value - } -} - -// Multiply the matrix "a" by the matrix "b". -// -// The inputs must be two-dimensional matrices and the inner dimension of -// "a" (after being transposed if transpose_a is true) must match the -// outer dimension of "b" (after being transposed if transposed_b is -// true). -// -// *Note*: The default kernel implementation for MatMul on GPUs uses -// cublas. -func MatMul(scope *Scope, a tf.Output, b tf.Output, optional ...MatMulAttr) (product tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MatMul", - Input: []tf.Input{ - a, b, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SparseReduceSumSparseAttr is an optional argument to SparseReduceSumSparse. -type SparseReduceSumSparseAttr func(optionalAttr) - -// SparseReduceSumSparseKeepDims sets the optional keep_dims attribute to value. -// -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func SparseReduceSumSparseKeepDims(value bool) SparseReduceSumSparseAttr { - return func(m optionalAttr) { - m["keep_dims"] = value - } -} - -// Computes the sum of elements across dimensions of a SparseTensor. -// -// This Op takes a SparseTensor and is the sparse counterpart to -// `tf.reduce_sum()`. In contrast to SparseReduceSum, this Op returns a -// SparseTensor. -// -// Reduces `sp_input` along the dimensions given in `reduction_axes`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained -// with length 1. -// -// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor -// with a single element is returned. Additionally, the axes can be negative, -// which are interpreted according to the indexing rules in Python. -// -// Arguments: -// input_indices: 2-D. 
`N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. -// input_shape: 1-D. Shape of the input SparseTensor. -// reduction_axes: 1-D. Length-`K` vector containing the reduction axes. -func SparseReduceSumSparse(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceSumSparseAttr) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SparseReduceSumSparse", - Input: []tf.Input{ - input_indices, input_values, input_shape, reduction_axes, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Computes rectified linear: `max(features, 0)`. -// -// See: https://en.wikipedia.org/wiki/Rectifier_(neural_networks) -// Example usage: -// >>> tf.nn.relu([-2., 0., -0., 3.]).numpy() -// array([ 0., 0., -0., 3.], dtype=float32) -func Relu(scope *Scope, features tf.Output) (activations tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Relu", - Input: []tf.Input{ - features, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Get the number of nodes in a tree -// -// Arguments: -// tree_handle: Handle to the tree resource. -// -// Returns The size of the tree. -func TensorForestTreeSize(scope *Scope, tree_handle tf.Output) (tree_size tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorForestTreeSize", - Input: []tf.Input{ - tree_handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Subtracts sparse updates from the variable referenced by `resource`. -// -// This operation computes -// -// # Scalar indices -// ref[indices, ...] -= updates[...] -// -// # Vector indices (for each i) -// ref[indices[i], ...] -= updates[i, ...] -// -// # High rank indices (for each i, ..., j) -// ref[indices[i, ..., j], ...] -= updates[i, ..., j, ...] -// -// Duplicate entries are handled correctly: if multiple `indices` reference -// the same location, their contributions add. -// -// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`. -// -//
-// -// Arguments: -// resource: Should be from a `Variable` node. -// indices: A tensor of indices into the first dimension of `ref`. -// updates: A tensor of updated values to add to `ref`. -// -// Returns the created operation. -func ResourceScatterSub(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ResourceScatterSub", - Input: []tf.Input{ - resource, indices, updates, - }, - } - return scope.AddOperation(opspec) -} - -// Returns the cardinality of `input_dataset`. -// -// Returns the cardinality of `input_dataset`. -// -// Arguments: -// input_dataset: A variant tensor representing the dataset to return cardinality for. -// -// Returns The cardinality of `input_dataset`. Named constants are used to represent -// infinite and unknown cardinality. -func DatasetCardinality(scope *Scope, input_dataset tf.Output) (cardinality tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DatasetCardinality", - Input: []tf.Input{ - input_dataset, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that emits each dim-0 slice of `components` once. -func TensorSliceDataset(scope *Scope, components []tf.Output, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "TensorSliceDataset", - Input: []tf.Input{ - tf.OutputList(components), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RetrieveTPUEmbeddingMDLAdagradLightParametersAttr is an optional argument to RetrieveTPUEmbeddingMDLAdagradLightParameters. -type RetrieveTPUEmbeddingMDLAdagradLightParametersAttr func(optionalAttr) - -// RetrieveTPUEmbeddingMDLAdagradLightParametersTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func RetrieveTPUEmbeddingMDLAdagradLightParametersTableId(value int64) RetrieveTPUEmbeddingMDLAdagradLightParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// RetrieveTPUEmbeddingMDLAdagradLightParametersTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingMDLAdagradLightParametersTableName(value string) RetrieveTPUEmbeddingMDLAdagradLightParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// RetrieveTPUEmbeddingMDLAdagradLightParametersConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingMDLAdagradLightParametersConfig(value string) RetrieveTPUEmbeddingMDLAdagradLightParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Retrieve MDL Adagrad Light embedding parameters. -// -// An op that retrieves optimization parameters from embedding to host -// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up -// the correct embedding table configuration. For example, this op is -// used to retrieve updated parameters before saving a checkpoint. -// -// Returns: -// parameters: Parameter parameters updated by the MDL Adagrad Light optimization algorithm. -// accumulators: Parameter accumulators updated by the MDL Adagrad Light optimization algorithm. -// weights: Parameter weights updated by the MDL Adagrad Light optimization algorithm. 
-// benefits: Parameter benefits updated by the MDL Adagrad Light optimization algorithm. -func RetrieveTPUEmbeddingMDLAdagradLightParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingMDLAdagradLightParametersAttr) (parameters tf.Output, accumulators tf.Output, weights tf.Output, benefits tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RetrieveTPUEmbeddingMDLAdagradLightParameters", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3) -} - -// Adds sparse updates to the variable referenced by `resource`. -// -// This operation computes -// -// # Scalar indices -// ref[indices, ...] += updates[...] -// -// # Vector indices (for each i) -// ref[indices[i], ...] += updates[i, ...] -// -// # High rank indices (for each i, ..., j) -// ref[indices[i, ..., j], ...] += updates[i, ..., j, ...] -// -// Duplicate entries are handled correctly: if multiple `indices` reference -// the same location, their contributions add. -// -// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`. -// -//
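For concreteness, here is a minimal graph-construction sketch using the wrappers generated in this package, assuming the standard tensorflow/go import paths; the variable is initialized first and the scatter update runs as a second step:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()

	// A resource variable holding four int32 values.
	v := op.VarHandleOp(s, tf.Int32, tf.MakeShape(4))
	init := op.AssignVariableOp(s, v, op.Const(s.SubScope("init"), []int32{1, 2, 3, 4}))

	// ref[1] += 10 and ref[3] += 20.
	scatter := op.ResourceScatterAdd(s, v,
		op.Const(s.SubScope("idx"), []int32{1, 3}),
		op.Const(s.SubScope("upd"), []int32{10, 20}))

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	// Run the initializer, then the scatter update, as separate steps.
	if _, err := sess.Run(nil, nil, []*tf.Operation{init}); err != nil {
		panic(err)
	}
	if _, err := sess.Run(nil, nil, []*tf.Operation{scatter}); err != nil {
		panic(err)
	}
	fmt.Println("variable is now [1, 12, 3, 24]")
}
```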
-// -// Arguments: -// resource: Should be from a `Variable` node. -// indices: A tensor of indices into the first dimension of `ref`. -// updates: A tensor of updated values to add to `ref`. -// -// Returns the created operation. -func ResourceScatterAdd(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ResourceScatterAdd", - Input: []tf.Input{ - resource, indices, updates, - }, - } - return scope.AddOperation(opspec) -} - -// This op consumes a lock created by `MutexLock`. -// -// This op exists to consume a tensor created by `MutexLock` (other than -// direct control dependencies). It should be the only that consumes the tensor, -// and will raise an error if it is not. Its only purpose is to keep the -// mutex lock tensor alive until it is consumed by this op. -// -// **NOTE**: This operation must run on the same device as its input. This may -// be enforced via the `colocate_with` mechanism. -// -// Arguments: -// mutex_lock: A tensor returned by `MutexLock`. -// -// Returns the created operation. -func ConsumeMutexLock(scope *Scope, mutex_lock tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ConsumeMutexLock", - Input: []tf.Input{ - mutex_lock, - }, - } - return scope.AddOperation(opspec) -} - -// Adjust the contrast of one or more images. -// -// `images` is a tensor of at least 3 dimensions. The last 3 dimensions are -// interpreted as `[height, width, channels]`. The other dimensions only -// represent a collection of images, such as `[batch, height, width, channels].` -// -// Contrast is adjusted independently for each channel of each image. -// -// For each channel, the Op first computes the mean of the image pixels in the -// channel and then adjusts each component of each pixel to -// `(x - mean) * contrast_factor + mean`. -// -// Arguments: -// images: Images to adjust. At least 3-D. -// contrast_factor: A float multiplier for adjusting contrast. -// -// Returns The contrast-adjusted image or images. -func AdjustContrastv2(scope *Scope, images tf.Output, contrast_factor tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "AdjustContrastv2", - Input: []tf.Input{ - images, contrast_factor, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Produces the average pool of the input tensor for quantized types. -// -// Arguments: -// input: 4-D with shape `[batch, height, width, channels]`. -// min_input: The float value that the lowest quantized input value represents. -// max_input: The float value that the highest quantized input value represents. -// ksize: The size of the window for each dimension of the input tensor. -// The length must be 4 to match the number of dimensions of the input. -// strides: The stride of the sliding window for each dimension of the input -// tensor. The length must be 4 to match the number of dimensions of the input. -// padding: The type of padding algorithm to use. -// -// Returns: -// output -// min_output: The float value that the lowest quantized output value represents. -// max_output: The float value that the highest quantized output value represents. 
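Choosing `ksize`, `strides`, and `padding` determines the spatial output size in the usual TensorFlow way. The helper below is an assumption-level Go sketch of the conventional SAME/VALID arithmetic, not code from this package:

```go
package main

import "fmt"

// outputDim computes one spatial output dimension of a pooling window using
// TensorFlow's conventional rules for the two padding modes.
func outputDim(in, ksize, stride int, padding string) int {
	if padding == "SAME" {
		return (in + stride - 1) / stride // ceil(in / stride)
	}
	// "VALID": only windows that fit entirely inside the input.
	return (in - ksize + stride) / stride // ceil((in - ksize + 1) / stride)
}

func main() {
	// A 28-wide input pooled with a width-2 window and stride 2.
	fmt.Println(outputDim(28, 2, 2, "SAME"))  // 14
	fmt.Println(outputDim(28, 2, 2, "VALID")) // 14
	// With stride 3 the two modes differ.
	fmt.Println(outputDim(28, 2, 3, "SAME"), outputDim(28, 2, 3, "VALID")) // 10 9
}
```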
-func QuantizedAvgPool(scope *Scope, input tf.Output, min_input tf.Output, max_input tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output, min_output tf.Output, max_output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - opspec := tf.OpSpec{ - Type: "QuantizedAvgPool", - Input: []tf.Input{ - input, min_input, max_input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// VariableShapeAttr is an optional argument to VariableShape. -type VariableShapeAttr func(optionalAttr) - -// VariableShapeOutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_INT32 -func VariableShapeOutType(value tf.DataType) VariableShapeAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// Returns the shape of the variable pointed to by `resource`. -// -// This operation returns a 1-D integer tensor representing the shape of `input`. -// -// For example: -// -// ``` -// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] -// shape(t) ==> [2, 2, 3] -// ``` -func VariableShape(scope *Scope, input tf.Output, optional ...VariableShapeAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "VariableShape", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the minimum along segments of a tensor. -// -// Read -// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) -// for an explanation of segments. -// -// This operator is similar to the unsorted segment sum operator found -// [(here)](../../../api_docs/python/math_ops.md#UnsortedSegmentSum). -// Instead of computing the sum over segments, it computes the minimum such that: -// -// \\(output_i = \min_{j...} data_[j...]\\) where min is over tuples `j...` such -// that `segment_ids[j...] == i`. -// -// If the minimum is empty for a given segment ID `i`, it outputs the largest -// possible value for the specific numeric type, -// `output[i] = numeric_limits::max()`. -// -// For example: -// -// ``` python -// c = tf.constant([[1,2,3,4], [5,6,7,8], [4,3,2,1]]) -// tf.unsorted_segment_min(c, tf.constant([0, 1, 0]), num_segments=2) -// # ==> [[ 1, 2, 2, 1], -// # [5, 6, 7, 8]] -// ``` -// -// If the given segment ID `i` is negative, then the corresponding value is -// dropped, and will not be included in the result. -// -// Arguments: -// -// segment_ids: A tensor whose shape is a prefix of `data.shape`. -// -// -// Returns Has same shape as data, except for the first `segment_ids.rank` -// dimensions, which are replaced with a single dimension which has size -// `num_segments`. -func UnsortedSegmentMin(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "UnsortedSegmentMin", - Input: []tf.Input{ - data, segment_ids, num_segments, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceScatterNdSubAttr is an optional argument to ResourceScatterNdSub. -type ResourceScatterNdSubAttr func(optionalAttr) - -// ResourceScatterNdSubUseLocking sets the optional use_locking attribute to value. -// -// value: An optional bool. Defaults to True. 
If True, the assignment will -// be protected by a lock; otherwise the behavior is undefined, -// but may exhibit less contention. -// If not specified, defaults to true -func ResourceScatterNdSubUseLocking(value bool) ResourceScatterNdSubAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Applies sparse subtraction to individual values or slices in a Variable. -// -// `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. -// -// `indices` must be integer tensor, containing indices into `ref`. -// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`. -// -// The innermost dimension of `indices` (with length `K`) corresponds to -// indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th -// dimension of `ref`. -// -// `updates` is `Tensor` of rank `Q-1+P-K` with shape: -// -// ``` -// [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]] -// ``` -// -// For example, say we want to subtract 4 scattered elements from a rank-1 tensor -// with 8 elements. In Python, that subtraction would look like this: -// -// ```python -// ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8], use_resource=True) -// indices = tf.constant([[4], [3], [1], [7]]) -// updates = tf.constant([9, 10, 11, 12]) -// sub = tf.scatter_nd_sub(ref, indices, updates) -// with tf.Session() as sess: -// print sess.run(sub) -// ``` -// -// The resulting update to ref would look like this: -// -// [1, -9, 3, -6, -4, 6, 7, -4] -// -// See `tf.scatter_nd` for more details about how to make updates to -// slices. -// -// Arguments: -// ref: A resource handle. Must be from a VarHandleOp. -// indices: A Tensor. Must be one of the following types: int32, int64. -// A tensor of indices into ref. -// updates: A Tensor. Must have the same type as ref. A tensor of -// values to add to ref. -// -// Returns the created operation. -func ResourceScatterNdSub(scope *Scope, ref tf.Output, indices tf.Output, updates tf.Output, optional ...ResourceScatterNdSubAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceScatterNdSub", - Input: []tf.Input{ - ref, indices, updates, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// DataFormatDimMapAttr is an optional argument to DataFormatDimMap. -type DataFormatDimMapAttr func(optionalAttr) - -// DataFormatDimMapSrcFormat sets the optional src_format attribute to value. -// -// value: source data format. -// If not specified, defaults to "NHWC" -func DataFormatDimMapSrcFormat(value string) DataFormatDimMapAttr { - return func(m optionalAttr) { - m["src_format"] = value - } -} - -// DataFormatDimMapDstFormat sets the optional dst_format attribute to value. -// -// value: destination data format. -// If not specified, defaults to "NCHW" -func DataFormatDimMapDstFormat(value string) DataFormatDimMapAttr { - return func(m optionalAttr) { - m["dst_format"] = value - } -} - -// Returns the dimension index in the destination data format given the one in -// -// the source data format. -// -// Arguments: -// x: A Tensor with each element as a dimension index in source data format. -// Must be in the range [-4, 4). -// -// Returns A Tensor with each element as a dimension index in destination data format. 
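For concreteness, a small end-to-end sketch assuming the standard tensorflow/go import paths; the four NHWC axis indices map to their positions in NCHW:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Axis indices in NHWC order: N=0, H=1, W=2, C=3.
	x := op.Const(s, []int32{0, 1, 2, 3})
	// Map each index to the position of the same axis in NCHW.
	y := op.DataFormatDimMap(s, x,
		op.DataFormatDimMapSrcFormat("NHWC"),
		op.DataFormatDimMapDstFormat("NCHW"))

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{y}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // [0 2 3 1]
}
```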
-func DataFormatDimMap(scope *Scope, x tf.Output, optional ...DataFormatDimMapAttr) (y tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DataFormatDimMap", - Input: []tf.Input{ - x, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Assigns a new value to a variable. -// -// Any ReadVariableOp with a control dependency on this op is guaranteed to return -// this value or a subsequent newer value of the variable. -// -// Arguments: -// resource: handle to the resource in which to store the variable. -// value: the value to set the new tensor to use. -// -// Returns the created operation. -func AssignVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "AssignVariableOp", - Input: []tf.Input{ - resource, value, - }, - } - return scope.AddOperation(opspec) -} - -// UpperBoundAttr is an optional argument to UpperBound. -type UpperBoundAttr func(optionalAttr) - -// UpperBoundOutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_INT32 -func UpperBoundOutType(value tf.DataType) UpperBoundAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// Applies upper_bound(sorted_search_values, values) along each row. -// -// Each set of rows with the same index in (sorted_inputs, values) is treated -// independently. The resulting row is the equivalent of calling -// `np.searchsorted(sorted_inputs, values, side='right')`. -// -// The result is not a global index to the entire -// `Tensor`, but rather just the index in the last dimension. -// -// A 2-D example: -// sorted_sequence = [[0, 3, 9, 9, 10], -// [1, 2, 3, 4, 5]] -// values = [[2, 4, 9], -// [0, 2, 6]] -// -// result = UpperBound(sorted_sequence, values) -// -// result == [[1, 2, 4], -// [0, 2, 5]] -// -// Arguments: -// sorted_inputs: 2-D Tensor where each row is ordered. -// values: 2-D Tensor with the same numbers of rows as `sorted_search_values`. Contains -// the values that will be searched for in `sorted_search_values`. -// -// Returns A `Tensor` with the same shape as `values`. It contains the last scalar index -// into the last dimension where values can be inserted without changing the -// ordered property. -func UpperBound(scope *Scope, sorted_inputs tf.Output, values tf.Output, optional ...UpperBoundAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "UpperBound", - Input: []tf.Input{ - sorted_inputs, values, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceApplyFtrlV2Attr is an optional argument to ResourceApplyFtrlV2. -type ResourceApplyFtrlV2Attr func(optionalAttr) - -// ResourceApplyFtrlV2UseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyFtrlV2UseLocking(value bool) ResourceApplyFtrlV2Attr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// ResourceApplyFtrlV2MultiplyLinearByLr sets the optional multiply_linear_by_lr attribute to value. 
-// If not specified, defaults to false -func ResourceApplyFtrlV2MultiplyLinearByLr(value bool) ResourceApplyFtrlV2Attr { - return func(m optionalAttr) { - m["multiply_linear_by_lr"] = value - } -} - -// Update '*var' according to the Ftrl-proximal scheme. -// -// grad_with_shrinkage = grad + 2 * l2_shrinkage * var -// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage -// linear += grad_with_shrinkage + -// (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var -// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 -// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 -// accum = accum_new -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// linear: Should be from a Variable(). -// grad: The gradient. -// lr: Scaling factor. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 shrinkage regularization. Must be a scalar. -// -// lr_power: Scaling factor. Must be a scalar. -// -// Returns the created operation. -func ResourceApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlV2Attr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyFtrlV2", - Input: []tf.Input{ - var_, accum, linear, grad, lr, l1, l2, l2_shrinkage, lr_power, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Deprecated. Use TensorArraySplitV3 -// -// DEPRECATED at GraphDef version 26: Use TensorArraySplitV3 -func TensorArraySplitV2(scope *Scope, handle tf.Output, value tf.Output, lengths tf.Output, flow_in tf.Output) (flow_out tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorArraySplitV2", - Input: []tf.Input{ - handle, value, lengths, flow_in, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ComputeAccidentalHitsAttr is an optional argument to ComputeAccidentalHits. -type ComputeAccidentalHitsAttr func(optionalAttr) - -// ComputeAccidentalHitsSeed sets the optional seed attribute to value. -// -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func ComputeAccidentalHitsSeed(value int64) ComputeAccidentalHitsAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// ComputeAccidentalHitsSeed2 sets the optional seed2 attribute to value. -// -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func ComputeAccidentalHitsSeed2(value int64) ComputeAccidentalHitsAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Computes the ids of the positions in sampled_candidates that match true_labels. -// -// When doing log-odds NCE, the result of this op should be passed through a -// SparseToDense op, then added to the logits of the sampled candidates. This has -// the effect of 'removing' the sampled labels that match the true labels by -// making the classifier sure that they are sampled labels. -// -// Arguments: -// true_classes: The true_classes output of UnpackSparseLabels. -// sampled_candidates: The sampled_candidates output of CandidateSampler. -// num_true: Number of true labels per context. 
-// -// Returns: -// indices: A vector of indices corresponding to rows of true_candidates. -// ids: A vector of IDs of positions in sampled_candidates that match a true_label -// for the row with the corresponding index in indices. -// weights: A vector of the same length as indices and ids, in which each element -// is -FLOAT_MAX. -func ComputeAccidentalHits(scope *Scope, true_classes tf.Output, sampled_candidates tf.Output, num_true int64, optional ...ComputeAccidentalHitsAttr) (indices tf.Output, ids tf.Output, weights tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_true": num_true} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ComputeAccidentalHits", - Input: []tf.Input{ - true_classes, sampled_candidates, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// VarHandleOpAttr is an optional argument to VarHandleOp. -type VarHandleOpAttr func(optionalAttr) - -// VarHandleOpContainer sets the optional container attribute to value. -// -// value: the container this variable is placed in. -// If not specified, defaults to "" -func VarHandleOpContainer(value string) VarHandleOpAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// VarHandleOpSharedName sets the optional shared_name attribute to value. -// -// value: the name by which this variable is referred to. -// If not specified, defaults to "" -func VarHandleOpSharedName(value string) VarHandleOpAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// VarHandleOpAllowedDevices sets the optional allowed_devices attribute to value. -// -// value: The allowed devices containing the resource variable. Set when the output -// ResourceHandle represents a per-replica/partitioned resource variable. -// If not specified, defaults to <> -func VarHandleOpAllowedDevices(value []string) VarHandleOpAttr { - return func(m optionalAttr) { - m["allowed_devices"] = value - } -} - -// Creates a handle to a Variable resource. -// -// Arguments: -// dtype: the type of this variable. Must agree with the dtypes -// of all ops using this variable. -// shape: The (possibly partially specified) shape of this variable. -func VarHandleOp(scope *Scope, dtype tf.DataType, shape tf.Shape, optional ...VarHandleOpAttr) (resource tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype, "shape": shape} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "VarHandleOp", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns (x - y)(x - y) element-wise. -// -// *NOTE*: `SquaredDifference` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func SquaredDifference(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SquaredDifference", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that emits the records from one or more TFRecord files. -// -// Arguments: -// filenames: A scalar or vector containing the name(s) of the file(s) to be -// read. -// compression_type: A scalar containing either (i) the empty string (no -// compression), (ii) "ZLIB", or (iii) "GZIP". -// buffer_size: A scalar representing the number of bytes to buffer. 
A value of -// 0 means no buffering will be performed. -func TFRecordDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TFRecordDataset", - Input: []tf.Input{ - filenames, compression_type, buffer_size, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RpcAttr is an optional argument to Rpc. -type RpcAttr func(optionalAttr) - -// RpcProtocol sets the optional protocol attribute to value. -// -// value: RPC protocol to use. Empty string means use the default protocol. -// Options include 'grpc'. -// If not specified, defaults to "" -func RpcProtocol(value string) RpcAttr { - return func(m optionalAttr) { - m["protocol"] = value - } -} - -// RpcFailFast sets the optional fail_fast attribute to value. -// -// value: `boolean`. If `true` (default), then failures to connect -// (i.e., the server does not immediately respond) cause an RPC failure. -// If not specified, defaults to true -func RpcFailFast(value bool) RpcAttr { - return func(m optionalAttr) { - m["fail_fast"] = value - } -} - -// RpcTimeoutInMs sets the optional timeout_in_ms attribute to value. -// -// value: `int`. If `0` (default), then the kernel will run the RPC -// request and only time out if the RPC deadline passes or the session times out. -// If this value is greater than `0`, then the op will raise an exception if -// the RPC takes longer than `timeout_in_ms`. -// If not specified, defaults to 0 -func RpcTimeoutInMs(value int64) RpcAttr { - return func(m optionalAttr) { - m["timeout_in_ms"] = value - } -} - -// Perform batches of RPC requests. -// -// This op asynchronously performs either a single RPC request, or a batch -// of requests. RPC requests are defined by three main parameters: -// -// - `address` (the host+port or BNS address of the request) -// - `method` (the RPC method name for the request) -// - `request` (the serialized proto string, or vector of strings, -// of the RPC request argument). -// -// For example, if you have an RPC service running on port localhost:2345, -// and its interface is configured with the following proto declaration: -// -// ``` -// service MyService { -// rpc MyMethod(MyRequestProto) returns (MyResponseProto) { -// } -// }; -// ``` -// -// then call this op with arguments: -// -// ``` -// address = "localhost:2345" -// method = "MyService/MyMethod" -// ``` -// -// The `request` tensor is a string tensor representing serialized `MyRequestProto` -// strings; and the output string tensor `response` will have the same shape -// and contain (upon successful completion) corresponding serialized -// `MyResponseProto` strings. -// -// For example, to send a single, empty, `MyRequestProto`, call -// this op with `request = ""`. To send 5 **parallel** empty requests, -// call this op with `request = ["", "", "", "", ""]`. -// -// More generally, one can create a batch of `MyRequestProto` serialized protos -// from regular batched tensors using the `encode_proto` op, and convert -// the response `MyResponseProto` serialized protos to batched tensors -// using the `decode_proto` op. -// -// **NOTE** Working with serialized proto strings is faster than instantiating -// actual proto objects in memory, so no performance degradation is expected -// compared to writing custom kernels for this workflow. -// -// If the connection fails or the remote worker returns an error -// status, the op reraises this exception locally. 
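For example, a graph-construction sketch in Go, assuming the standard tensorflow/go import paths and reusing the illustrative endpoint and method names from above:

```go
package main

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Three parallel requests to the same (hypothetical) endpoint and method.
	address := op.Const(s.SubScope("addr"), "localhost:2345")
	method := op.Const(s.SubScope("method"), "MyService/MyMethod")
	request := op.Const(s.SubScope("req"), []string{"", "", ""})

	// `response` has the same shape as `request`; a 5-second deadline is applied.
	response := op.Rpc(s, address, method, request, op.RpcTimeoutInMs(5000))
	_ = response

	if _, err := s.Finalize(); err != nil {
		panic(err)
	}
	_ = tf.Output{} // the graph would normally be handed to a tf.Session here
}
```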
-// -// See the `TryRpc` op if you prefer to handle RPC failures manually in the graph. -// -// Arguments: -// address: `0-D` or `1-D`. The address (i.e. host_name:port) of the RPC server. -// If this tensor has more than 1 element, then multiple parallel rpc requests -// are sent. This argument broadcasts with `method` and `request`. -// method: `0-D` or `1-D`. The method address on the RPC server. -// If this tensor has more than 1 element, then multiple parallel rpc requests -// are sent. This argument broadcasts with `address` and `request`. -// request: `0-D` or `1-D`. Serialized proto strings: the rpc request argument. -// If this tensor has more than 1 element, then multiple parallel rpc requests -// are sent. This argument broadcasts with `address` and `method`. -// -// Returns Same shape as `request`. Serialized proto strings: the rpc responses. -func Rpc(scope *Scope, address tf.Output, method tf.Output, request tf.Output, optional ...RpcAttr) (response tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Rpc", - Input: []tf.Input{ - address, method, request, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// BoostedTreesQuantileStreamResourceHandleOpAttr is an optional argument to BoostedTreesQuantileStreamResourceHandleOp. -type BoostedTreesQuantileStreamResourceHandleOpAttr func(optionalAttr) - -// BoostedTreesQuantileStreamResourceHandleOpContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func BoostedTreesQuantileStreamResourceHandleOpContainer(value string) BoostedTreesQuantileStreamResourceHandleOpAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// BoostedTreesQuantileStreamResourceHandleOpSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func BoostedTreesQuantileStreamResourceHandleOpSharedName(value string) BoostedTreesQuantileStreamResourceHandleOpAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Creates a handle to a BoostedTreesQuantileStreamResource. -func BoostedTreesQuantileStreamResourceHandleOp(scope *Scope, optional ...BoostedTreesQuantileStreamResourceHandleOpAttr) (resource tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "BoostedTreesQuantileStreamResourceHandleOp", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceSparseApplyAdagradAttr is an optional argument to ResourceSparseApplyAdagrad. -type ResourceSparseApplyAdagradAttr func(optionalAttr) - -// ResourceSparseApplyAdagradUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceSparseApplyAdagradUseLocking(value bool) ResourceSparseApplyAdagradAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// ResourceSparseApplyAdagradUpdateSlots sets the optional update_slots attribute to value. 
-// If not specified, defaults to true -func ResourceSparseApplyAdagradUpdateSlots(value bool) ResourceSparseApplyAdagradAttr { - return func(m optionalAttr) { - m["update_slots"] = value - } -} - -// Update relevant entries in '*var' and '*accum' according to the adagrad scheme. -// -// That is for rows we have grad for, we update var and accum as follows: -// accum += grad * grad -// var -= lr * grad * (1 / sqrt(accum)) -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Learning rate. Must be a scalar. -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. -// -// Returns the created operation. -func ResourceSparseApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyAdagradAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceSparseApplyAdagrad", - Input: []tf.Input{ - var_, accum, lr, grad, indices, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// EagerPyFuncAttr is an optional argument to EagerPyFunc. -type EagerPyFuncAttr func(optionalAttr) - -// EagerPyFuncIsAsync sets the optional is_async attribute to value. -// If not specified, defaults to false -func EagerPyFuncIsAsync(value bool) EagerPyFuncAttr { - return func(m optionalAttr) { - m["is_async"] = value - } -} - -// Eagerly executes a python function to compute func(input)->output. The -// -// semantics of the input, output, and attributes are the same as those for -// PyFunc. -func EagerPyFunc(scope *Scope, input []tf.Output, token string, Tout []tf.DataType, optional ...EagerPyFuncAttr) (output []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"token": token, "Tout": Tout} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "EagerPyFunc", - Input: []tf.Input{ - tf.OutputList(input), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if output, idx, err = makeOutputList(op, idx, "output"); err != nil { - scope.UpdateErr("EagerPyFunc", err) - return - } - return output -} - -// SdcaOptimizerV2Attr is an optional argument to SdcaOptimizerV2. -type SdcaOptimizerV2Attr func(optionalAttr) - -// SdcaOptimizerV2Adaptive sets the optional adaptive attribute to value. -// -// value: Whether to use Adaptive SDCA for the inner loop. -// If not specified, defaults to true -func SdcaOptimizerV2Adaptive(value bool) SdcaOptimizerV2Attr { - return func(m optionalAttr) { - m["adaptive"] = value - } -} - -// Distributed version of Stochastic Dual Coordinate Ascent (SDCA) optimizer for -// -// linear models with L1 + L2 regularization. As global optimization objective is -// strongly-convex, the optimizer optimizes the dual objective at each step. The -// optimizer applies each update one example at a time. Examples are sampled -// uniformly, and the optimizer is learning rate free and enjoys linear convergence -// rate. -// -// [Proximal Stochastic Dual Coordinate Ascent](http://arxiv.org/pdf/1211.2717v1.pdf).
-// Shai Shalev-Shwartz, Tong Zhang. 2012 -// -// $$Loss Objective = \sum f_{i} (wx_{i}) + (l2 / 2) * |w|^2 + l1 * |w|$$ -// -// [Adding vs. Averaging in Distributed Primal-Dual Optimization](http://arxiv.org/abs/1502.03508).
-// Chenxin Ma, Virginia Smith, Martin Jaggi, Michael I. Jordan, -// Peter Richtarik, Martin Takac. 2015 -// -// [Stochastic Dual Coordinate Ascent with Adaptive Probabilities](https://arxiv.org/abs/1502.08053).
-// Dominik Csiba, Zheng Qu, Peter Richtarik. 2015 -// -// Arguments: -// sparse_example_indices: a list of vectors which contain example indices. -// sparse_feature_indices: a list of vectors which contain feature indices. -// sparse_feature_values: a list of vectors which contains feature value -// associated with each feature group. -// dense_features: a list of matrices which contains the dense feature values. -// example_weights: a vector which contains the weight associated with each -// example. -// example_labels: a vector which contains the label/target associated with each -// example. -// sparse_indices: a list of vectors where each value is the indices which has -// corresponding weights in sparse_weights. This field maybe omitted for the -// dense approach. -// sparse_weights: a list of vectors where each value is the weight associated with -// a sparse feature group. -// dense_weights: a list of vectors where the values are the weights associated -// with a dense feature group. -// example_state_data: a list of vectors containing the example state data. -// loss_type: Type of the primal loss. Currently SdcaSolver supports logistic, -// squared and hinge losses. -// l1: Symmetric l1 regularization strength. -// l2: Symmetric l2 regularization strength. -// num_loss_partitions: Number of partitions of the global loss function. -// num_inner_iterations: Number of iterations per mini-batch. -// -// Returns: -// out_example_state_data: a list of vectors containing the updated example state -// data. -// out_delta_sparse_weights: a list of vectors where each value is the delta -// weights associated with a sparse feature group. -// out_delta_dense_weights: a list of vectors where the values are the delta -// weights associated with a dense feature group. 
-func SdcaOptimizerV2(scope *Scope, sparse_example_indices []tf.Output, sparse_feature_indices []tf.Output, sparse_feature_values []tf.Output, dense_features []tf.Output, example_weights tf.Output, example_labels tf.Output, sparse_indices []tf.Output, sparse_weights []tf.Output, dense_weights []tf.Output, example_state_data tf.Output, loss_type string, l1 float32, l2 float32, num_loss_partitions int64, num_inner_iterations int64, optional ...SdcaOptimizerV2Attr) (out_example_state_data tf.Output, out_delta_sparse_weights []tf.Output, out_delta_dense_weights []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"loss_type": loss_type, "l1": l1, "l2": l2, "num_loss_partitions": num_loss_partitions, "num_inner_iterations": num_inner_iterations} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SdcaOptimizerV2", - Input: []tf.Input{ - tf.OutputList(sparse_example_indices), tf.OutputList(sparse_feature_indices), tf.OutputList(sparse_feature_values), tf.OutputList(dense_features), example_weights, example_labels, tf.OutputList(sparse_indices), tf.OutputList(sparse_weights), tf.OutputList(dense_weights), example_state_data, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - out_example_state_data = op.Output(idx) - if out_delta_sparse_weights, idx, err = makeOutputList(op, idx, "out_delta_sparse_weights"); err != nil { - scope.UpdateErr("SdcaOptimizerV2", err) - return - } - if out_delta_dense_weights, idx, err = makeOutputList(op, idx, "out_delta_dense_weights"); err != nil { - scope.UpdateErr("SdcaOptimizerV2", err) - return - } - return out_example_state_data, out_delta_sparse_weights, out_delta_dense_weights -} - -// MaxPool3DGradGradAttr is an optional argument to MaxPool3DGradGrad. -type MaxPool3DGradGradAttr func(optionalAttr) - -// MaxPool3DGradGradDataFormat sets the optional data_format attribute to value. -// -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func MaxPool3DGradGradDataFormat(value string) MaxPool3DGradGradAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Computes second-order gradients of the maxpooling function. -// -// Arguments: -// orig_input: The original input tensor. -// orig_output: The original output tensor. -// grad: Output backprop of shape `[batch, depth, rows, cols, channels]`. -// ksize: 1-D tensor of length 5. The size of the window for each dimension of -// the input tensor. Must have `ksize[0] = ksize[4] = 1`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. -// -// Returns Gradients of gradients w.r.t. the input to `max_pool`. 
-func MaxPool3DGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DGradGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MaxPool3DGradGrad", - Input: []tf.Input{ - orig_input, orig_output, grad, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that contains the elements of `input_dataset` ignoring errors. -func IgnoreErrorsDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "IgnoreErrorsDataset", - Input: []tf.Input{ - input_dataset, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Deprecated. Use TensorArrayGradV3 -// -// DEPRECATED at GraphDef version 26: Use TensorArrayWriteV3 -func TensorArrayWriteV2(scope *Scope, handle tf.Output, index tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorArrayWriteV2", - Input: []tf.Input{ - handle, index, value, flow_in, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// DenseToSparseSetOperationAttr is an optional argument to DenseToSparseSetOperation. -type DenseToSparseSetOperationAttr func(optionalAttr) - -// DenseToSparseSetOperationValidateIndices sets the optional validate_indices attribute to value. -// If not specified, defaults to true -func DenseToSparseSetOperationValidateIndices(value bool) DenseToSparseSetOperationAttr { - return func(m optionalAttr) { - m["validate_indices"] = value - } -} - -// Applies set operation along last dimension of `Tensor` and `SparseTensor`. -// -// See SetOperationOp::SetOperationFromContext for values of `set_operation`. -// -// Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`, -// and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same -// as `set1`. Dimension `n` contains values in a set, duplicates are allowed but -// ignored. -// -// If `validate_indices` is `True`, this op validates the order and range of `set2` -// indices. -// -// Output `result` is a `SparseTensor` represented by `result_indices`, -// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this -// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth` -// dimension contains the result of `set_operation` applied to the corresponding -// `[0...n-1]` dimension of `set`. -// -// Arguments: -// set1: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set2`. -// Dimension `n` contains values in a set, duplicates are allowed but ignored. -// set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major -// order. -// set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major -// order. -// set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must -// be the same as the 1st `n-1` dimensions of `set1`, `result_shape[n]` is the -// max set size across `n-1` dimensions. -// -// -// Returns: -// result_indices: 2D indices of a `SparseTensor`. 
-// result_values: 1D values of a `SparseTensor`. -// result_shape: 1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is -// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]` -// is the max result set size across all `0...n-1` dimensions. -func DenseToSparseSetOperation(scope *Scope, set1 tf.Output, set2_indices tf.Output, set2_values tf.Output, set2_shape tf.Output, set_operation string, optional ...DenseToSparseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"set_operation": set_operation} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DenseToSparseSetOperation", - Input: []tf.Input{ - set1, set2_indices, set2_values, set2_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// CollectiveBcastRecvAttr is an optional argument to CollectiveBcastRecv. -type CollectiveBcastRecvAttr func(optionalAttr) - -// CollectiveBcastRecvCommunicationHint sets the optional communication_hint attribute to value. -// If not specified, defaults to "auto" -func CollectiveBcastRecvCommunicationHint(value string) CollectiveBcastRecvAttr { - return func(m optionalAttr) { - m["communication_hint"] = value - } -} - -// CollectiveBcastRecvTimeoutSeconds sets the optional timeout_seconds attribute to value. -// If not specified, defaults to 0 -func CollectiveBcastRecvTimeoutSeconds(value float32) CollectiveBcastRecvAttr { - return func(m optionalAttr) { - m["timeout_seconds"] = value - } -} - -// Receives a tensor value broadcast from another device. -func CollectiveBcastRecv(scope *Scope, T tf.DataType, group_size int64, group_key int64, instance_key int64, shape tf.Shape, optional ...CollectiveBcastRecvAttr) (data tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"T": T, "group_size": group_size, "group_key": group_key, "instance_key": instance_key, "shape": shape} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CollectiveBcastRecv", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Scatter the data from the input value into specific TensorArray elements. -// -// `indices` must be a vector, its length must match the first dim of `value`. -// -// Arguments: -// handle: The handle to a TensorArray. -// indices: The locations at which to write the tensor elements. -// value: The concatenated tensor to write to the TensorArray. -// flow_in: A float scalar that enforces proper chaining of operations. -// -// Returns A float scalar that enforces proper chaining of operations. -func TensorArrayScatterV3(scope *Scope, handle tf.Output, indices tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorArrayScatterV3", - Input: []tf.Input{ - handle, indices, value, flow_in, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the matrix square root of one or more square matrices: -// -// matmul(sqrtm(A), sqrtm(A)) = A -// -// The input matrix should be invertible. If the input matrix is real, it should -// have no eigenvalues which are real and negative (pairs of complex conjugate -// eigenvalues are allowed). -// -// The matrix square root is computed by first reducing the matrix to -// quasi-triangular form with the real Schur decomposition. 
The square root -// of the quasi-triangular matrix is then computed directly. Details of -// the algorithm can be found in: Nicholas J. Higham, "Computing real -// square roots of a real matrix", Linear Algebra Appl., 1987. -// -// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions -// form square matrices. The output is a tensor of the same shape as the input -// containing the matrix square root for all input submatrices `[..., :, :]`. -// -// Arguments: -// input: Shape is `[..., M, M]`. -// -// Returns Shape is `[..., M, M]`. -// -// @compatibility(scipy) -// Equivalent to scipy.linalg.sqrtm -// @end_compatibility -func MatrixSquareRoot(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MatrixSquareRoot", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MutexV2Attr is an optional argument to MutexV2. -type MutexV2Attr func(optionalAttr) - -// MutexV2Container sets the optional container attribute to value. -// -// value: If non-empty, this variable is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func MutexV2Container(value string) MutexV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// MutexV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this variable is named in the given bucket -// with this shared_name. Otherwise, the node name is used instead. -// If not specified, defaults to "" -func MutexV2SharedName(value string) MutexV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Creates a Mutex resource that can be locked by `MutexLock`. -// -// Returns The mutex resource. -func MutexV2(scope *Scope, optional ...MutexV2Attr) (resource tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MutexV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the truth value of (x < y) element-wise. -// -// *NOTE*: `Less` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -// -// Example: -// -// ```python -// x = tf.constant([5, 4, 6]) -// y = tf.constant([5]) -// tf.math.less(x, y) ==> [False, True, False] -// -// x = tf.constant([5, 4, 6]) -// y = tf.constant([5, 6, 7]) -// tf.math.less(x, y) ==> [False, True, True] -// ``` -func Less(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Less", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MaxPoolGradGradWithArgmaxAttr is an optional argument to MaxPoolGradGradWithArgmax. -type MaxPoolGradGradWithArgmaxAttr func(optionalAttr) - -// MaxPoolGradGradWithArgmaxIncludeBatchInIndex sets the optional include_batch_in_index attribute to value. -// -// value: Whether to include batch dimension in flattened index of `argmax`. -// If not specified, defaults to false -func MaxPoolGradGradWithArgmaxIncludeBatchInIndex(value bool) MaxPoolGradGradWithArgmaxAttr { - return func(m optionalAttr) { - m["include_batch_in_index"] = value - } -} - -// Computes second-order gradients of the maxpooling function. -// -// Arguments: -// input: The original input. 
-// grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. the -// input of `max_pool`. -// argmax: The indices of the maximum values chosen for each output of `max_pool`. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. -// -// Returns Gradients of gradients w.r.t. the input of `max_pool`. -func MaxPoolGradGradWithArgmax(scope *Scope, input tf.Output, grad tf.Output, argmax tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolGradGradWithArgmaxAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MaxPoolGradGradWithArgmax", - Input: []tf.Input{ - input, grad, argmax, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// StridedSliceAttr is an optional argument to StridedSlice. -type StridedSliceAttr func(optionalAttr) - -// StridedSliceBeginMask sets the optional begin_mask attribute to value. -// -// value: a bitmask where a bit i being 1 means to ignore the begin -// value and instead use the largest interval possible. At runtime -// begin[i] will be replaced with `[0, n-1)` if `stride[i] > 0` or -// `[-1, n-1]` if `stride[i] < 0` -// If not specified, defaults to 0 -func StridedSliceBeginMask(value int64) StridedSliceAttr { - return func(m optionalAttr) { - m["begin_mask"] = value - } -} - -// StridedSliceEndMask sets the optional end_mask attribute to value. -// -// value: analogous to `begin_mask` -// If not specified, defaults to 0 -func StridedSliceEndMask(value int64) StridedSliceAttr { - return func(m optionalAttr) { - m["end_mask"] = value - } -} - -// StridedSliceEllipsisMask sets the optional ellipsis_mask attribute to value. -// -// value: a bitmask where bit `i` being 1 means the `i`th -// position is actually an ellipsis. One bit at most can be 1. -// If `ellipsis_mask == 0`, then an implicit ellipsis mask of `1 << (m+1)` -// is provided. This means that `foo[3:5] == foo[3:5, ...]`. An ellipsis -// implicitly creates as many range specifications as necessary to fully -// specify the sliced range for every dimension. For example for a 4-dimensional -// tensor `foo` the slice `foo[2, ..., 5:8]` implies `foo[2, :, :, 5:8]`. -// If not specified, defaults to 0 -func StridedSliceEllipsisMask(value int64) StridedSliceAttr { - return func(m optionalAttr) { - m["ellipsis_mask"] = value - } -} - -// StridedSliceNewAxisMask sets the optional new_axis_mask attribute to value. -// -// value: a bitmask where bit `i` being 1 means the `i`th -// specification creates a new shape 1 dimension. For example -// `foo[:4, tf.newaxis, :2]` would produce a shape `(4, 1, 2)` tensor. -// If not specified, defaults to 0 -func StridedSliceNewAxisMask(value int64) StridedSliceAttr { - return func(m optionalAttr) { - m["new_axis_mask"] = value - } -} - -// StridedSliceShrinkAxisMask sets the optional shrink_axis_mask attribute to value. -// -// value: a bitmask where bit `i` implies that the `i`th -// specification should shrink the dimensionality. begin and end -// must imply a slice of size 1 in the dimension. For example in -// python one might do `foo[:, 3, :]` which would result in -// `shrink_axis_mask` being 2. 
-// If not specified, defaults to 0
-func StridedSliceShrinkAxisMask(value int64) StridedSliceAttr {
-	return func(m optionalAttr) {
-		m["shrink_axis_mask"] = value
-	}
-}
-
-// Return a strided slice from `input`.
-//
-// Note, most python users will want to use the Python `Tensor.__getitem__`
-// or `Variable.__getitem__` rather than this op directly.
-//
-// The goal of this op is to produce a new tensor with a subset of
-// the elements from the `n` dimensional `input` tensor. The subset is chosen using
-// a sequence of `m` sparse range specifications encoded into the arguments
-// of this function. Note, in some cases
-// `m` could be equal to `n`, but this need not be the case. Each
-// range specification entry can be one of the following:
-//
-// - An ellipsis (...). Ellipses are used to imply zero or more
-// dimensions of full-dimension selection and are produced using
-// `ellipsis_mask`. For example, `foo[...]` is the identity slice.
-//
-// - A new axis. This is used to insert a new shape=1 dimension and is
-// produced using `new_axis_mask`. For example, `foo[:, ...]` where
-// `foo` is shape `(3, 4)` produces a `(1, 3, 4)` tensor.
-//
-//
-// - A range `begin:end:stride`. This is used to specify how much to choose from
-// a given dimension. `stride` can be any integer but 0. `begin` is an integer
-// which represents the index of the first value to select while `end` represents
-// the index of the last value to select. The number of values selected in each
-// dimension is `end - begin` if `stride > 0` and `begin - end` if `stride < 0`.
-// `begin` and `end` can be negative where `-1` is the last element, `-2` is
-// the second to last. `begin_mask` controls whether to replace the explicitly
-// given `begin` with an implicit effective value of `0` if `stride > 0` and
-// `-1` if `stride < 0`. `end_mask` is analogous but produces the number
-// required to create the largest open interval. For example, given a shape
-// `(3,)` tensor `foo[:]`, the effective `begin` and `end` are `0` and `3`. Do
-// not assume this is equivalent to `foo[0:-1]` which has an effective `begin`
-// and `end` of `0` and `2`. Another example is `foo[-2::-1]` which reverses the
-// first dimension of a tensor while dropping the last two (in the original
-// order elements). For example `foo = [1,2,3,4]; foo[-2::-1]` is `[4,3]`.
-//
-// - A single index. This is used to keep only elements that have a given
-// index. For example `foo[2, :]` on a shape `(5,6)` tensor produces a
-// shape `(6,)` tensor. This is encoded in `begin` and `end` and
-// `shrink_axis_mask`.
-//
-// Each conceptual range specification is encoded in the op's argument. This
-// encoding is best understood by considering a non-trivial example. In
-// particular,
-// `foo[1, 2:4, None, ..., :-3:-1, :]` will be encoded as
-//
-// ```
-// begin = [1, 2, x, x, 0, x] # x denotes don't care (usually 0)
-// end = [2, 4, x, x, -3, x]
-// strides = [1, 1, x, x, -1, 1]
-// begin_mask = 1<<4 | 1 << 5 = 48
-// end_mask = 1<<5 = 32
-// ellipsis_mask = 1<<3 = 8
-// new_axis_mask = 1<<2 = 4
-// shrink_axis_mask = 1<<0
-// ```
-//
-// In this case if `foo.shape` is (5, 5, 5, 5, 5, 5) the final shape of
-// the slice becomes (2, 1, 5, 5, 2, 5).
-// Let us walk step by step through each argument specification.
-//
-// 1. The first argument in the example slice is turned into `begin = 1` and
-// `end = begin + 1 = 2`. To disambiguate from the original spec `2:4` we
-// also set the appropriate bit in `shrink_axis_mask`.
-//
-// 2. `2:4` contributes 2, 4, 1 to begin, end, and stride. All masks have
-// zero bits contributed.
-//
-// 3. None is a synonym for `tf.newaxis`. This means insert a dimension of size 1
-// in the final shape. Dummy values are contributed to begin,
-// end and stride, while the new_axis_mask bit is set.
-//
-// 4. `...` grabs the full ranges from as many dimensions as needed to
-// fully specify a slice for every dimension of the input shape.
-//
-// 5. `:-3:-1` shows the use of negative indices. A negative index `i` associated
-// with a dimension that has shape `s` is converted to a positive index
-// `s + i`. So `-1` becomes `s-1` (i.e. the last element). This conversion
-// is done internally so begin, end and strides receive x, -3, and -1.
-// The appropriate begin_mask bit is set to indicate the start range is the
-// full range (ignoring the x).
-//
-// 6. `:` indicates that the entire contents of the corresponding dimension
-// is selected. This is equivalent to `::` or `0::1`. begin, end, and strides
-// receive 0, 0, and 1, respectively. The appropriate bits in `begin_mask` and
-// `end_mask` are also set.
-//
-// *Requirements*:
-// `0 != strides[i] for i in [0, m)`
-// `ellipsis_mask must be a power of two (only one ellipsis)`
-//
-// Arguments:
-//
-// begin: `begin[k]` specifies the offset into the `k`th range specification.
-// The exact dimension this corresponds to will be determined by context.
-// Out-of-bounds values will be silently clamped. If the `k`th bit of
-// `begin_mask` is set then `begin[k]` is ignored and the full range of the
-// appropriate dimension is used instead. Negative values cause indexing
-// to start from the highest element e.g. If `foo==[1,2,3]` then `foo[-1]==3`.
-// end: `end[i]` is like `begin` with the exception that `end_mask` is
-// used to determine full ranges.
-// strides: `strides[i]` specifies the increment in the `i`th specification
-// after extracting a given element. Negative indices will reverse
-// the original order. Out of range values are
-// clamped to `[0,dim[i]) if slice[i]>0` or `[-1,dim[i]-1] if slice[i] < 0`
-func StridedSlice(scope *Scope, input tf.Output, begin tf.Output, end tf.Output, strides tf.Output, optional ...StridedSliceAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "StridedSlice",
-		Input: []tf.Input{
-			input, begin, end, strides,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugAttr is an optional argument to RetrieveTPUEmbeddingRMSPropParametersGradAccumDebug.
-type RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugAttr func(optionalAttr)
-
-// RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugTableId sets the optional table_id attribute to value.
-// If not specified, defaults to -1
-func RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugTableId(value int64) RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugAttr {
-	return func(m optionalAttr) {
-		m["table_id"] = value
-	}
-}
-
-// RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugTableName sets the optional table_name attribute to value.
-// If not specified, defaults to "" -func RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugTableName(value string) RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugConfig(value string) RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Retrieve RMSProp embedding parameters with debug support. -// -// An op that retrieves optimization parameters from embedding to host -// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up -// the correct embedding table configuration. For example, this op is -// used to retrieve updated parameters before saving a checkpoint. -// -// Returns: -// parameters: Parameter parameters updated by the RMSProp optimization algorithm. -// ms: Parameter ms updated by the RMSProp optimization algorithm. -// mom: Parameter mom updated by the RMSProp optimization algorithm. -// gradient_accumulators: Parameter gradient_accumulators updated by the RMSProp optimization algorithm. -func RetrieveTPUEmbeddingRMSPropParametersGradAccumDebug(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugAttr) (parameters tf.Output, ms tf.Output, mom tf.Output, gradient_accumulators tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RetrieveTPUEmbeddingRMSPropParametersGradAccumDebug", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3) -} - -// DenseToDenseSetOperationAttr is an optional argument to DenseToDenseSetOperation. -type DenseToDenseSetOperationAttr func(optionalAttr) - -// DenseToDenseSetOperationValidateIndices sets the optional validate_indices attribute to value. -// If not specified, defaults to true -func DenseToDenseSetOperationValidateIndices(value bool) DenseToDenseSetOperationAttr { - return func(m optionalAttr) { - m["validate_indices"] = value - } -} - -// Applies set operation along last dimension of 2 `Tensor` inputs. -// -// See SetOperationOp::SetOperationFromContext for values of `set_operation`. -// -// Output `result` is a `SparseTensor` represented by `result_indices`, -// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this -// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth` -// dimension contains the result of `set_operation` applied to the corresponding -// `[0...n-1]` dimension of `set`. -// -// Arguments: -// set1: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set2`. -// Dimension `n` contains values in a set, duplicates are allowed but ignored. -// set2: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set1`. -// Dimension `n` contains values in a set, duplicates are allowed but ignored. -// -// -// Returns: -// result_indices: 2D indices of a `SparseTensor`. -// result_values: 1D values of a `SparseTensor`. -// result_shape: 1D `Tensor` shape of a `SparseTensor`. 
`result_shape[0...n-1]` is -// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]` -// is the max result set size across all `0...n-1` dimensions. -func DenseToDenseSetOperation(scope *Scope, set1 tf.Output, set2 tf.Output, set_operation string, optional ...DenseToDenseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"set_operation": set_operation} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DenseToDenseSetOperation", - Input: []tf.Input{ - set1, set2, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Returns the set of files matching one or more glob patterns. -// -// Note that this routine only supports wildcard characters in the -// basename portion of the pattern, not in the directory portion. -// Note also that the order of filenames returned is deterministic. -// -// Arguments: -// pattern: Shell wildcard pattern(s). Scalar or vector of type string. -// -// Returns A vector of matching filenames. -func MatchingFiles(scope *Scope, pattern tf.Output) (filenames tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MatchingFiles", - Input: []tf.Input{ - pattern, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns element-wise remainder of division. When `x < 0` xor `y < 0` is -// -// true, this follows Python semantics in that the result here is consistent -// with a flooring divide. E.g. `floor(x / y) * y + mod(x, y) = x`. -// -// *NOTE*: `FloorMod` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func FloorMod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "FloorMod", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Slice a `SparseTensor` based on the `start` and `size`. -// -// For example, if the input is -// -// input_tensor = shape = [2, 7] -// [ a d e ] -// [b c ] -// -// Graphically the output tensors are: -// -// sparse_slice([0, 0], [2, 4]) = shape = [2, 4] -// [ a ] -// [b c ] -// -// sparse_slice([0, 4], [2, 3]) = shape = [2, 3] -// [ d e ] -// [ ] -// -// Arguments: -// indices: 2-D tensor represents the indices of the sparse tensor. -// values: 1-D tensor represents the values of the sparse tensor. -// shape: 1-D. tensor represents the shape of the sparse tensor. -// start: 1-D. tensor represents the start of the slice. -// size: 1-D. tensor represents the size of the slice. -// output indices: A list of 1-D tensors represents the indices of the output -// sparse tensors. -// -// Returns: -// output_indices -// output_values: A list of 1-D tensors represents the values of the output sparse -// tensors. -// output_shape: A list of 1-D tensors represents the shape of the output sparse -// tensors. 
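// Illustrative sketch, assumed example rather than part of the generated
// wrappers: a small graph that uses the SparseSlice wrapper defined just
// below to take the left [2, 4] block of the [2, 7] sparse example above.
// The literal indices and values are made up for the demonstration, with
// the letters a..e encoded as the floats 1..5.
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Sparse form of [[_, a, _, d, e, _, _], [b, c, _, _, _, _, _]].
	indices := op.Const(s, [][]int64{{0, 1}, {0, 3}, {0, 4}, {1, 0}, {1, 1}})
	values := op.Const(s, []float32{1, 4, 5, 2, 3})
	shape := op.Const(s, []int64{2, 7})
	start := op.Const(s, []int64{0, 0})
	size := op.Const(s, []int64{2, 4})
	outIndices, outValues, outShape := op.SparseSlice(s, indices, values, shape, start, size)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()
	out, err := sess.Run(nil, []tf.Output{outIndices, outValues, outShape}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value(), out[1].Value(), out[2].Value())
}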
-func SparseSlice(scope *Scope, indices tf.Output, values tf.Output, shape tf.Output, start tf.Output, size tf.Output) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSlice", - Input: []tf.Input{ - indices, values, shape, start, size, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// SparseMatrixSparseMatMulAttr is an optional argument to SparseMatrixSparseMatMul. -type SparseMatrixSparseMatMulAttr func(optionalAttr) - -// SparseMatrixSparseMatMulTransposeA sets the optional transpose_a attribute to value. -// -// value: Indicates whether `a` should be transposed. -// If not specified, defaults to false -func SparseMatrixSparseMatMulTransposeA(value bool) SparseMatrixSparseMatMulAttr { - return func(m optionalAttr) { - m["transpose_a"] = value - } -} - -// SparseMatrixSparseMatMulTransposeB sets the optional transpose_b attribute to value. -// -// value: Indicates whether `b` should be transposed. -// If not specified, defaults to false -func SparseMatrixSparseMatMulTransposeB(value bool) SparseMatrixSparseMatMulAttr { - return func(m optionalAttr) { - m["transpose_b"] = value - } -} - -// SparseMatrixSparseMatMulAdjointA sets the optional adjoint_a attribute to value. -// -// value: Indicates whether `a` should be conjugate-transposed. -// If not specified, defaults to false -func SparseMatrixSparseMatMulAdjointA(value bool) SparseMatrixSparseMatMulAttr { - return func(m optionalAttr) { - m["adjoint_a"] = value - } -} - -// SparseMatrixSparseMatMulAdjointB sets the optional adjoint_b attribute to value. -// -// value: Indicates whether `b` should be conjugate-transposed. -// If not specified, defaults to false -func SparseMatrixSparseMatMulAdjointB(value bool) SparseMatrixSparseMatMulAttr { - return func(m optionalAttr) { - m["adjoint_b"] = value - } -} - -// Sparse-matrix-multiplies two CSR matrices `a` and `b`. -// -// Performs a matrix multiplication of a sparse matrix `a` with a sparse matrix -// `b`; returns a sparse matrix `a * b`, unless either `a` or `b` is transposed or -// adjointed. -// -// Each matrix may be transposed or adjointed (conjugated and transposed) -// according to the Boolean parameters `transpose_a`, `adjoint_a`, `transpose_b` -// and `adjoint_b`. At most one of `transpose_a` or `adjoint_a` may be True. -// Similarly, at most one of `transpose_b` or `adjoint_b` may be True. -// -// The inputs must have compatible shapes. That is, the inner dimension of `a` -// must be equal to the outer dimension of `b`. This requirement is adjusted -// according to whether either `a` or `b` is transposed or adjointed. -// -// The `type` parameter denotes the type of the matrix elements. Both `a` and `b` -// must have the same type. The supported types are: `float32`, `float64`, -// `complex64` and `complex128`. -// -// Both `a` and `b` must have the same rank. Broadcasting is not supported. If they -// have rank 3, each batch of 2D CSRSparseMatrices within `a` and `b` must have the -// same dense shape. -// -// The sparse matrix product may have numeric (non-structural) zeros. -// TODO(anudhyan): Consider adding a boolean attribute to control whether to prune -// zeros. 
-// -// Usage example: -// -// ```python -// from tensorflow.python.ops.linalg.sparse import sparse_csr_matrix_ops -// -// a_indices = np.array([[0, 0], [2, 3], [2, 4], [3, 0]]) -// a_values = np.array([1.0, 5.0, -1.0, -2.0], np.float32) -// a_dense_shape = [4, 5] -// -// b_indices = np.array([[0, 0], [3, 0], [3, 1]]) -// b_values = np.array([2.0, 7.0, 8.0], np.float32) -// b_dense_shape = [5, 3] -// -// with tf.Session() as sess: -// # Define (COO format) Sparse Tensors over Numpy arrays -// a_st = tf.sparse.SparseTensor(a_indices, a_values, a_dense_shape) -// b_st = tf.sparse.SparseTensor(b_indices, b_values, b_dense_shape) -// -// # Convert SparseTensors to CSR SparseMatrix -// a_sm = sparse_csr_matrix_ops.sparse_tensor_to_csr_sparse_matrix( -// a_st.indices, a_st.values, a_st.dense_shape) -// b_sm = sparse_csr_matrix_ops.sparse_tensor_to_csr_sparse_matrix( -// b_st.indices, b_st.values, b_st.dense_shape) -// -// # Compute the CSR SparseMatrix matrix multiplication -// c_sm = sparse_csr_matrix_ops.sparse_matrix_sparse_mat_mul( -// a=a_sm, b=b_sm, type=tf.float32) -// -// # Convert the CSR SparseMatrix product to a dense Tensor -// c_sm_dense = sparse_csr_matrix_ops.csr_sparse_matrix_to_dense( -// c_sm, tf.float32) -// # Evaluate the dense Tensor value -// c_sm_dense_value = sess.run(c_sm_dense) -// ``` -// -// `c_sm_dense_value` stores the dense matrix product: -// -// ``` -// [[ 2. 0. 0.] -// [ 0. 0. 0.] -// [ 35. 40. 0.] -// [ -4. 0. 0.]] -// ``` -// -// a: A `CSRSparseMatrix`. -// b: A `CSRSparseMatrix` with the same type and rank as `a`. -// type: The type of both `a` and `b`. -// transpose_a: If True, `a` transposed before multiplication. -// transpose_b: If True, `b` transposed before multiplication. -// adjoint_a: If True, `a` adjointed before multiplication. -// adjoint_b: If True, `b` adjointed before multiplication. -// -// Arguments: -// a: A CSRSparseMatrix. -// b: A CSRSparseMatrix. -// -// -// Returns A CSRSparseMatrix. -func SparseMatrixSparseMatMul(scope *Scope, a tf.Output, b tf.Output, type_ tf.DataType, optional ...SparseMatrixSparseMatMulAttr) (c tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"type": type_} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SparseMatrixSparseMatMul", - Input: []tf.Input{ - a, b, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// CopyHostAttr is an optional argument to CopyHost. -type CopyHostAttr func(optionalAttr) - -// CopyHostTensorName sets the optional tensor_name attribute to value. -// -// value: The name of the input tensor. -// If not specified, defaults to "" -func CopyHostTensorName(value string) CopyHostAttr { - return func(m optionalAttr) { - m["tensor_name"] = value - } -} - -// CopyHostDebugOpsSpec sets the optional debug_ops_spec attribute to value. -// -// value: A list of debug op spec (op, url, gated_grpc) for attached debug -// ops. Each element of the list has the format -// ;;, wherein gated_grpc is boolean represented -// as 0/1. E.g., "DebugIdentity;grpc://foo:3333;1", -// "DebugIdentity;file:///tmp/tfdbg_1;0". -// If not specified, defaults to <> -func CopyHostDebugOpsSpec(value []string) CopyHostAttr { - return func(m optionalAttr) { - m["debug_ops_spec"] = value - } -} - -// Copy a tensor to host. -// -// Performs CPU-to-CPU deep-copying of tensor. 
-// N.B.: If the all downstream attached debug ops are disabled given the current -// gRPC gating status, the output will simply forward the input tensor without -// deep-copying. See the documentation of Debug* ops for more details. -// -// Unlike the Copy Op, this op has HostMemory constraint on its input or output. -// -// Arguments: -// input: Input tensor. -func CopyHost(scope *Scope, input tf.Output, optional ...CopyHostAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CopyHost", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Sparse addition of two CSR matrices, C = alpha * A + beta * B. -// -// The gradients of SparseMatrixAdd outputs with respect to alpha and beta are not -// currently defined (TensorFlow will return zeros for these entries). -// -// Arguments: -// a: A CSRSparseMatrix. -// b: A CSRSparseMatrix. -// alpha: A constant scalar. -// beta: A constant scalar. -// -// Returns A CSRSparseMatrix. -func SparseMatrixAdd(scope *Scope, a tf.Output, b tf.Output, alpha tf.Output, beta tf.Output) (c tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseMatrixAdd", - Input: []tf.Input{ - a, b, alpha, beta, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SparseMatrixMatMulAttr is an optional argument to SparseMatrixMatMul. -type SparseMatrixMatMulAttr func(optionalAttr) - -// SparseMatrixMatMulTransposeA sets the optional transpose_a attribute to value. -// -// value: Indicates whether `a` should be transposed. -// If not specified, defaults to false -func SparseMatrixMatMulTransposeA(value bool) SparseMatrixMatMulAttr { - return func(m optionalAttr) { - m["transpose_a"] = value - } -} - -// SparseMatrixMatMulTransposeB sets the optional transpose_b attribute to value. -// -// value: Indicates whether `b` should be transposed. -// If not specified, defaults to false -func SparseMatrixMatMulTransposeB(value bool) SparseMatrixMatMulAttr { - return func(m optionalAttr) { - m["transpose_b"] = value - } -} - -// SparseMatrixMatMulAdjointA sets the optional adjoint_a attribute to value. -// -// value: Indicates whether `a` should be conjugate-transposed. -// If not specified, defaults to false -func SparseMatrixMatMulAdjointA(value bool) SparseMatrixMatMulAttr { - return func(m optionalAttr) { - m["adjoint_a"] = value - } -} - -// SparseMatrixMatMulAdjointB sets the optional adjoint_b attribute to value. -// -// value: Indicates whether `b` should be conjugate-transposed. -// If not specified, defaults to false -func SparseMatrixMatMulAdjointB(value bool) SparseMatrixMatMulAttr { - return func(m optionalAttr) { - m["adjoint_b"] = value - } -} - -// SparseMatrixMatMulTransposeOutput sets the optional transpose_output attribute to value. -// -// value: Transposes the product of `a` and `b`. -// If not specified, defaults to false -func SparseMatrixMatMulTransposeOutput(value bool) SparseMatrixMatMulAttr { - return func(m optionalAttr) { - m["transpose_output"] = value - } -} - -// SparseMatrixMatMulConjugateOutput sets the optional conjugate_output attribute to value. -// -// value: Conjugates the product of `a` and `b`. 
-// If not specified, defaults to false -func SparseMatrixMatMulConjugateOutput(value bool) SparseMatrixMatMulAttr { - return func(m optionalAttr) { - m["conjugate_output"] = value - } -} - -// Matrix-multiplies a sparse matrix with a dense matrix. -// -// Returns a dense matrix. -// For inputs A and B, where A is CSR and B is dense; this op returns a dense C; -// -// If transpose_output is false, returns: -// ``` -// C = A . B -// ``` -// -// If transpose_output is `true`, returns: -// ``` -// C = transpose(A . B) = transpose(B) . transpose(A) -// ``` -// where the transposition is performed along the two innermost (matrix) -// dimensions. -// -// If conjugate_output is `true`, returns: -// ``` -// C = conjugate(A . B) = conjugate(A) . conjugate(B) -// ``` -// -// If both conjugate_output and transpose_output are `true`, returns: -// ``` -// C = conjugate(transpose(A . B)) = conjugate(transpose(B)) . -// conjugate(transpose(A)) -// ``` -// -// Arguments: -// a: A CSRSparseMatrix. -// b: A dense tensor. -// -// Returns A dense output tensor. -func SparseMatrixMatMul(scope *Scope, a tf.Output, b tf.Output, optional ...SparseMatrixMatMulAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SparseMatrixMatMul", - Input: []tf.Input{ - a, b, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Reads out the CSR components at batch `index`. -// -// This op is meant only for debugging / testing, and its interface is not expected -// to be stable. -// -// Arguments: -// csr_sparse_matrix: A batched CSRSparseMatrix. -// index: The index in `csr_sparse_matrix`'s batch. -// -// -// Returns: -// row_ptrs: An array containing CSR matrix row pointers. -// col_inds: An array containing CSR matrix column indices. -// values: An array containing CSR matrix nonzero values. -func CSRSparseMatrixComponents(scope *Scope, csr_sparse_matrix tf.Output, index tf.Output, type_ tf.DataType) (row_ptrs tf.Output, col_inds tf.Output, values tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"type": type_} - opspec := tf.OpSpec{ - Type: "CSRSparseMatrixComponents", - Input: []tf.Input{ - csr_sparse_matrix, index, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// StringSplitV2Attr is an optional argument to StringSplitV2. -type StringSplitV2Attr func(optionalAttr) - -// StringSplitV2Maxsplit sets the optional maxsplit attribute to value. -// -// value: An `int`. If `maxsplit > 0`, limit of the split of the result. -// If not specified, defaults to -1 -func StringSplitV2Maxsplit(value int64) StringSplitV2Attr { - return func(m optionalAttr) { - m["maxsplit"] = value - } -} - -// Split elements of `source` based on `sep` into a `SparseTensor`. -// -// Let N be the size of source (typically N will be the batch size). Split each -// element of `source` based on `sep` and return a `SparseTensor` -// containing the split tokens. Empty tokens are ignored. -// -// For example, N = 2, source[0] is 'hello world' and source[1] is 'a b c', -// then the output will be -// ``` -// st.indices = [0, 0; -// 0, 1; -// 1, 0; -// 1, 1; -// 1, 2] -// st.shape = [2, 3] -// st.values = ['hello', 'world', 'a', 'b', 'c'] -// ``` -// -// If `sep` is given, consecutive delimiters are not grouped together and are -// deemed to delimit empty strings. 
For example, source of `"1<>2<><>3"` and -// sep of `"<>"` returns `["1", "2", "", "3"]`. If `sep` is None or an empty -// string, consecutive whitespace are regarded as a single separator, and the -// result will contain no empty strings at the startor end if the string has -// leading or trailing whitespace. -// -// Note that the above mentioned behavior matches python's str.split. -// -// Arguments: -// input: `1-D` string `Tensor`, the strings to split. -// sep: `0-D` string `Tensor`, the delimiter character. -func StringSplitV2(scope *Scope, input tf.Output, sep tf.Output, optional ...StringSplitV2Attr) (indices tf.Output, values tf.Output, shape tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StringSplitV2", - Input: []tf.Input{ - input, sep, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Compute the lower regularized incomplete Gamma function `P(a, x)`. -// -// The lower regularized incomplete Gamma function is defined as: -// -// -// \\(P(a, x) = gamma(a, x) / Gamma(a) = 1 - Q(a, x)\\) -// -// where -// -// \\(gamma(a, x) = \\int_{0}^{x} t^{a-1} exp(-t) dt\\) -// -// is the lower incomplete Gamma function. -// -// Note, above `Q(a, x)` (`Igammac`) is the upper regularized complete -// Gamma function. -func Igamma(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Igamma", - Input: []tf.Input{ - a, x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Convert a (possibly batched) CSRSparseMatrix to dense. -// -// Arguments: -// sparse_input: A batched CSRSparseMatrix. -// -// -// Returns A dense tensor. -func CSRSparseMatrixToDense(scope *Scope, sparse_input tf.Output, type_ tf.DataType) (dense_output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"type": type_} - opspec := tf.OpSpec{ - Type: "CSRSparseMatrixToDense", - Input: []tf.Input{ - sparse_input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// IteratorFromStringHandleAttr is an optional argument to IteratorFromStringHandle. -type IteratorFromStringHandleAttr func(optionalAttr) - -// IteratorFromStringHandleOutputTypes sets the optional output_types attribute to value. -// -// value: If specified, defines the type of each tuple component in an -// element produced by the resulting iterator. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func IteratorFromStringHandleOutputTypes(value []tf.DataType) IteratorFromStringHandleAttr { - return func(m optionalAttr) { - m["output_types"] = value - } -} - -// IteratorFromStringHandleOutputShapes sets the optional output_shapes attribute to value. -// -// value: If specified, defines the shape of each tuple component in an -// element produced by the resulting iterator. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func IteratorFromStringHandleOutputShapes(value []tf.Shape) IteratorFromStringHandleAttr { - return func(m optionalAttr) { - m["output_shapes"] = value - } -} - -// Converts the given string representing a handle to an iterator to a resource. -// -// Arguments: -// string_handle: A string representation of the given handle. -// -// Returns A handle to an iterator resource. 
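// Illustrative sketch, assumed example rather than part of the generated
// wrappers: the StringSplitV2 wrapper documented earlier in this hunk,
// applied to the two-string batch from its comment. With sep left as an
// empty string, runs of whitespace act as a single separator.
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	source := op.Const(s, []string{"hello world", "a b c"})
	sep := op.Const(s, "")
	indices, values, shape := op.StringSplitV2(s, source, sep)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()
	out, err := sess.Run(nil, []tf.Output{indices, values, shape}, nil)
	if err != nil {
		panic(err)
	}
	// Per the comment above, values should come back as
	// ["hello", "world", "a", "b", "c"] with shape [2, 3].
	fmt.Println(out[0].Value(), out[1].Value(), out[2].Value())
}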
-func IteratorFromStringHandle(scope *Scope, string_handle tf.Output, optional ...IteratorFromStringHandleAttr) (resource_handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "IteratorFromStringHandle", - Input: []tf.Input{ - string_handle, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Add all input tensors element wise. -// -// Inputs must be of same size and shape. -// -// ```python -// x = [9, 7, 10] -// tf.math.add_n(x) ==> 26 -// ``` -func AddN(scope *Scope, inputs []tf.Output) (sum tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "AddN", - Input: []tf.Input{ - tf.OutputList(inputs), - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Converts a (possibly batched) CSRSparesMatrix to a SparseTensor. -// -// Arguments: -// sparse_matrix: A (possibly batched) CSRSparseMatrix. -// -// -// Returns: -// indices: SparseTensor indices. -// values: SparseTensor values. -// dense_shape: SparseTensor dense shape. -func CSRSparseMatrixToSparseTensor(scope *Scope, sparse_matrix tf.Output, type_ tf.DataType) (indices tf.Output, values tf.Output, dense_shape tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"type": type_} - opspec := tf.OpSpec{ - Type: "CSRSparseMatrixToSparseTensor", - Input: []tf.Input{ - sparse_matrix, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// SizeAttr is an optional argument to Size. -type SizeAttr func(optionalAttr) - -// SizeOutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_INT32 -func SizeOutType(value tf.DataType) SizeAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// Returns the size of a tensor. -// -// This operation returns an integer representing the number of elements in -// `input`. -// -// For example: -// -// ``` -// # 't' is [[[1, 1,, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]] -// size(t) ==> 12 -// ``` -func Size(scope *Scope, input tf.Output, optional ...SizeAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Size", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// TPUReplicatedInputAttr is an optional argument to TPUReplicatedInput. -type TPUReplicatedInputAttr func(optionalAttr) - -// TPUReplicatedInputIsMirroredVariable sets the optional is_mirrored_variable attribute to value. -// If not specified, defaults to false -func TPUReplicatedInputIsMirroredVariable(value bool) TPUReplicatedInputAttr { - return func(m optionalAttr) { - m["is_mirrored_variable"] = value - } -} - -// TPUReplicatedInputIndex sets the optional index attribute to value. -// If not specified, defaults to -1 -func TPUReplicatedInputIndex(value int64) TPUReplicatedInputAttr { - return func(m optionalAttr) { - m["index"] = value - } -} - -// TPUReplicatedInputIsPacked sets the optional is_packed attribute to value. -// If not specified, defaults to false -func TPUReplicatedInputIsPacked(value bool) TPUReplicatedInputAttr { - return func(m optionalAttr) { - m["is_packed"] = value - } -} - -// Connects N inputs to an N-way replicated TPU computation. 
-// -// This operation holds a replicated input to a `tpu.replicate()` computation subgraph. -// Each replicated input has the same shape and type alongside the output. -// -// For example: -// ``` -// %a = "tf.opA"() -// %b = "tf.opB"() -// %replicated_input = "tf.TPUReplicatedInput"(%a, %b) -// %computation = "tf.Computation"(%replicated_input) -// ``` -// The above computation has a replicated input of two replicas. -func TPUReplicatedInput(scope *Scope, inputs []tf.Output, optional ...TPUReplicatedInputAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TPUReplicatedInput", - Input: []tf.Input{ - tf.OutputList(inputs), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a tensor filled with a scalar value. -// -// This operation creates a tensor of shape `dims` and fills it with `value`. -// -// For example: -// -// ``` -// # Output tensor has shape [2, 3]. -// fill([2, 3], 9) ==> [[9, 9, 9] -// [9, 9, 9]] -// ``` -// -// `tf.fill` differs from `tf.constant` in a few ways: -// -// * `tf.fill` only supports scalar contents, whereas `tf.constant` supports -// Tensor values. -// * `tf.fill` creates an Op in the computation graph that constructs the actual -// Tensor value at runtime. This is in contrast to `tf.constant` which embeds -// the entire Tensor into the graph with a `Const` node. -// * Because `tf.fill` evaluates at graph runtime, it supports dynamic shapes -// based on other runtime Tensors, unlike `tf.constant`. -// -// Arguments: -// dims: 1-D. Represents the shape of the output tensor. -// value: 0-D (scalar). Value to fill the returned tensor. -// -// @compatibility(numpy) -// Equivalent to np.full -// @end_compatibility -func Fill(scope *Scope, dims tf.Output, value tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Fill", - Input: []tf.Input{ - dims, value, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Converts a dense tensor to a (possibly batched) CSRSparseMatrix. -// -// Arguments: -// dense_input: A Dense tensor. -// indices: Indices of nonzero elements. -// -// Returns A (possibly batched) CSRSparseMatrix. -func DenseToCSRSparseMatrix(scope *Scope, dense_input tf.Output, indices tf.Output) (sparse_output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DenseToCSRSparseMatrix", - Input: []tf.Input{ - dense_input, indices, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Fills empty rows in the input 2-D `SparseTensor` with a default value. -// -// The input `SparseTensor` is represented via the tuple of inputs -// (`indices`, `values`, `dense_shape`). The output `SparseTensor` has the -// same `dense_shape` but with indices `output_indices` and values -// `output_values`. -// -// This op inserts a single entry for every row that doesn't have any values. -// The index is created as `[row, 0, ..., 0]` and the inserted value -// is `default_value`. 
-// -// For example, suppose `sp_input` has shape `[5, 6]` and non-empty values: -// -// [0, 1]: a -// [0, 3]: b -// [2, 0]: c -// [3, 1]: d -// -// Rows 1 and 4 are empty, so the output will be of shape `[5, 6]` with values: -// -// [0, 1]: a -// [0, 3]: b -// [1, 0]: default_value -// [2, 0]: c -// [3, 1]: d -// [4, 0]: default_value -// -// The output `SparseTensor` will be in row-major order and will have the -// same shape as the input. -// -// This op also returns an indicator vector shaped `[dense_shape[0]]` such that -// -// empty_row_indicator[i] = True iff row i was an empty row. -// -// And a reverse index map vector shaped `[indices.shape[0]]` that is used during -// backpropagation, -// -// reverse_index_map[j] = out_j s.t. indices[j, :] == output_indices[out_j, :] -// -// Arguments: -// indices: 2-D. the indices of the sparse tensor. -// values: 1-D. the values of the sparse tensor. -// dense_shape: 1-D. the shape of the sparse tensor. -// default_value: 0-D. default value to insert into location `[row, 0, ..., 0]` -// for rows missing from the input sparse tensor. -// output indices: 2-D. the indices of the filled sparse tensor. -// -// Returns: -// output_indices -// output_values: 1-D. the values of the filled sparse tensor. -// empty_row_indicator: 1-D. whether the dense row was missing in the -// input sparse tensor. -// reverse_index_map: 1-D. a map from the input indices to the output indices. -func SparseFillEmptyRows(scope *Scope, indices tf.Output, values tf.Output, dense_shape tf.Output, default_value tf.Output) (output_indices tf.Output, output_values tf.Output, empty_row_indicator tf.Output, reverse_index_map tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseFillEmptyRows", - Input: []tf.Input{ - indices, values, dense_shape, default_value, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3) -} - -// Makes the summary of quantiles for the batch. -// -// An op that takes a list of tensors (one tensor per feature) and outputs the -// quantile summaries for each tensor. -// -// Arguments: -// float_values: float; List of Rank 1 Tensors each containing values for a single feature. -// example_weights: float; Rank 1 Tensor with weights per instance. -// epsilon: float; The required maximum approximation error. -// -// Returns float; List of Rank 2 Tensors each containing the quantile summary -// (value, weight, min_rank, max_rank) of a single feature. -func BoostedTreesMakeQuantileSummaries(scope *Scope, float_values []tf.Output, example_weights tf.Output, epsilon tf.Output) (summaries []tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BoostedTreesMakeQuantileSummaries", - Input: []tf.Input{ - tf.OutputList(float_values), example_weights, epsilon, - }, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if summaries, idx, err = makeOutputList(op, idx, "summaries"); err != nil { - scope.UpdateErr("BoostedTreesMakeQuantileSummaries", err) - return - } - return summaries -} - -// TakeManySparseFromTensorsMapAttr is an optional argument to TakeManySparseFromTensorsMap. -type TakeManySparseFromTensorsMapAttr func(optionalAttr) - -// TakeManySparseFromTensorsMapContainer sets the optional container attribute to value. -// -// value: The container name for the `SparseTensorsMap` read by this op. 
-// If not specified, defaults to "" -func TakeManySparseFromTensorsMapContainer(value string) TakeManySparseFromTensorsMapAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// TakeManySparseFromTensorsMapSharedName sets the optional shared_name attribute to value. -// -// value: The shared name for the `SparseTensorsMap` read by this op. -// It should not be blank; rather the `shared_name` or unique Operation name -// of the Op that created the original `SparseTensorsMap` should be used. -// If not specified, defaults to "" -func TakeManySparseFromTensorsMapSharedName(value string) TakeManySparseFromTensorsMapAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Read `SparseTensors` from a `SparseTensorsMap` and concatenate them. -// -// The input `sparse_handles` must be an `int64` matrix of shape `[N, 1]` where -// `N` is the minibatch size and the rows correspond to the output handles of -// `AddSparseToTensorsMap` or `AddManySparseToTensorsMap`. The ranks of the -// original `SparseTensor` objects that went into the given input ops must all -// match. When the final `SparseTensor` is created, it has rank one -// higher than the ranks of the incoming `SparseTensor` objects -// (they have been concatenated along a new row dimension on the left). -// -// The output `SparseTensor` object's shape values for all dimensions but the -// first are the max across the input `SparseTensor` objects' shape values -// for the corresponding dimensions. Its first shape value is `N`, the minibatch -// size. -// -// The input `SparseTensor` objects' indices are assumed ordered in -// standard lexicographic order. If this is not the case, after this -// step run `SparseReorder` to restore index ordering. -// -// For example, if the handles represent an input, which is a `[2, 3]` matrix -// representing two original `SparseTensor` objects: -// -// ``` -// index = [ 0] -// [10] -// [20] -// values = [1, 2, 3] -// shape = [50] -// ``` -// -// and -// -// ``` -// index = [ 2] -// [10] -// values = [4, 5] -// shape = [30] -// ``` -// -// then the final `SparseTensor` will be: -// -// ``` -// index = [0 0] -// [0 10] -// [0 20] -// [1 2] -// [1 10] -// values = [1, 2, 3, 4, 5] -// shape = [2 50] -// ``` -// -// Arguments: -// sparse_handles: 1-D, The `N` serialized `SparseTensor` objects. -// Shape: `[N]`. -// dtype: The `dtype` of the `SparseTensor` objects stored in the -// `SparseTensorsMap`. -// -// Returns: -// sparse_indices: 2-D. The `indices` of the minibatch `SparseTensor`. -// sparse_values: 1-D. The `values` of the minibatch `SparseTensor`. -// sparse_shape: 1-D. The `shape` of the minibatch `SparseTensor`. -func TakeManySparseFromTensorsMap(scope *Scope, sparse_handles tf.Output, dtype tf.DataType, optional ...TakeManySparseFromTensorsMapAttr) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TakeManySparseFromTensorsMap", - Input: []tf.Input{ - sparse_handles, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// LoadTPUEmbeddingCenteredRMSPropParametersAttr is an optional argument to LoadTPUEmbeddingCenteredRMSPropParameters. 
-type LoadTPUEmbeddingCenteredRMSPropParametersAttr func(optionalAttr) - -// LoadTPUEmbeddingCenteredRMSPropParametersTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func LoadTPUEmbeddingCenteredRMSPropParametersTableId(value int64) LoadTPUEmbeddingCenteredRMSPropParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// LoadTPUEmbeddingCenteredRMSPropParametersTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingCenteredRMSPropParametersTableName(value string) LoadTPUEmbeddingCenteredRMSPropParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// LoadTPUEmbeddingCenteredRMSPropParametersConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingCenteredRMSPropParametersConfig(value string) LoadTPUEmbeddingCenteredRMSPropParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Load centered RMSProp embedding parameters. -// -// An op that loads optimization parameters into HBM for embedding. Must be -// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -// embedding table configuration. For example, this op is used to install -// parameters that are loaded from a checkpoint before a training loop is -// executed. -// -// Arguments: -// parameters: Value of parameters used in the centered RMSProp optimization algorithm. -// ms: Value of ms used in the centered RMSProp optimization algorithm. -// mom: Value of mom used in the centered RMSProp optimization algorithm. -// mg: Value of mg used in the centered RMSProp optimization algorithm. -// -// -// -// Returns the created operation. -func LoadTPUEmbeddingCenteredRMSPropParameters(scope *Scope, parameters tf.Output, ms tf.Output, mom tf.Output, mg tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingCenteredRMSPropParametersAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LoadTPUEmbeddingCenteredRMSPropParameters", - Input: []tf.Input{ - parameters, ms, mom, mg, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// RandomPoissonAttr is an optional argument to RandomPoisson. -type RandomPoissonAttr func(optionalAttr) - -// RandomPoissonSeed sets the optional seed attribute to value. -// If not specified, defaults to 0 -func RandomPoissonSeed(value int64) RandomPoissonAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// RandomPoissonSeed2 sets the optional seed2 attribute to value. -// If not specified, defaults to 0 -func RandomPoissonSeed2(value int64) RandomPoissonAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Use RandomPoissonV2 instead. -// -// DEPRECATED at GraphDef version 25: Replaced by RandomPoissonV2 -func RandomPoisson(scope *Scope, shape tf.Output, rate tf.Output, optional ...RandomPoissonAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RandomPoisson", - Input: []tf.Input{ - shape, rate, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Compute the regularized incomplete beta integral \\(I_x(a, b)\\). 
-// -// The regularized incomplete beta integral is defined as: -// -// -// \\(I_x(a, b) = \frac{B(x; a, b)}{B(a, b)}\\) -// -// where -// -// -// \\(B(x; a, b) = \int_0^x t^{a-1} (1 - t)^{b-1} dt\\) -// -// -// is the incomplete beta function and \\(B(a, b)\\) is the *complete* -// beta function. -func Betainc(scope *Scope, a tf.Output, b tf.Output, x tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Betainc", - Input: []tf.Input{ - a, b, x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Reduces sparse updates into the variable referenced by `resource` using the `max` operation. -// -// This operation computes -// -// # Scalar indices -// ref[indices, ...] = max(ref[indices, ...], updates[...]) -// -// # Vector indices (for each i) -// ref[indices[i], ...] = max(ref[indices[i], ...], updates[i, ...]) -// -// # High rank indices (for each i, ..., j) -// ref[indices[i, ..., j], ...] = max(ref[indices[i, ..., j], ...], updates[i, ..., j, ...]) -// -// Duplicate entries are handled correctly: if multiple `indices` reference -// the same location, their contributions are combined. -// -// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`. -// -//
-// -// Arguments: -// resource: Should be from a `Variable` node. -// indices: A tensor of indices into the first dimension of `ref`. -// updates: A tensor of updated values to add to `ref`. -// -// Returns the created operation. -func ResourceScatterMax(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ResourceScatterMax", - Input: []tf.Input{ - resource, indices, updates, - }, - } - return scope.AddOperation(opspec) -} - -// AddManySparseToTensorsMapAttr is an optional argument to AddManySparseToTensorsMap. -type AddManySparseToTensorsMapAttr func(optionalAttr) - -// AddManySparseToTensorsMapContainer sets the optional container attribute to value. -// -// value: The container name for the `SparseTensorsMap` created by this op. -// If not specified, defaults to "" -func AddManySparseToTensorsMapContainer(value string) AddManySparseToTensorsMapAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// AddManySparseToTensorsMapSharedName sets the optional shared_name attribute to value. -// -// value: The shared name for the `SparseTensorsMap` created by this op. -// If blank, the new Operation's unique name is used. -// If not specified, defaults to "" -func AddManySparseToTensorsMapSharedName(value string) AddManySparseToTensorsMapAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Add an `N`-minibatch `SparseTensor` to a `SparseTensorsMap`, return `N` handles. -// -// A `SparseTensor` of rank `R` is represented by three tensors: `sparse_indices`, -// `sparse_values`, and `sparse_shape`, where -// -// ```sparse_indices.shape[1] == sparse_shape.shape[0] == R``` -// -// An `N`-minibatch of `SparseTensor` objects is represented as a `SparseTensor` -// having a first `sparse_indices` column taking values between `[0, N)`, where -// the minibatch size `N == sparse_shape[0]`. -// -// The input `SparseTensor` must have rank `R` greater than 1, and the first -// dimension is treated as the minibatch dimension. Elements of the `SparseTensor` -// must be sorted in increasing order of this first dimension. The stored -// `SparseTensor` objects pointed to by each row of the output `sparse_handles` -// will have rank `R-1`. -// -// The `SparseTensor` values can then be read out as part of a minibatch by passing -// the given keys as vector elements to `TakeManySparseFromTensorsMap`. To ensure -// the correct `SparseTensorsMap` is accessed, ensure that the same -// `container` and `shared_name` are passed to that Op. If no `shared_name` -// is provided here, instead use the *name* of the Operation created by calling -// `AddManySparseToTensorsMap` as the `shared_name` passed to -// `TakeManySparseFromTensorsMap`. Ensure the Operations are colocated. -// -// Arguments: -// sparse_indices: 2-D. The `indices` of the minibatch `SparseTensor`. -// `sparse_indices[:, 0]` must be ordered values in `[0, N)`. -// sparse_values: 1-D. The `values` of the minibatch `SparseTensor`. -// sparse_shape: 1-D. The `shape` of the minibatch `SparseTensor`. -// The minibatch size `N == sparse_shape[0]`. -// -// Returns 1-D. The handles of the `SparseTensor` now stored in the -// `SparseTensorsMap`. Shape: `[N]`. 
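// Illustrative sketch, assumed example rather than part of the generated
// wrappers: pairing AddManySparseToTensorsMap (defined just below) with
// TakeManySparseFromTensorsMap (defined earlier in this file) through a
// common shared_name, as the comment above recommends. Graph construction
// only; the minibatch shape and values are made up for the demonstration.
package main

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// A [2, 3] minibatch SparseTensor with two non-zero entries.
	indices := op.Const(s, [][]int64{{0, 0}, {1, 2}})
	values := op.Const(s, []float32{1, 2})
	shape := op.Const(s, []int64{2, 3})

	// Store the minibatch; one int64 handle per row is returned.
	handles := op.AddManySparseToTensorsMap(s, indices, values, shape,
		op.AddManySparseToTensorsMapSharedName("sparse_minibatch"))

	// Read it back elsewhere in the graph, using the same shared_name and the
	// dtype of the stored values.
	outIndices, outValues, outShape := op.TakeManySparseFromTensorsMap(s, handles, tf.Float,
		op.TakeManySparseFromTensorsMapSharedName("sparse_minibatch"))
	_, _, _ = outIndices, outValues, outShape

	if _, err := s.Finalize(); err != nil {
		panic(err)
	}
}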
-func AddManySparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...AddManySparseToTensorsMapAttr) (sparse_handles tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "AddManySparseToTensorsMap", - Input: []tf.Input{ - sparse_indices, sparse_values, sparse_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes tan of x element-wise. -// -// Given an input tensor, this function computes tangent of every -// element in the tensor. Input range is `(-inf, inf)` and -// output range is `(-inf, inf)`. If input lies outside the boundary, `nan` -// is returned. -// -// ```python -// x = tf.constant([-float("inf"), -9, -0.5, 1, 1.2, 200, 10000, float("inf")]) -// tf.math.tan(x) ==> [nan 0.45231566 -0.5463025 1.5574077 2.572152 -1.7925274 0.32097113 nan] -// ``` -func Tan(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Tan", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// BiasAddGradAttr is an optional argument to BiasAddGrad. -type BiasAddGradAttr func(optionalAttr) - -// BiasAddGradDataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the bias tensor will be added to the last dimension -// of the value tensor. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// The tensor will be added to "in_channels", the third-to-the-last -// dimension. -// If not specified, defaults to "NHWC" -func BiasAddGradDataFormat(value string) BiasAddGradAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// The backward operation for "BiasAdd" on the "bias" tensor. -// -// It accumulates all the values from out_backprop into the feature dimension. -// For NHWC data format, the feature dimension is the last. For NCHW data format, -// the feature dimension is the third-to-last. -// -// Arguments: -// out_backprop: Any number of dimensions. -// -// Returns 1-D with size the feature dimension of `out_backprop`. -func BiasAddGrad(scope *Scope, out_backprop tf.Output, optional ...BiasAddGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "BiasAddGrad", - Input: []tf.Input{ - out_backprop, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Advance the counter of a counter-based RNG. -// -// The state of the RNG after -// `rng_skip(n)` will be the same as that after `stateful_uniform([n])` -// (or any other distribution). The actual increment added to the -// counter is an unspecified implementation detail. -// -// Arguments: -// resource: The handle of the resource variable that stores the state of the RNG. -// algorithm: The RNG algorithm. -// delta: The amount of advancement. -// -// Returns the created operation. 
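// Illustrative sketch, assumed example rather than part of the generated
// wrappers: wiring RngSkip (defined just below) to advance a counter-based
// RNG state held in a resource variable. The state shape, dtype, and the
// algorithm value are assumptions for the demonstration, not values mandated
// by the op.
package main

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Resource variable assumed to hold the RNG state as a small int64 vector.
	state := op.VarHandleOp(s, tf.Int64, tf.MakeShape(3))
	algorithm := op.Const(s, int64(1)) // assumed: 1 selects the Philox algorithm
	delta := op.Const(s, int64(10))    // advance the counter as if 10 samples were drawn

	// RngSkip returns a *tf.Operation; pass it to Session.Run as a target.
	skip := op.RngSkip(s, state, algorithm, delta)
	_ = skip

	if _, err := s.Finalize(); err != nil {
		panic(err)
	}
}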
-func RngSkip(scope *Scope, resource tf.Output, algorithm tf.Output, delta tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "RngSkip",
-		Input: []tf.Input{
-			resource, algorithm, delta,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-
-// Generates values in an interval.
-//
-// A sequence of `num` evenly-spaced values is generated beginning at `start`.
-// If `num > 1`, the values in the sequence increase by `(stop - start) / (num - 1)`,
-// so that the last one is exactly `stop`.
-//
-// For example:
-//
-// ```
-// tf.linspace(10.0, 12.0, 3, name="linspace") => [ 10.0 11.0 12.0]
-// ```
-//
-// Arguments:
-// start: 0-D tensor. First entry in the range.
-// stop: 0-D tensor. Last entry in the range.
-// num: 0-D tensor. Number of values to generate.
-//
-// Returns 1-D. The generated values.
-func LinSpace(scope *Scope, start tf.Output, stop tf.Output, num tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "LinSpace",
-		Input: []tf.Input{
-			start, stop, num,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// MultinomialAttr is an optional argument to Multinomial.
-type MultinomialAttr func(optionalAttr)
-
-// MultinomialSeed sets the optional seed attribute to value.
-//
-// value: If either seed or seed2 is set to be non-zero, the internal random number
-// generator is seeded by the given seed. Otherwise, a random seed is used.
-// If not specified, defaults to 0
-func MultinomialSeed(value int64) MultinomialAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
-
-// MultinomialSeed2 sets the optional seed2 attribute to value.
-//
-// value: A second seed to avoid seed collision.
-// If not specified, defaults to 0
-func MultinomialSeed2(value int64) MultinomialAttr {
-	return func(m optionalAttr) {
-		m["seed2"] = value
-	}
-}
-
-// MultinomialOutputDtype sets the optional output_dtype attribute to value.
-// If not specified, defaults to DT_INT64
-func MultinomialOutputDtype(value tf.DataType) MultinomialAttr {
-	return func(m optionalAttr) {
-		m["output_dtype"] = value
-	}
-}
-
-// Draws samples from a multinomial distribution.
-//
-// Arguments:
-// logits: 2-D Tensor with shape `[batch_size, num_classes]`. Each slice `[i, :]`
-// represents the unnormalized log probabilities for all classes.
-// num_samples: 0-D. Number of independent samples to draw for each row slice.
-//
-// Returns 2-D Tensor with shape `[batch_size, num_samples]`. Each slice `[i, :]`
-// contains the drawn class labels with range `[0, num_classes)`.
-func Multinomial(scope *Scope, logits tf.Output, num_samples tf.Output, optional ...MultinomialAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Multinomial",
-		Input: []tf.Input{
-			logits, num_samples,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// NonDeterministicIntsAttr is an optional argument to NonDeterministicInts.
-type NonDeterministicIntsAttr func(optionalAttr)
-
-// NonDeterministicIntsDtype sets the optional dtype attribute to value.
-//
-// value: The type of the output.
-// If not specified, defaults to DT_INT64
-func NonDeterministicIntsDtype(value tf.DataType) NonDeterministicIntsAttr {
-	return func(m optionalAttr) {
-		m["dtype"] = value
-	}
-}
-
-// Non-deterministically generates some integers.
-// -// This op may use some OS-provided source of non-determinism (e.g. an RNG), so each execution will give different results. -// -// Arguments: -// shape: The shape of the output tensor. -// -// Returns Non-deterministic integer values with specified shape. -func NonDeterministicInts(scope *Scope, shape tf.Output, optional ...NonDeterministicIntsAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "NonDeterministicInts", - Input: []tf.Input{ - shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that caches elements from `input_dataset`. -// -// A CacheDataset will iterate over the input_dataset, and store tensors. If the -// cache already exists, the cache will be used. If the cache is inappropriate -// (e.g. cannot be opened, contains tensors of the wrong shape / size), an error -// will the returned when used. -// -// Arguments: -// -// filename: A path on the filesystem where we should cache the dataset. Note: this -// will be a directory. -// -// -func CacheDataset(scope *Scope, input_dataset tf.Output, filename tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "CacheDataset", - Input: []tf.Input{ - input_dataset, filename, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ThreadPoolHandleAttr is an optional argument to ThreadPoolHandle. -type ThreadPoolHandleAttr func(optionalAttr) - -// ThreadPoolHandleMaxIntraOpParallelism sets the optional max_intra_op_parallelism attribute to value. -// -// value: The maximum degree of parallelism to use within operations that execute on this -// threadpool. -// If not specified, defaults to 1 -func ThreadPoolHandleMaxIntraOpParallelism(value int64) ThreadPoolHandleAttr { - return func(m optionalAttr) { - m["max_intra_op_parallelism"] = value - } -} - -// ThreadPoolHandleContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func ThreadPoolHandleContainer(value string) ThreadPoolHandleAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// ThreadPoolHandleSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func ThreadPoolHandleSharedName(value string) ThreadPoolHandleAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Creates a dataset that uses a custom thread pool to compute `input_dataset`. -// -// Arguments: -// num_threads: The number of threads in the thread pool. -// display_name: A human-readable name for the threads that may be visible in some -// visualizations. -// threadpool. -// -// Returns A resource that can be consumed by one or more ExperimentalThreadPoolDataset -// ops. 
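The `...Attr` closures above (`NonDeterministicIntsDtype`, the `ThreadPoolHandle*` setters, and so on) all follow one pattern: pass zero or more of them as trailing arguments and each one writes its value into the op's attribute map. A hedged sketch of that pattern, assuming the standard Go binding imports and that the op has a kernel available in the linked runtime:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()

	// Shape of the output tensor; the dtype attribute is overridden from its
	// DT_INT64 default via the optional-attribute closure.
	shape := op.Const(s, []int64{2, 3})
	vals := op.NonDeterministicInts(s, shape, op.NonDeterministicIntsDtype(tf.Int32))

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{vals}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // a 2x3 slice of non-deterministic int32 values
}
```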
-func ThreadPoolHandle(scope *Scope, num_threads int64, display_name string, optional ...ThreadPoolHandleAttr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_threads": num_threads, "display_name": display_name} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ThreadPoolHandle", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SparseReduceMaxSparseAttr is an optional argument to SparseReduceMaxSparse. -type SparseReduceMaxSparseAttr func(optionalAttr) - -// SparseReduceMaxSparseKeepDims sets the optional keep_dims attribute to value. -// -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func SparseReduceMaxSparseKeepDims(value bool) SparseReduceMaxSparseAttr { - return func(m optionalAttr) { - m["keep_dims"] = value - } -} - -// Computes the max of elements across dimensions of a SparseTensor. -// -// This Op takes a SparseTensor and is the sparse counterpart to -// `tf.reduce_max()`. In contrast to SparseReduceMax, this Op returns a -// SparseTensor. -// -// Reduces `sp_input` along the dimensions given in `reduction_axes`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained -// with length 1. -// -// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor -// with a single element is returned. Additionally, the axes can be negative, -// which are interpreted according to the indexing rules in Python. -// -// Arguments: -// input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. -// input_shape: 1-D. Shape of the input SparseTensor. -// reduction_axes: 1-D. Length-`K` vector containing the reduction axes. -func SparseReduceMaxSparse(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceMaxSparseAttr) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SparseReduceMaxSparse", - Input: []tf.Input{ - input_indices, input_values, input_shape, reduction_axes, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Computes the maximum along segments of a tensor. -// -// Read -// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) -// for an explanation of segments. -// -// This operator is similar to the unsorted segment sum operator found -// [(here)](../../../api_docs/python/math_ops.md#UnsortedSegmentSum). -// Instead of computing the sum over segments, it computes the maximum such that: -// -// \\(output_i = \max_{j...} data[j...]\\) where max is over tuples `j...` such -// that `segment_ids[j...] == i`. -// -// If the maximum is empty for a given segment ID `i`, it outputs the smallest -// possible value for the specific numeric type, -// `output[i] = numeric_limits::lowest()`. -// -// If the given segment ID `i` is negative, then the corresponding value is -// dropped, and will not be included in the result. -// -//
-// -// For example: -// -// ``` python -// c = tf.constant([[1,2,3,4], [5,6,7,8], [4,3,2,1]]) -// tf.unsorted_segment_max(c, tf.constant([0, 1, 0]), num_segments=2) -// # ==> [[ 4, 3, 3, 4], -// # [5, 6, 7, 8]] -// ``` -// -// -// Arguments: -// -// segment_ids: A tensor whose shape is a prefix of `data.shape`. -// -// -// Returns Has same shape as data, except for the first `segment_ids.rank` -// dimensions, which are replaced with a single dimension which has size -// `num_segments`. -func UnsortedSegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "UnsortedSegmentMax", - Input: []tf.Input{ - data, segment_ids, num_segments, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// StringUpperAttr is an optional argument to StringUpper. -type StringUpperAttr func(optionalAttr) - -// StringUpperEncoding sets the optional encoding attribute to value. -// If not specified, defaults to "" -func StringUpperEncoding(value string) StringUpperAttr { - return func(m optionalAttr) { - m["encoding"] = value - } -} - -// Converts all lowercase characters into their respective uppercase replacements. -// -// Example: -// -// >>> tf.strings.upper("CamelCase string and ALL CAPS") -// -// -func StringUpper(scope *Scope, input tf.Output, optional ...StringUpperAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StringUpper", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Set a summary_writer_interface to record statistics using given stats_aggregator. -// -// Returns the created operation. -func StatsAggregatorSetSummaryWriter(scope *Scope, stats_aggregator tf.Output, summary tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "StatsAggregatorSetSummaryWriter", - Input: []tf.Input{ - stats_aggregator, summary, - }, - } - return scope.AddOperation(opspec) -} - -// FusedBatchNormGradAttr is an optional argument to FusedBatchNormGrad. -type FusedBatchNormGradAttr func(optionalAttr) - -// FusedBatchNormGradEpsilon sets the optional epsilon attribute to value. -// -// value: A small float number added to the variance of x. -// If not specified, defaults to 0.0001 -func FusedBatchNormGradEpsilon(value float32) FusedBatchNormGradAttr { - return func(m optionalAttr) { - m["epsilon"] = value - } -} - -// FusedBatchNormGradDataFormat sets the optional data_format attribute to value. -// -// value: The data format for y_backprop, x, x_backprop. -// Either "NHWC" (default) or "NCHW". -// If not specified, defaults to "NHWC" -func FusedBatchNormGradDataFormat(value string) FusedBatchNormGradAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// FusedBatchNormGradIsTraining sets the optional is_training attribute to value. -// -// value: A bool value to indicate the operation is for training (default) -// or inference. -// If not specified, defaults to true -func FusedBatchNormGradIsTraining(value bool) FusedBatchNormGradAttr { - return func(m optionalAttr) { - m["is_training"] = value - } -} - -// Gradient for batch normalization. -// -// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". -// The size of 1D Tensors matches the dimension C of the 4D Tensors. 
-// -// Arguments: -// y_backprop: A 4D Tensor for the gradient with respect to y. -// x: A 4D Tensor for input data. -// scale: A 1D Tensor for scaling factor, to scale the normalized x. -// reserve_space_1: When is_training is True, a 1D Tensor for the computed batch -// mean to be reused in gradient computation. When is_training is -// False, a 1D Tensor for the population mean to be reused in both -// 1st and 2nd order gradient computation. -// reserve_space_2: When is_training is True, a 1D Tensor for the computed batch -// variance (inverted variance in the cuDNN case) to be reused in -// gradient computation. When is_training is False, a 1D Tensor -// for the population variance to be reused in both 1st and 2nd -// order gradient computation. -// -// Returns: -// x_backprop: A 4D Tensor for the gradient with respect to x. -// scale_backprop: A 1D Tensor for the gradient with respect to scale. -// offset_backprop: A 1D Tensor for the gradient with respect to offset. -// reserve_space_3: Unused placeholder to match the mean input in FusedBatchNorm. -// reserve_space_4: Unused placeholder to match the variance input -// in FusedBatchNorm. -func FusedBatchNormGrad(scope *Scope, y_backprop tf.Output, x tf.Output, scale tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output, optional ...FusedBatchNormGradAttr) (x_backprop tf.Output, scale_backprop tf.Output, offset_backprop tf.Output, reserve_space_3 tf.Output, reserve_space_4 tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FusedBatchNormGrad", - Input: []tf.Input{ - y_backprop, x, scale, reserve_space_1, reserve_space_2, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) -} - -// Subtracts a value from the current value of a variable. -// -// Any ReadVariableOp with a control dependency on this op is guaranteed to -// see the decremented value or a subsequent newer one. -// -// Arguments: -// resource: handle to the resource in which to store the variable. -// value: the value by which the variable will be incremented. -// -// Returns the created operation. -func AssignSubVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "AssignSubVariableOp", - Input: []tf.Input{ - resource, value, - }, - } - return scope.AddOperation(opspec) -} - -// SparseReduceMaxAttr is an optional argument to SparseReduceMax. -type SparseReduceMaxAttr func(optionalAttr) - -// SparseReduceMaxKeepDims sets the optional keep_dims attribute to value. -// -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func SparseReduceMaxKeepDims(value bool) SparseReduceMaxAttr { - return func(m optionalAttr) { - m["keep_dims"] = value - } -} - -// Computes the max of elements across dimensions of a SparseTensor. -// -// This Op takes a SparseTensor and is the sparse counterpart to -// `tf.reduce_max()`. In particular, this Op also returns a dense `Tensor` -// instead of a sparse one. -// -// Reduces `sp_input` along the dimensions given in `reduction_axes`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained -// with length 1. 
-// -// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor -// with a single element is returned. Additionally, the axes can be negative, -// which are interpreted according to the indexing rules in Python. -// -// Arguments: -// input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. -// input_shape: 1-D. Shape of the input SparseTensor. -// reduction_axes: 1-D. Length-`K` vector containing the reduction axes. -// -// Returns `R-K`-D. The reduced Tensor. -func SparseReduceMax(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceMaxAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SparseReduceMax", - Input: []tf.Input{ - input_indices, input_values, input_shape, reduction_axes, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Generates sparse cross from a list of sparse and dense tensors. -// -// The op takes two lists, one of 2D `SparseTensor` and one of 2D `Tensor`, each -// representing features of one feature column. It outputs a 2D `SparseTensor` with -// the batchwise crosses of these features. -// -// For example, if the inputs are -// -// inputs[0]: SparseTensor with shape = [2, 2] -// [0, 0]: "a" -// [1, 0]: "b" -// [1, 1]: "c" -// -// inputs[1]: SparseTensor with shape = [2, 1] -// [0, 0]: "d" -// [1, 0]: "e" -// -// inputs[2]: Tensor [["f"], ["g"]] -// -// then the output will be -// -// shape = [2, 2] -// [0, 0]: "a_X_d_X_f" -// [1, 0]: "b_X_e_X_g" -// [1, 1]: "c_X_e_X_g" -// -// if hashed_output=true then the output will be -// -// shape = [2, 2] -// [0, 0]: FingerprintCat64( -// Fingerprint64("f"), FingerprintCat64( -// Fingerprint64("d"), Fingerprint64("a"))) -// [1, 0]: FingerprintCat64( -// Fingerprint64("g"), FingerprintCat64( -// Fingerprint64("e"), Fingerprint64("b"))) -// [1, 1]: FingerprintCat64( -// Fingerprint64("g"), FingerprintCat64( -// Fingerprint64("e"), Fingerprint64("c"))) -// -// Arguments: -// indices: 2-D. Indices of each input `SparseTensor`. -// values: 1-D. values of each `SparseTensor`. -// shapes: 1-D. Shapes of each `SparseTensor`. -// dense_inputs: 2-D. Columns represented by dense `Tensor`. -// num_buckets: It is used if hashed_output is true. -// output = hashed_value%num_buckets if num_buckets > 0 else hashed_value. -// strong_hash: boolean, if true, siphash with salt will be used instead of farmhash. -// salt: Specify the salt that will be used by the siphash function. -// -// Returns: -// output_indices: 2-D. Indices of the concatenated `SparseTensor`. -// output_values: 1-D. Non-empty values of the concatenated or hashed -// `SparseTensor`. -// output_shape: 1-D. Shape of the concatenated `SparseTensor`. 
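The Python snippet in the `UnsortedSegmentMax` comment above translates directly to the Go wrapper. A minimal sketch, assuming the standard Go binding imports:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()

	// Same data as the Python example: rows 0 and 2 share segment 0, row 1 is segment 1.
	data := op.Const(s, [][]int32{{1, 2, 3, 4}, {5, 6, 7, 8}, {4, 3, 2, 1}})
	segmentIDs := op.Const(s, []int32{0, 1, 0})
	numSegments := op.Const(s, int32(2))
	segMax := op.UnsortedSegmentMax(s, data, segmentIDs, numSegments)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{segMax}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // [[4 3 3 4] [5 6 7 8]]
}
```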
-func SparseCrossHashed(scope *Scope, indices []tf.Output, values []tf.Output, shapes []tf.Output, dense_inputs []tf.Output, num_buckets tf.Output, strong_hash tf.Output, salt tf.Output) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseCrossHashed", - Input: []tf.Input{ - tf.OutputList(indices), tf.OutputList(values), tf.OutputList(shapes), tf.OutputList(dense_inputs), num_buckets, strong_hash, salt, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// QuantizedInstanceNormAttr is an optional argument to QuantizedInstanceNorm. -type QuantizedInstanceNormAttr func(optionalAttr) - -// QuantizedInstanceNormOutputRangeGiven sets the optional output_range_given attribute to value. -// -// value: If True, `given_y_min` and `given_y_min` -// and `given_y_max` are used as the output range. Otherwise, -// the implementation computes the output range. -// If not specified, defaults to false -func QuantizedInstanceNormOutputRangeGiven(value bool) QuantizedInstanceNormAttr { - return func(m optionalAttr) { - m["output_range_given"] = value - } -} - -// QuantizedInstanceNormGivenYMin sets the optional given_y_min attribute to value. -// -// value: Output in `y_min` if `output_range_given` is True. -// If not specified, defaults to 0 -func QuantizedInstanceNormGivenYMin(value float32) QuantizedInstanceNormAttr { - return func(m optionalAttr) { - m["given_y_min"] = value - } -} - -// QuantizedInstanceNormGivenYMax sets the optional given_y_max attribute to value. -// -// value: Output in `y_max` if `output_range_given` is True. -// If not specified, defaults to 0 -func QuantizedInstanceNormGivenYMax(value float32) QuantizedInstanceNormAttr { - return func(m optionalAttr) { - m["given_y_max"] = value - } -} - -// QuantizedInstanceNormVarianceEpsilon sets the optional variance_epsilon attribute to value. -// -// value: A small float number to avoid dividing by 0. -// If not specified, defaults to 1e-05 -func QuantizedInstanceNormVarianceEpsilon(value float32) QuantizedInstanceNormAttr { - return func(m optionalAttr) { - m["variance_epsilon"] = value - } -} - -// QuantizedInstanceNormMinSeparation sets the optional min_separation attribute to value. -// -// value: Minimum value of `y_max - y_min` -// If not specified, defaults to 0.001 -func QuantizedInstanceNormMinSeparation(value float32) QuantizedInstanceNormAttr { - return func(m optionalAttr) { - m["min_separation"] = value - } -} - -// Quantized Instance normalization. -// -// Arguments: -// x: A 4D input Tensor. -// x_min: The value represented by the lowest quantized input. -// x_max: The value represented by the highest quantized input. -// -// Returns: -// y: A 4D Tensor. -// y_min: The value represented by the lowest quantized output. -// y_max: The value represented by the highest quantized output. -func QuantizedInstanceNorm(scope *Scope, x tf.Output, x_min tf.Output, x_max tf.Output, optional ...QuantizedInstanceNormAttr) (y tf.Output, y_min tf.Output, y_max tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizedInstanceNorm", - Input: []tf.Input{ - x, x_min, x_max, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// FusedBatchNormV3Attr is an optional argument to FusedBatchNormV3. 
-type FusedBatchNormV3Attr func(optionalAttr) - -// FusedBatchNormV3Epsilon sets the optional epsilon attribute to value. -// -// value: A small float number added to the variance of x. -// If not specified, defaults to 0.0001 -func FusedBatchNormV3Epsilon(value float32) FusedBatchNormV3Attr { - return func(m optionalAttr) { - m["epsilon"] = value - } -} - -// FusedBatchNormV3ExponentialAvgFactor sets the optional exponential_avg_factor attribute to value. -// If not specified, defaults to 1 -func FusedBatchNormV3ExponentialAvgFactor(value float32) FusedBatchNormV3Attr { - return func(m optionalAttr) { - m["exponential_avg_factor"] = value - } -} - -// FusedBatchNormV3DataFormat sets the optional data_format attribute to value. -// -// value: The data format for x and y. Either "NHWC" (default) or "NCHW". -// If not specified, defaults to "NHWC" -func FusedBatchNormV3DataFormat(value string) FusedBatchNormV3Attr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// FusedBatchNormV3IsTraining sets the optional is_training attribute to value. -// -// value: A bool value to indicate the operation is for training (default) -// or inference. -// If not specified, defaults to true -func FusedBatchNormV3IsTraining(value bool) FusedBatchNormV3Attr { - return func(m optionalAttr) { - m["is_training"] = value - } -} - -// Batch normalization. -// -// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". -// The size of 1D Tensors matches the dimension C of the 4D Tensors. -// -// Arguments: -// x: A 4D Tensor for input data. -// scale: A 1D Tensor for scaling factor, to scale the normalized x. -// offset: A 1D Tensor for offset, to shift to the normalized x. -// mean: A 1D Tensor for population mean. Used for inference only; -// must be empty for training. -// variance: A 1D Tensor for population variance. Used for inference only; -// must be empty for training. -// -// Returns: -// y: A 4D Tensor for output data. -// batch_mean: A 1D Tensor for the computed batch mean, to be used by TensorFlow -// to compute the running mean. -// batch_variance: A 1D Tensor for the computed batch variance, to be used by -// TensorFlow to compute the running variance. -// reserve_space_1: A 1D Tensor for the computed batch mean, to be reused -// in the gradient computation. -// reserve_space_2: A 1D Tensor for the computed batch variance (inverted variance -// in the cuDNN case), to be reused in the gradient computation. -// reserve_space_3: A 1D Tensor for some intermediate results, to be reused in the gradient -// computation for better efficiency. -func FusedBatchNormV3(scope *Scope, x tf.Output, scale tf.Output, offset tf.Output, mean tf.Output, variance tf.Output, optional ...FusedBatchNormV3Attr) (y tf.Output, batch_mean tf.Output, batch_variance tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output, reserve_space_3 tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FusedBatchNormV3", - Input: []tf.Input{ - x, scale, offset, mean, variance, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5) -} - -// Computes reciprocal of square root of x element-wise. -// -// I.e., \\(y = 1 / \sqrt{x}\\). 
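For the element-wise `Rsqrt` wrapper, 1/sqrt(1) = 1, 1/sqrt(4) = 0.5, and 1/sqrt(16) = 0.25, so a small graph is easy to check by hand. A minimal sketch, assuming the standard Go binding imports:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()

	// y = 1 / sqrt(x), applied element-wise.
	x := op.Const(s, []float32{1, 4, 16})
	y := op.Rsqrt(s, x)

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{y}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // [1 0.5 0.25]
}
```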
-func Rsqrt(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Rsqrt", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// The gradient operator for the SparseSlice op. -// -// This op takes in the upstream gradient w.r.t. non-empty values of -// the sliced `SparseTensor`, and outputs the gradients w.r.t. -// the non-empty values of input `SparseTensor`. -// -// Arguments: -// backprop_val_grad: 1-D. The gradient with respect to -// the non-empty values of the sliced `SparseTensor`. -// input_indices: 2-D. The `indices` of the input `SparseTensor`. -// input_start: 1-D. tensor represents the start of the slice. -// output_indices: 2-D. The `indices` of the sliced `SparseTensor`. -// -// Returns 1-D. The gradient with respect to the non-empty values of input `SparseTensor`. -func SparseSliceGrad(scope *Scope, backprop_val_grad tf.Output, input_indices tf.Output, input_start tf.Output, output_indices tf.Output) (val_grad tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSliceGrad", - Input: []tf.Input{ - backprop_val_grad, input_indices, input_start, output_indices, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Generates sparse cross from a list of sparse and dense tensors. -// -// The op takes two lists, one of 2D `SparseTensor` and one of 2D `Tensor`, each -// representing features of one feature column. It outputs a 2D `SparseTensor` with -// the batchwise crosses of these features. -// -// For example, if the inputs are -// -// inputs[0]: SparseTensor with shape = [2, 2] -// [0, 0]: "a" -// [1, 0]: "b" -// [1, 1]: "c" -// -// inputs[1]: SparseTensor with shape = [2, 1] -// [0, 0]: "d" -// [1, 0]: "e" -// -// inputs[2]: Tensor [["f"], ["g"]] -// -// then the output will be -// -// shape = [2, 2] -// [0, 0]: "a_X_d_X_f" -// [1, 0]: "b_X_e_X_g" -// [1, 1]: "c_X_e_X_g" -// -// if hashed_output=true then the output will be -// -// shape = [2, 2] -// [0, 0]: FingerprintCat64( -// Fingerprint64("f"), FingerprintCat64( -// Fingerprint64("d"), Fingerprint64("a"))) -// [1, 0]: FingerprintCat64( -// Fingerprint64("g"), FingerprintCat64( -// Fingerprint64("e"), Fingerprint64("b"))) -// [1, 1]: FingerprintCat64( -// Fingerprint64("g"), FingerprintCat64( -// Fingerprint64("e"), Fingerprint64("c"))) -// -// Arguments: -// indices: 2-D. Indices of each input `SparseTensor`. -// values: 1-D. values of each `SparseTensor`. -// shapes: 1-D. Shapes of each `SparseTensor`. -// dense_inputs: 2-D. Columns represented by dense `Tensor`. -// sep: string used when joining a list of string inputs, can be used as separator later. -// -// Returns: -// output_indices: 2-D. Indices of the concatenated `SparseTensor`. -// output_values: 1-D. Non-empty values of the concatenated or hashed -// `SparseTensor`. -// output_shape: 1-D. Shape of the concatenated `SparseTensor`. -func SparseCrossV2(scope *Scope, indices []tf.Output, values []tf.Output, shapes []tf.Output, dense_inputs []tf.Output, sep tf.Output) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseCrossV2", - Input: []tf.Input{ - tf.OutputList(indices), tf.OutputList(values), tf.OutputList(shapes), tf.OutputList(dense_inputs), sep, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Pads a tensor with mirrored values. 
-// -// This operation pads a `input` with mirrored values according to the `paddings` -// you specify. `paddings` is an integer tensor with shape `[n, 2]`, where n is -// the rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates -// how many values to add before the contents of `input` in that dimension, and -// `paddings[D, 1]` indicates how many values to add after the contents of `input` -// in that dimension. Both `paddings[D, 0]` and `paddings[D, 1]` must be no greater -// than `input.dim_size(D)` (or `input.dim_size(D) - 1`) if `copy_border` is true -// (if false, respectively). -// -// The padded size of each dimension D of the output is: -// -// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)` -// -// For example: -// -// ``` -// # 't' is [[1, 2, 3], [4, 5, 6]]. -// # 'paddings' is [[1, 1]], [2, 2]]. -// # 'mode' is SYMMETRIC. -// # rank of 't' is 2. -// pad(t, paddings) ==> [[2, 1, 1, 2, 3, 3, 2] -// [2, 1, 1, 2, 3, 3, 2] -// [5, 4, 4, 5, 6, 6, 5] -// [5, 4, 4, 5, 6, 6, 5]] -// ``` -// -// Arguments: -// input: The input tensor to be padded. -// paddings: A two-column matrix specifying the padding sizes. The number of -// rows must be the same as the rank of `input`. -// mode: Either `REFLECT` or `SYMMETRIC`. In reflect mode the padded regions -// do not include the borders, while in symmetric mode the padded regions -// do include the borders. For example, if `input` is `[1, 2, 3]` and `paddings` -// is `[0, 2]`, then the output is `[1, 2, 3, 2, 1]` in reflect mode, and -// it is `[1, 2, 3, 3, 2]` in symmetric mode. -// -// Returns The padded tensor. -func MirrorPad(scope *Scope, input tf.Output, paddings tf.Output, mode string) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"mode": mode} - opspec := tf.OpSpec{ - Type: "MirrorPad", - Input: []tf.Input{ - input, paddings, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// TensorArrayV3Attr is an optional argument to TensorArrayV3. -type TensorArrayV3Attr func(optionalAttr) - -// TensorArrayV3ElementShape sets the optional element_shape attribute to value. -// -// value: The expected shape of an element, if known. Used to -// validate the shapes of TensorArray elements. If this shape is not -// fully specified, gathering zero-size TensorArrays is an error. -// If not specified, defaults to -func TensorArrayV3ElementShape(value tf.Shape) TensorArrayV3Attr { - return func(m optionalAttr) { - m["element_shape"] = value - } -} - -// TensorArrayV3DynamicSize sets the optional dynamic_size attribute to value. -// -// value: A boolean that determines whether writes to the TensorArray -// are allowed to grow the size. By default, this is not allowed. -// If not specified, defaults to false -func TensorArrayV3DynamicSize(value bool) TensorArrayV3Attr { - return func(m optionalAttr) { - m["dynamic_size"] = value - } -} - -// TensorArrayV3ClearAfterRead sets the optional clear_after_read attribute to value. -// -// value: If true (default), Tensors in the TensorArray are cleared -// after being read. This disables multiple read semantics but allows early -// release of memory. -// If not specified, defaults to true -func TensorArrayV3ClearAfterRead(value bool) TensorArrayV3Attr { - return func(m optionalAttr) { - m["clear_after_read"] = value - } -} - -// TensorArrayV3IdenticalElementShapes sets the optional identical_element_shapes attribute to value. 
-// -// value: If true (default is false), then all -// elements in the TensorArray will be expected to have have identical shapes. -// This allows certain behaviors, like dynamically checking for -// consistent shapes on write, and being able to fill in properly -// shaped zero tensors on stack -- even if the element_shape attribute -// is not fully defined. -// If not specified, defaults to false -func TensorArrayV3IdenticalElementShapes(value bool) TensorArrayV3Attr { - return func(m optionalAttr) { - m["identical_element_shapes"] = value - } -} - -// TensorArrayV3TensorArrayName sets the optional tensor_array_name attribute to value. -// -// value: Overrides the name used for the temporary tensor_array -// resource. Default value is the name of the 'TensorArray' op (which -// is guaranteed unique). -// If not specified, defaults to "" -func TensorArrayV3TensorArrayName(value string) TensorArrayV3Attr { - return func(m optionalAttr) { - m["tensor_array_name"] = value - } -} - -// An array of Tensors of given size. -// -// Write data via Write and read via Read or Pack. -// -// Arguments: -// size: The size of the array. -// dtype: The type of the elements on the tensor_array. -// -// Returns: -// handle: The handle to the TensorArray. -// flow: A scalar used to control gradient flow. -func TensorArrayV3(scope *Scope, size tf.Output, dtype tf.DataType, optional ...TensorArrayV3Attr) (handle tf.Output, flow tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TensorArrayV3", - Input: []tf.Input{ - size, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// MatrixSolveLsAttr is an optional argument to MatrixSolveLs. -type MatrixSolveLsAttr func(optionalAttr) - -// MatrixSolveLsFast sets the optional fast attribute to value. -// If not specified, defaults to true -func MatrixSolveLsFast(value bool) MatrixSolveLsAttr { - return func(m optionalAttr) { - m["fast"] = value - } -} - -// Solves one or more linear least-squares problems. -// -// `matrix` is a tensor of shape `[..., M, N]` whose inner-most 2 dimensions -// form real or complex matrices of size `[M, N]`. `Rhs` is a tensor of the same -// type as `matrix` and shape `[..., M, K]`. -// The output is a tensor shape `[..., N, K]` where each output matrix solves -// each of the equations -// `matrix[..., :, :]` * `output[..., :, :]` = `rhs[..., :, :]` -// in the least squares sense. -// -// We use the following notation for (complex) matrix and right-hand sides -// in the batch: -// -// `matrix`=\\(A \in \mathbb{C}^{m \times n}\\), -// `rhs`=\\(B \in \mathbb{C}^{m \times k}\\), -// `output`=\\(X \in \mathbb{C}^{n \times k}\\), -// `l2_regularizer`=\\(\lambda \in \mathbb{R}\\). -// -// If `fast` is `True`, then the solution is computed by solving the normal -// equations using Cholesky decomposition. Specifically, if \\(m \ge n\\) then -// \\(X = (A^H A + \lambda I)^{-1} A^H B\\), which solves the least-squares -// problem \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k} } ||A Z - B||_F^2 + \lambda ||Z||_F^2\\). -// If \\(m \lt n\\) then `output` is computed as -// \\(X = A^H (A A^H + \lambda I)^{-1} B\\), which (for \\(\lambda = 0\\)) is the -// minimum-norm solution to the under-determined linear system, i.e. -// \\(X = \mathrm{argmin}_{Z \in \mathbb{C}^{n \times k} } ||Z||_F^2 \\), -// subject to \\(A Z = B\\). 
Notice that the fast path is only numerically stable -// when \\(A\\) is numerically full rank and has a condition number -// \\(\mathrm{cond}(A) \lt \frac{1}{\sqrt{\epsilon_{mach} } }\\) or \\(\lambda\\) is -// sufficiently large. -// -// If `fast` is `False` an algorithm based on the numerically robust complete -// orthogonal decomposition is used. This computes the minimum-norm -// least-squares solution, even when \\(A\\) is rank deficient. This path is -// typically 6-7 times slower than the fast path. If `fast` is `False` then -// `l2_regularizer` is ignored. -// -// Arguments: -// matrix: Shape is `[..., M, N]`. -// rhs: Shape is `[..., M, K]`. -// l2_regularizer: Scalar tensor. -// -// @compatibility(numpy) -// Equivalent to np.linalg.lstsq -// @end_compatibility -// -// Returns Shape is `[..., N, K]`. -func MatrixSolveLs(scope *Scope, matrix tf.Output, rhs tf.Output, l2_regularizer tf.Output, optional ...MatrixSolveLsAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MatrixSolveLs", - Input: []tf.Input{ - matrix, rhs, l2_regularizer, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Generates sparse cross from a list of sparse and dense tensors. -// -// The op takes two lists, one of 2D `SparseTensor` and one of 2D `Tensor`, each -// representing features of one feature column. It outputs a 2D `SparseTensor` with -// the batchwise crosses of these features. -// -// For example, if the inputs are -// -// inputs[0]: SparseTensor with shape = [2, 2] -// [0, 0]: "a" -// [1, 0]: "b" -// [1, 1]: "c" -// -// inputs[1]: SparseTensor with shape = [2, 1] -// [0, 0]: "d" -// [1, 0]: "e" -// -// inputs[2]: Tensor [["f"], ["g"]] -// -// then the output will be -// -// shape = [2, 2] -// [0, 0]: "a_X_d_X_f" -// [1, 0]: "b_X_e_X_g" -// [1, 1]: "c_X_e_X_g" -// -// if hashed_output=true then the output will be -// -// shape = [2, 2] -// [0, 0]: FingerprintCat64( -// Fingerprint64("f"), FingerprintCat64( -// Fingerprint64("d"), Fingerprint64("a"))) -// [1, 0]: FingerprintCat64( -// Fingerprint64("g"), FingerprintCat64( -// Fingerprint64("e"), Fingerprint64("b"))) -// [1, 1]: FingerprintCat64( -// Fingerprint64("g"), FingerprintCat64( -// Fingerprint64("e"), Fingerprint64("c"))) -// -// Arguments: -// indices: 2-D. Indices of each input `SparseTensor`. -// values: 1-D. values of each `SparseTensor`. -// shapes: 1-D. Shapes of each `SparseTensor`. -// dense_inputs: 2-D. Columns represented by dense `Tensor`. -// hashed_output: If true, returns the hash of the cross instead of the string. -// This will allow us avoiding string manipulations. -// num_buckets: It is used if hashed_output is true. -// output = hashed_value%num_buckets if num_buckets > 0 else hashed_value. -// hash_key: Specify the hash_key that will be used by the `FingerprintCat64` -// function to combine the crosses fingerprints. -// -// -// -// Returns: -// output_indices: 2-D. Indices of the concatenated `SparseTensor`. -// output_values: 1-D. Non-empty values of the concatenated or hashed -// `SparseTensor`. -// output_shape: 1-D. Shape of the concatenated `SparseTensor`. 
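The `MatrixSolveLs` wrapper above takes the matrix, the right-hand side, and the `l2_regularizer` scalar (a float64 tensor), plus the optional `fast` attribute. A hedged sketch, assuming the standard Go binding imports; with an identity matrix and zero regularization the least-squares solution should simply equal the right-hand side:

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()

	// A 2x2 identity system, so the solution X satisfies I * X = rhs, i.e. X = rhs.
	matrix := op.Const(s, [][]float64{{1, 0}, {0, 1}})
	rhs := op.Const(s, [][]float64{{3}, {4}})
	l2 := op.Const(s, float64(0)) // l2_regularizer is a float64 scalar
	x := op.MatrixSolveLs(s, matrix, rhs, l2, op.MatrixSolveLsFast(true))

	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()

	out, err := sess.Run(nil, []tf.Output{x}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // [[3] [4]]
}
```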
-func SparseCross(scope *Scope, indices []tf.Output, values []tf.Output, shapes []tf.Output, dense_inputs []tf.Output, hashed_output bool, num_buckets int64, hash_key int64, out_type tf.DataType, internal_type tf.DataType) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"hashed_output": hashed_output, "num_buckets": num_buckets, "hash_key": hash_key, "out_type": out_type, "internal_type": internal_type} - opspec := tf.OpSpec{ - Type: "SparseCross", - Input: []tf.Input{ - tf.OutputList(indices), tf.OutputList(values), tf.OutputList(shapes), tf.OutputList(dense_inputs), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Generate a glob pattern matching all sharded file names. -func ShardedFilespec(scope *Scope, basename tf.Output, num_shards tf.Output) (filename tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ShardedFilespec", - Input: []tf.Input{ - basename, num_shards, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RetrieveTPUEmbeddingProximalAdagradParametersAttr is an optional argument to RetrieveTPUEmbeddingProximalAdagradParameters. -type RetrieveTPUEmbeddingProximalAdagradParametersAttr func(optionalAttr) - -// RetrieveTPUEmbeddingProximalAdagradParametersTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func RetrieveTPUEmbeddingProximalAdagradParametersTableId(value int64) RetrieveTPUEmbeddingProximalAdagradParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// RetrieveTPUEmbeddingProximalAdagradParametersTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingProximalAdagradParametersTableName(value string) RetrieveTPUEmbeddingProximalAdagradParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// RetrieveTPUEmbeddingProximalAdagradParametersConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingProximalAdagradParametersConfig(value string) RetrieveTPUEmbeddingProximalAdagradParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Retrieve proximal Adagrad embedding parameters. -// -// An op that retrieves optimization parameters from embedding to host -// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up -// the correct embedding table configuration. For example, this op is -// used to retrieve updated parameters before saving a checkpoint. -// -// Returns: -// parameters: Parameter parameters updated by the proximal Adagrad optimization algorithm. -// accumulators: Parameter accumulators updated by the proximal Adagrad optimization algorithm. -func RetrieveTPUEmbeddingProximalAdagradParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingProximalAdagradParametersAttr) (parameters tf.Output, accumulators tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RetrieveTPUEmbeddingProximalAdagradParameters", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// ReduceJoinAttr is an optional argument to ReduceJoin. 
-type ReduceJoinAttr func(optionalAttr) - -// ReduceJoinKeepDims sets the optional keep_dims attribute to value. -// -// value: If `True`, retain reduced dimensions with length `1`. -// If not specified, defaults to false -func ReduceJoinKeepDims(value bool) ReduceJoinAttr { - return func(m optionalAttr) { - m["keep_dims"] = value - } -} - -// ReduceJoinSeparator sets the optional separator attribute to value. -// -// value: The separator to use when joining. -// If not specified, defaults to "" -func ReduceJoinSeparator(value string) ReduceJoinAttr { - return func(m optionalAttr) { - m["separator"] = value - } -} - -// Joins a string Tensor across the given dimensions. -// -// Computes the string join across dimensions in the given string Tensor of shape -// `[\\(d_0, d_1, ..., d_{n-1}\\)]`. Returns a new Tensor created by joining the input -// strings with the given separator (default: empty string). Negative indices are -// counted backwards from the end, with `-1` being equivalent to `n - 1`. If -// indices are not specified, joins across all dimensions beginning from `n - 1` -// through `0`. -// -// For example: -// -// ```python -// # tensor `a` is [["a", "b"], ["c", "d"]] -// tf.reduce_join(a, 0) ==> ["ac", "bd"] -// tf.reduce_join(a, 1) ==> ["ab", "cd"] -// tf.reduce_join(a, -2) = tf.reduce_join(a, 0) ==> ["ac", "bd"] -// tf.reduce_join(a, -1) = tf.reduce_join(a, 1) ==> ["ab", "cd"] -// tf.reduce_join(a, 0, keep_dims=True) ==> [["ac", "bd"]] -// tf.reduce_join(a, 1, keep_dims=True) ==> [["ab"], ["cd"]] -// tf.reduce_join(a, 0, separator=".") ==> ["a.c", "b.d"] -// tf.reduce_join(a, [0, 1]) ==> "acbd" -// tf.reduce_join(a, [1, 0]) ==> "abcd" -// tf.reduce_join(a, []) ==> [["a", "b"], ["c", "d"]] -// tf.reduce_join(a) = tf.reduce_join(a, [1, 0]) ==> "abcd" -// ``` -// -// Arguments: -// inputs: The input to be joined. All reduced indices must have non-zero size. -// reduction_indices: The dimensions to reduce over. Dimensions are reduced in the -// order specified. Omitting `reduction_indices` is equivalent to passing -// `[n-1, n-2, ..., 0]`. Negative indices from `-n` to `-1` are supported. -// -// Returns Has shape equal to that of the input with reduced dimensions removed or -// set to `1` depending on `keep_dims`. -func ReduceJoin(scope *Scope, inputs tf.Output, reduction_indices tf.Output, optional ...ReduceJoinAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ReduceJoin", - Input: []tf.Input{ - inputs, reduction_indices, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Inverse 2D fast Fourier transform. -// -// Computes the inverse 2-dimensional discrete Fourier transform over the -// inner-most 2 dimensions of `input`. -// -// Arguments: -// input: A complex tensor. -// -// Returns A complex tensor of the same shape as `input`. The inner-most 2 -// dimensions of `input` are replaced with their inverse 2D Fourier transform. -// -// @compatibility(numpy) -// Equivalent to np.fft.ifft2 -// @end_compatibility -func IFFT2D(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "IFFT2D", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Concatenates a list of `SparseTensor` along the specified dimension. -// -// Concatenation is with respect to the dense versions of these sparse tensors. 
-// It is assumed that each input is a `SparseTensor` whose elements are ordered -// along increasing dimension number. -// -// All inputs' shapes must match, except for the concat dimension. The -// `indices`, `values`, and `shapes` lists must have the same length. -// -// The output shape is identical to the inputs', except along the concat -// dimension, where it is the sum of the inputs' sizes along that dimension. -// -// The output elements will be resorted to preserve the sort order along -// increasing dimension number. -// -// This op runs in `O(M log M)` time, where `M` is the total number of non-empty -// values across all inputs. This is due to the need for an internal sort in -// order to concatenate efficiently across an arbitrary dimension. -// -// For example, if `concat_dim = 1` and the inputs are -// -// sp_inputs[0]: shape = [2, 3] -// [0, 2]: "a" -// [1, 0]: "b" -// [1, 1]: "c" -// -// sp_inputs[1]: shape = [2, 4] -// [0, 1]: "d" -// [0, 2]: "e" -// -// then the output will be -// -// shape = [2, 7] -// [0, 2]: "a" -// [0, 4]: "d" -// [0, 5]: "e" -// [1, 0]: "b" -// [1, 1]: "c" -// -// Graphically this is equivalent to doing -// -// [ a] concat [ d e ] = [ a d e ] -// [b c ] [ ] [b c ] -// -// Arguments: -// indices: 2-D. Indices of each input `SparseTensor`. -// values: 1-D. Non-empty values of each `SparseTensor`. -// shapes: 1-D. Shapes of each `SparseTensor`. -// concat_dim: Dimension to concatenate along. Must be in range [-rank, rank), -// where rank is the number of dimensions in each input `SparseTensor`. -// -// Returns: -// output_indices: 2-D. Indices of the concatenated `SparseTensor`. -// output_values: 1-D. Non-empty values of the concatenated `SparseTensor`. -// output_shape: 1-D. Shape of the concatenated `SparseTensor`. -func SparseConcat(scope *Scope, indices []tf.Output, values []tf.Output, shapes []tf.Output, concat_dim int64) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"concat_dim": concat_dim} - opspec := tf.OpSpec{ - Type: "SparseConcat", - Input: []tf.Input{ - tf.OutputList(indices), tf.OutputList(values), tf.OutputList(shapes), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// DestroyResourceOpAttr is an optional argument to DestroyResourceOp. -type DestroyResourceOpAttr func(optionalAttr) - -// DestroyResourceOpIgnoreLookupError sets the optional ignore_lookup_error attribute to value. -// -// value: whether to ignore the error when the resource -// doesn't exist. -// If not specified, defaults to true -func DestroyResourceOpIgnoreLookupError(value bool) DestroyResourceOpAttr { - return func(m optionalAttr) { - m["ignore_lookup_error"] = value - } -} - -// Deletes the resource specified by the handle. -// -// All subsequent operations using the resource will result in a NotFound -// error status. -// -// Arguments: -// resource: handle to the resource to delete. -// -// Returns the created operation. -func DestroyResourceOp(scope *Scope, resource tf.Output, optional ...DestroyResourceOpAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DestroyResourceOp", - Input: []tf.Input{ - resource, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// HistogramFixedWidthAttr is an optional argument to HistogramFixedWidth. 
-type HistogramFixedWidthAttr func(optionalAttr) - -// HistogramFixedWidthDtype sets the optional dtype attribute to value. -// If not specified, defaults to DT_INT32 -func HistogramFixedWidthDtype(value tf.DataType) HistogramFixedWidthAttr { - return func(m optionalAttr) { - m["dtype"] = value - } -} - -// Return histogram of values. -// -// Given the tensor `values`, this operation returns a rank 1 histogram counting -// the number of entries in `values` that fall into every bin. The bins are -// equal width and determined by the arguments `value_range` and `nbins`. -// -// ```python -// # Bins will be: (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf) -// nbins = 5 -// value_range = [0.0, 5.0] -// new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15] -// -// with tf.get_default_session() as sess: -// hist = tf.histogram_fixed_width(new_values, value_range, nbins=5) -// variables.global_variables_initializer().run() -// sess.run(hist) => [2, 1, 1, 0, 2] -// ``` -// -// Arguments: -// values: Numeric `Tensor`. -// value_range: Shape [2] `Tensor` of same `dtype` as `values`. -// values <= value_range[0] will be mapped to hist[0], -// values >= value_range[1] will be mapped to hist[-1]. -// nbins: Scalar `int32 Tensor`. Number of histogram bins. -// -// Returns A 1-D `Tensor` holding histogram of values. -func HistogramFixedWidth(scope *Scope, values tf.Output, value_range tf.Output, nbins tf.Output, optional ...HistogramFixedWidthAttr) (out tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "HistogramFixedWidth", - Input: []tf.Input{ - values, value_range, nbins, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that uses a custom thread pool to compute `input_dataset`. -// -// Arguments: -// -// thread_pool: A resource produced by the ThreadPoolHandle op. -// -// -func ThreadPoolDataset(scope *Scope, input_dataset tf.Output, thread_pool tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "ThreadPoolDataset", - Input: []tf.Input{ - input_dataset, thread_pool, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Bitcasts a tensor from one type to another without copying data. -// -// Given a tensor `input`, this operation returns a tensor that has the same buffer -// data as `input` with datatype `type`. -// -// If the input datatype `T` is larger than the output datatype `type` then the -// shape changes from [...] to [..., sizeof(`T`)/sizeof(`type`)]. -// -// If `T` is smaller than `type`, the operator requires that the rightmost -// dimension be equal to sizeof(`type`)/sizeof(`T`). The shape then goes from -// [..., sizeof(`type`)/sizeof(`T`)] to [...]. -// -// tf.bitcast() and tf.cast() work differently when real dtype is casted as a complex dtype -// (e.g. tf.complex64 or tf.complex128) as tf.cast() make imaginary part 0 while tf.bitcast() -// gives module error. -// For example, -// -// Example 1: -// -// >>> a = [1., 2., 3.] -// >>> equality_bitcast = tf.bitcast(a, tf.complex128) -// Traceback (most recent call last): -// ... 
-// InvalidArgumentError: Cannot bitcast from 1 to 18 [Op:Bitcast] -// >>> equality_cast = tf.cast(a, tf.complex128) -// >>> print(equality_cast) -// tf.Tensor([1.+0.j 2.+0.j 3.+0.j], shape=(3,), dtype=complex128) -// -// Example 2: -// -// >>> tf.bitcast(tf.constant(0xffffffff, dtype=tf.uint32), tf.uint8) -// -// -// Example 3: -// -// >>> x = [1., 2., 3.] -// >>> y = [0., 2., 3.] -// >>> equality= tf.equal(x,y) -// >>> equality_cast = tf.cast(equality,tf.float32) -// >>> equality_bitcast = tf.bitcast(equality_cast,tf.uint8) -// >>> print(equality) -// tf.Tensor([False True True], shape=(3,), dtype=bool) -// >>> print(equality_cast) -// tf.Tensor([0. 1. 1.], shape=(3,), dtype=float32) -// >>> print(equality_bitcast) -// tf.Tensor( -// [[ 0 0 0 0] -// [ 0 0 128 63] -// [ 0 0 128 63]], shape=(3, 4), dtype=uint8) -// -// *NOTE*: Bitcast is implemented as a low-level cast, so machines with different -// endian orderings will give different results. -func Bitcast(scope *Scope, input tf.Output, type_ tf.DataType) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"type": type_} - opspec := tf.OpSpec{ - Type: "Bitcast", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceApplyAdagradDAAttr is an optional argument to ResourceApplyAdagradDA. -type ResourceApplyAdagradDAAttr func(optionalAttr) - -// ResourceApplyAdagradDAUseLocking sets the optional use_locking attribute to value. -// -// value: If True, updating of the var and accum tensors will be protected by -// a lock; otherwise the behavior is undefined, but may exhibit less contention. -// If not specified, defaults to false -func ResourceApplyAdagradDAUseLocking(value bool) ResourceApplyAdagradDAAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' according to the proximal adagrad scheme. -// -// Arguments: -// var_: Should be from a Variable(). -// gradient_accumulator: Should be from a Variable(). -// gradient_squared_accumulator: Should be from a Variable(). -// grad: The gradient. -// lr: Scaling factor. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 regularization. Must be a scalar. -// global_step: Training step number. Must be a scalar. -// -// Returns the created operation. -func ResourceApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumulator tf.Output, gradient_squared_accumulator tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, global_step tf.Output, optional ...ResourceApplyAdagradDAAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyAdagradDA", - Input: []tf.Input{ - var_, gradient_accumulator, gradient_squared_accumulator, grad, lr, l1, l2, global_step, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// SparseToDenseAttr is an optional argument to SparseToDense. -type SparseToDenseAttr func(optionalAttr) - -// SparseToDenseValidateIndices sets the optional validate_indices attribute to value. -// -// value: If true, indices are checked to make sure they are sorted in -// lexicographic order and that there are no repeats. -// If not specified, defaults to true -func SparseToDenseValidateIndices(value bool) SparseToDenseAttr { - return func(m optionalAttr) { - m["validate_indices"] = value - } -} - -// Converts a sparse representation into a dense tensor. 
-// -// Builds an array `dense` with shape `output_shape` such that -// -// ``` -// # If sparse_indices is scalar -// dense[i] = (i == sparse_indices ? sparse_values : default_value) -// -// # If sparse_indices is a vector, then for each i -// dense[sparse_indices[i]] = sparse_values[i] -// -// # If sparse_indices is an n by d matrix, then for each i in [0, n) -// dense[sparse_indices[i][0], ..., sparse_indices[i][d-1]] = sparse_values[i] -// ``` -// -// All other values in `dense` are set to `default_value`. If `sparse_values` is a -// scalar, all sparse indices are set to this single value. -// -// Indices should be sorted in lexicographic order, and indices must not -// contain any repeats. If `validate_indices` is true, these properties -// are checked during execution. -// -// Arguments: -// sparse_indices: 0-D, 1-D, or 2-D. `sparse_indices[i]` contains the complete -// index where `sparse_values[i]` will be placed. -// output_shape: 1-D. Shape of the dense output tensor. -// sparse_values: 1-D. Values corresponding to each row of `sparse_indices`, -// or a scalar value to be used for all sparse indices. -// default_value: Scalar value to set for indices not specified in -// `sparse_indices`. -// -// Returns Dense output tensor of shape `output_shape`. -func SparseToDense(scope *Scope, sparse_indices tf.Output, output_shape tf.Output, sparse_values tf.Output, default_value tf.Output, optional ...SparseToDenseAttr) (dense tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SparseToDense", - Input: []tf.Input{ - sparse_indices, output_shape, sparse_values, default_value, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// OrderedMapClearAttr is an optional argument to OrderedMapClear. -type OrderedMapClearAttr func(optionalAttr) - -// OrderedMapClearCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapClearCapacity(value int64) OrderedMapClearAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// OrderedMapClearMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapClearMemoryLimit(value int64) OrderedMapClearAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// OrderedMapClearContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func OrderedMapClearContainer(value string) OrderedMapClearAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// OrderedMapClearSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func OrderedMapClearSharedName(value string) OrderedMapClearAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op removes all elements in the underlying container. -// -// Returns the created operation. -func OrderedMapClear(scope *Scope, dtypes []tf.DataType, optional ...OrderedMapClearAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "OrderedMapClear", - - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// MaxPoolAttr is an optional argument to MaxPool. 
-type MaxPoolAttr func(optionalAttr) - -// MaxPoolDataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func MaxPoolDataFormat(value string) MaxPoolAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Performs max pooling on the input. -// -// Arguments: -// input: 4-D input to pool over. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. -// -// Returns The max pooled output tensor. -func MaxPool(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MaxPool", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// BlockLSTMAttr is an optional argument to BlockLSTM. -type BlockLSTMAttr func(optionalAttr) - -// BlockLSTMForgetBias sets the optional forget_bias attribute to value. -// -// value: The forget gate bias. -// If not specified, defaults to 1 -func BlockLSTMForgetBias(value float32) BlockLSTMAttr { - return func(m optionalAttr) { - m["forget_bias"] = value - } -} - -// BlockLSTMCellClip sets the optional cell_clip attribute to value. -// -// value: Value to clip the 'cs' value to. -// If not specified, defaults to 3 -func BlockLSTMCellClip(value float32) BlockLSTMAttr { - return func(m optionalAttr) { - m["cell_clip"] = value - } -} - -// BlockLSTMUsePeephole sets the optional use_peephole attribute to value. -// -// value: Whether to use peephole weights. -// If not specified, defaults to false -func BlockLSTMUsePeephole(value bool) BlockLSTMAttr { - return func(m optionalAttr) { - m["use_peephole"] = value - } -} - -// Computes the LSTM cell forward propagation for all the time steps. -// -// This is equivalent to applying LSTMBlockCell in a loop, like so: -// -// ```python -// for x1 in unpack(x): -// i1, cs1, f1, o1, ci1, co1, h1 = LSTMBlock( -// x1, cs_prev, h_prev, w, wci, wcf, wco, b) -// cs_prev = cs1 -// h_prev = h1 -// i.append(i1) -// cs.append(cs1) -// f.append(f1) -// o.append(o1) -// ci.append(ci1) -// co.append(co1) -// h.append(h1) -// return pack(i), pack(cs), pack(f), pack(o), pack(ci), pack(ch), pack(h) -// ``` -// -// Arguments: -// seq_len_max: Maximum time length actually used by this input. Outputs are padded -// with zeros beyond this length. -// x: The sequence input to the LSTM, shape (timelen, batch_size, num_inputs). -// cs_prev: Value of the initial cell state. -// h_prev: Initial output of cell (to be used for peephole). -// w: The weight matrix. -// wci: The weight matrix for input gate peephole connection. -// wcf: The weight matrix for forget gate peephole connection. -// wco: The weight matrix for output gate peephole connection. -// b: The bias vector. -// -// Returns: -// i: The input gate over the whole time sequence. 
-// cs: The cell state before the tanh over the whole time sequence. -// f: The forget gate over the whole time sequence. -// o: The output gate over the whole time sequence. -// ci: The cell input over the whole time sequence. -// co: The cell after the tanh over the whole time sequence. -// h: The output h vector over the whole time sequence. -func BlockLSTM(scope *Scope, seq_len_max tf.Output, x tf.Output, cs_prev tf.Output, h_prev tf.Output, w tf.Output, wci tf.Output, wcf tf.Output, wco tf.Output, b tf.Output, optional ...BlockLSTMAttr) (i tf.Output, cs tf.Output, f tf.Output, o tf.Output, ci tf.Output, co tf.Output, h tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "BlockLSTM", - Input: []tf.Input{ - seq_len_max, x, cs_prev, h_prev, w, wci, wcf, wco, b, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6) -} - -// Computes the GRU cell forward propagation for 1 time step. -// -// Args -// x: Input to the GRU cell. -// h_prev: State input from the previous GRU cell. -// w_ru: Weight matrix for the reset and update gate. -// w_c: Weight matrix for the cell connection gate. -// b_ru: Bias vector for the reset and update gate. -// b_c: Bias vector for the cell connection gate. -// -// Returns -// r: Output of the reset gate. -// u: Output of the update gate. -// c: Output of the cell connection gate. -// h: Current state of the GRU cell. -// -// Note on notation of the variables: -// -// Concatenation of a and b is represented by a_b -// Element-wise dot product of a and b is represented by ab -// Element-wise dot product is represented by \circ -// Matrix multiplication is represented by * -// -// Biases are initialized with : -// `b_ru` - constant_initializer(1.0) -// `b_c` - constant_initializer(0.0) -// -// This kernel op implements the following mathematical equations: -// -// ``` -// x_h_prev = [x, h_prev] -// -// [r_bar u_bar] = x_h_prev * w_ru + b_ru -// -// r = sigmoid(r_bar) -// u = sigmoid(u_bar) -// -// h_prevr = h_prev \circ r -// -// x_h_prevr = [x h_prevr] -// -// c_bar = x_h_prevr * w_c + b_c -// c = tanh(c_bar) -// -// h = (1-u) \circ c + u \circ h_prev -// ``` -func GRUBlockCell(scope *Scope, x tf.Output, h_prev tf.Output, w_ru tf.Output, w_c tf.Output, b_ru tf.Output, b_c tf.Output) (r tf.Output, u tf.Output, c tf.Output, h tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "GRUBlockCell", - Input: []tf.Input{ - x, h_prev, w_ru, w_c, b_ru, b_c, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3) -} - -// Deserialize and concatenate `SparseTensors` from a serialized minibatch. -// -// The input `serialized_sparse` must be a string matrix of shape `[N x 3]` where -// `N` is the minibatch size and the rows correspond to packed outputs of -// `SerializeSparse`. The ranks of the original `SparseTensor` objects -// must all match. When the final `SparseTensor` is created, it has rank one -// higher than the ranks of the incoming `SparseTensor` objects -// (they have been concatenated along a new row dimension). -// -// The output `SparseTensor` object's shape values for all dimensions but the -// first are the max across the input `SparseTensor` objects' shape values -// for the corresponding dimensions. Its first shape value is `N`, the minibatch -// size. 
-// -// The input `SparseTensor` objects' indices are assumed ordered in -// standard lexicographic order. If this is not the case, after this -// step run `SparseReorder` to restore index ordering. -// -// For example, if the serialized input is a `[2 x 3]` matrix representing two -// original `SparseTensor` objects: -// -// index = [ 0] -// [10] -// [20] -// values = [1, 2, 3] -// shape = [50] -// -// and -// -// index = [ 2] -// [10] -// values = [4, 5] -// shape = [30] -// -// then the final deserialized `SparseTensor` will be: -// -// index = [0 0] -// [0 10] -// [0 20] -// [1 2] -// [1 10] -// values = [1, 2, 3, 4, 5] -// shape = [2 50] -// -// Arguments: -// serialized_sparse: 2-D, The `N` serialized `SparseTensor` objects. -// Must have 3 columns. -// dtype: The `dtype` of the serialized `SparseTensor` objects. -func DeserializeManySparse(scope *Scope, serialized_sparse tf.Output, dtype tf.DataType) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - opspec := tf.OpSpec{ - Type: "DeserializeManySparse", - Input: []tf.Input{ - serialized_sparse, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// MapStageAttr is an optional argument to MapStage. -type MapStageAttr func(optionalAttr) - -// MapStageCapacity sets the optional capacity attribute to value. -// -// value: Maximum number of elements in the Staging Area. If > 0, inserts -// on the container will block when the capacity is reached. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapStageCapacity(value int64) MapStageAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// MapStageMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapStageMemoryLimit(value int64) MapStageAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// MapStageContainer sets the optional container attribute to value. -// -// value: If non-empty, this queue is placed in the given container. Otherwise, -// a default container is used. -// If not specified, defaults to "" -func MapStageContainer(value string) MapStageAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// MapStageSharedName sets the optional shared_name attribute to value. -// -// value: It is necessary to match this name to the matching Unstage Op. -// If not specified, defaults to "" -func MapStageSharedName(value string) MapStageAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Stage (key, values) in the underlying container which behaves like a hashtable. -// -// Arguments: -// key: int64 -// -// values: a list of tensors -// dtypes A list of data types that inserted values should adhere to. -// -// -// Returns the created operation. -func MapStage(scope *Scope, key tf.Output, indices tf.Output, values []tf.Output, dtypes []tf.DataType, optional ...MapStageAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MapStage", - Input: []tf.Input{ - key, indices, tf.OutputList(values), - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// FusedBatchNormGradV2Attr is an optional argument to FusedBatchNormGradV2. 
-type FusedBatchNormGradV2Attr func(optionalAttr) - -// FusedBatchNormGradV2Epsilon sets the optional epsilon attribute to value. -// -// value: A small float number added to the variance of x. -// If not specified, defaults to 0.0001 -func FusedBatchNormGradV2Epsilon(value float32) FusedBatchNormGradV2Attr { - return func(m optionalAttr) { - m["epsilon"] = value - } -} - -// FusedBatchNormGradV2DataFormat sets the optional data_format attribute to value. -// -// value: The data format for y_backprop, x, x_backprop. -// Either "NHWC" (default) or "NCHW". -// If not specified, defaults to "NHWC" -func FusedBatchNormGradV2DataFormat(value string) FusedBatchNormGradV2Attr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// FusedBatchNormGradV2IsTraining sets the optional is_training attribute to value. -// -// value: A bool value to indicate the operation is for training (default) -// or inference. -// If not specified, defaults to true -func FusedBatchNormGradV2IsTraining(value bool) FusedBatchNormGradV2Attr { - return func(m optionalAttr) { - m["is_training"] = value - } -} - -// Gradient for batch normalization. -// -// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". -// The size of 1D Tensors matches the dimension C of the 4D Tensors. -// -// Arguments: -// y_backprop: A 4D Tensor for the gradient with respect to y. -// x: A 4D Tensor for input data. -// scale: A 1D Tensor for scaling factor, to scale the normalized x. -// reserve_space_1: When is_training is True, a 1D Tensor for the computed batch -// mean to be reused in gradient computation. When is_training is -// False, a 1D Tensor for the population mean to be reused in both -// 1st and 2nd order gradient computation. -// reserve_space_2: When is_training is True, a 1D Tensor for the computed batch -// variance (inverted variance in the cuDNN case) to be reused in -// gradient computation. When is_training is False, a 1D Tensor -// for the population variance to be reused in both 1st and 2nd -// order gradient computation. -// -// Returns: -// x_backprop: A 4D Tensor for the gradient with respect to x. -// scale_backprop: A 1D Tensor for the gradient with respect to scale. -// offset_backprop: A 1D Tensor for the gradient with respect to offset. -// reserve_space_3: Unused placeholder to match the mean input in FusedBatchNorm. -// reserve_space_4: Unused placeholder to match the variance input -// in FusedBatchNorm. -func FusedBatchNormGradV2(scope *Scope, y_backprop tf.Output, x tf.Output, scale tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output, optional ...FusedBatchNormGradV2Attr) (x_backprop tf.Output, scale_backprop tf.Output, offset_backprop tf.Output, reserve_space_3 tf.Output, reserve_space_4 tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FusedBatchNormGradV2", - Input: []tf.Input{ - y_backprop, x, scale, reserve_space_1, reserve_space_2, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) -} - -// SparseTensorDenseMatMulAttr is an optional argument to SparseTensorDenseMatMul. -type SparseTensorDenseMatMulAttr func(optionalAttr) - -// SparseTensorDenseMatMulAdjointA sets the optional adjoint_a attribute to value. -// -// value: Use the adjoint of A in the matrix multiply. If A is complex, this -// is transpose(conj(A)). Otherwise it's transpose(A). 
-// If not specified, defaults to false -func SparseTensorDenseMatMulAdjointA(value bool) SparseTensorDenseMatMulAttr { - return func(m optionalAttr) { - m["adjoint_a"] = value - } -} - -// SparseTensorDenseMatMulAdjointB sets the optional adjoint_b attribute to value. -// -// value: Use the adjoint of B in the matrix multiply. If B is complex, this -// is transpose(conj(B)). Otherwise it's transpose(B). -// If not specified, defaults to false -func SparseTensorDenseMatMulAdjointB(value bool) SparseTensorDenseMatMulAttr { - return func(m optionalAttr) { - m["adjoint_b"] = value - } -} - -// Multiply SparseTensor (of rank 2) "A" by dense matrix "B". -// -// No validity checking is performed on the indices of A. However, the following -// input format is recommended for optimal behavior: -// -// if adjoint_a == false: -// A should be sorted in lexicographically increasing order. Use SparseReorder -// if you're not sure. -// if adjoint_a == true: -// A should be sorted in order of increasing dimension 1 (i.e., "column major" -// order instead of "row major" order). -// -// Arguments: -// a_indices: 2-D. The `indices` of the `SparseTensor`, size `[nnz, 2]` Matrix. -// a_values: 1-D. The `values` of the `SparseTensor`, size `[nnz]` Vector. -// a_shape: 1-D. The `shape` of the `SparseTensor`, size `[2]` Vector. -// b: 2-D. A dense Matrix. -func SparseTensorDenseMatMul(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output, optional ...SparseTensorDenseMatMulAttr) (product tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SparseTensorDenseMatMul", - Input: []tf.Input{ - a_indices, a_values, a_shape, b, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Sets the index-th position of the list to contain the given tensor. -// -// input_handle: the list -// index: the position in the list to which the tensor will be assigned -// item: the element to be assigned to that position -// output_handle: the new list, with the element in the proper position -// -func TensorListSetItem(scope *Scope, input_handle tf.Output, index tf.Output, item tf.Output) (output_handle tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorListSetItem", - Input: []tf.Input{ - input_handle, index, item, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Writes the given dataset to the given file using the TFRecord format. -// -// Arguments: -// input_dataset: A variant tensor representing the dataset to write. -// filename: A scalar string tensor representing the filename to use. -// compression_type: A scalar string tensor containing either (i) the empty string (no -// compression), (ii) "ZLIB", or (iii) "GZIP". -// -// Returns the created operation. -func DatasetToTFRecord(scope *Scope, input_dataset tf.Output, filename tf.Output, compression_type tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DatasetToTFRecord", - Input: []tf.Input{ - input_dataset, filename, compression_type, - }, - } - return scope.AddOperation(opspec) -} - -// ResourceSparseApplyCenteredRMSPropAttr is an optional argument to ResourceSparseApplyCenteredRMSProp. -type ResourceSparseApplyCenteredRMSPropAttr func(optionalAttr) - -// ResourceSparseApplyCenteredRMSPropUseLocking sets the optional use_locking attribute to value. 
-// -// value: If `True`, updating of the var, mg, ms, and mom tensors is -// protected by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceSparseApplyCenteredRMSPropUseLocking(value bool) ResourceSparseApplyCenteredRMSPropAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' according to the centered RMSProp algorithm. -// -// The centered RMSProp algorithm uses an estimate of the centered second moment -// (i.e., the variance) for normalization, as opposed to regular RMSProp, which -// uses the (uncentered) second moment. This often helps with training, but is -// slightly more expensive in terms of computation and memory. -// -// Note that in dense implementation of this algorithm, mg, ms, and mom will -// update even if the grad is zero, but in this sparse implementation, mg, ms, -// and mom will not update in iterations during which the grad is zero. -// -// mean_square = decay * mean_square + (1-decay) * gradient ** 2 -// mean_grad = decay * mean_grad + (1-decay) * gradient -// Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2) -// -// ms <- rho * ms_{t-1} + (1-rho) * grad * grad -// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) -// var <- var - mom -// -// Arguments: -// var_: Should be from a Variable(). -// mg: Should be from a Variable(). -// ms: Should be from a Variable(). -// mom: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// rho: Decay rate. Must be a scalar. -// -// epsilon: Ridge term. Must be a scalar. -// grad: The gradient. -// indices: A vector of indices into the first dimension of var, ms and mom. -// -// Returns the created operation. -func ResourceSparseApplyCenteredRMSProp(scope *Scope, var_ tf.Output, mg tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyCenteredRMSPropAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceSparseApplyCenteredRMSProp", - Input: []tf.Input{ - var_, mg, ms, mom, lr, rho, momentum, epsilon, grad, indices, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Elementwise computes the bitwise XOR of `x` and `y`. -// -// The result will have those bits set, that are different in `x` and `y`. The -// computation is performed on the underlying representations of `x` and `y`. -// -// For example: -// -// ```python -// import tensorflow as tf -// from tensorflow.python.ops import bitwise_ops -// dtype_list = [tf.int8, tf.int16, tf.int32, tf.int64, -// tf.uint8, tf.uint16, tf.uint32, tf.uint64] -// -// for dtype in dtype_list: -// lhs = tf.constant([0, 5, 3, 14], dtype=dtype) -// rhs = tf.constant([5, 0, 7, 11], dtype=dtype) -// exp = tf.constant([5, 5, 4, 5], dtype=tf.float32) -// -// res = bitwise_ops.bitwise_xor(lhs, rhs) -// tf.assert_equal(tf.cast(res, tf.float32), exp) # TRUE -// ``` -// -func BitwiseXor(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BitwiseXor", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Adds two `SparseTensor` objects to produce another `SparseTensor`. 
-// -// The input `SparseTensor` objects' indices are assumed ordered in standard -// lexicographic order. If this is not the case, before this step run -// `SparseReorder` to restore index ordering. -// -// By default, if two values sum to zero at some index, the output `SparseTensor` -// would still include that particular location in its index, storing a zero in the -// corresponding value slot. To override this, callers can specify `thresh`, -// indicating that if the sum has a magnitude strictly smaller than `thresh`, its -// corresponding value and index would then not be included. In particular, -// `thresh == 0` (default) means everything is kept and actual thresholding happens -// only for a positive value. -// -// In the following shapes, `nnz` is the count after taking `thresh` into account. -// -// Arguments: -// a_indices: 2-D. The `indices` of the first `SparseTensor`, size `[nnz, ndims]` Matrix. -// a_values: 1-D. The `values` of the first `SparseTensor`, size `[nnz]` Vector. -// a_shape: 1-D. The `shape` of the first `SparseTensor`, size `[ndims]` Vector. -// b_indices: 2-D. The `indices` of the second `SparseTensor`, size `[nnz, ndims]` Matrix. -// b_values: 1-D. The `values` of the second `SparseTensor`, size `[nnz]` Vector. -// b_shape: 1-D. The `shape` of the second `SparseTensor`, size `[ndims]` Vector. -// thresh: 0-D. The magnitude threshold that determines if an output value/index -// pair takes space. -func SparseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output, thresh tf.Output) (sum_indices tf.Output, sum_values tf.Output, sum_shape tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseAdd", - Input: []tf.Input{ - a_indices, a_values, a_shape, b_indices, b_values, b_shape, thresh, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// RandomShuffleAttr is an optional argument to RandomShuffle. -type RandomShuffleAttr func(optionalAttr) - -// RandomShuffleSeed sets the optional seed attribute to value. -// -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func RandomShuffleSeed(value int64) RandomShuffleAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// RandomShuffleSeed2 sets the optional seed2 attribute to value. -// -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomShuffleSeed2(value int64) RandomShuffleAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Randomly shuffles a tensor along its first dimension. -// -// The tensor is shuffled along dimension 0, such that each `value[j]` is mapped -// to one and only one `output[i]`. For example, a mapping that might occur for a -// 3x2 tensor is: -// -// ``` -// [[1, 2], [[5, 6], -// [3, 4], ==> [1, 2], -// [5, 6]] [3, 4]] -// ``` -// -// Arguments: -// value: The tensor to be shuffled. -// -// Returns A tensor of same shape and type as `value`, shuffled along its first -// dimension. 
-func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RandomShuffle", - Input: []tf.Input{ - value, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Selects elements from `x` or `y`, depending on `condition`. -// -// The `x`, and `y` tensors must all have the same shape, and the -// output will also have that shape. -// -// The `condition` tensor must be a scalar if `x` and `y` are scalars. -// If `x` and `y` are vectors or higher rank, then `condition` must be either a -// scalar, a vector with size matching the first dimension of `x`, or must have -// the same shape as `x`. -// -// The `condition` tensor acts as a mask that chooses, based on the value at each -// element, whether the corresponding element / row in the output should be -// taken from `x` (if true) or `y` (if false). -// -// If `condition` is a vector and `x` and `y` are higher rank matrices, then -// it chooses which row (outer dimension) to copy from `x` and `y`. -// If `condition` has the same shape as `x` and `y`, then it chooses which -// element to copy from `x` and `y`. -// -// For example: -// -// ```python -// # 'condition' tensor is [[True, False] -// # [False, True]] -// # 't' is [[1, 2], -// # [3, 4]] -// # 'e' is [[5, 6], -// # [7, 8]] -// select(condition, t, e) # => [[1, 6], [7, 4]] -// -// -// # 'condition' tensor is [True, False] -// # 't' is [[1, 2], -// # [3, 4]] -// # 'e' is [[5, 6], -// # [7, 8]] -// select(condition, t, e) ==> [[1, 2], -// [7, 8]] -// -// ``` -// -// Arguments: -// -// x: = A `Tensor` which may have the same shape as `condition`. -// If `condition` is rank 1, `x` may have higher rank, -// but its first dimension must match the size of `condition`. -// y: = A `Tensor` with the same type and shape as `x`. -// -// Returns = A `Tensor` with the same type and shape as `x` and `y`. -func Select(scope *Scope, condition tf.Output, x tf.Output, y tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Select", - Input: []tf.Input{ - condition, x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// The gradient operator for the SparseAdd op. -// -// The SparseAdd op calculates A + B, where A, B, and the sum are all represented -// as `SparseTensor` objects. This op takes in the upstream gradient w.r.t. -// non-empty values of the sum, and outputs the gradients w.r.t. the non-empty -// values of A and B. -// -// Arguments: -// backprop_val_grad: 1-D with shape `[nnz(sum)]`. The gradient with respect to -// the non-empty values of the sum. -// a_indices: 2-D. The `indices` of the `SparseTensor` A, size `[nnz(A), ndims]`. -// b_indices: 2-D. The `indices` of the `SparseTensor` B, size `[nnz(B), ndims]`. -// sum_indices: 2-D. The `indices` of the sum `SparseTensor`, size -// `[nnz(sum), ndims]`. -// -// Returns: -// a_val_grad: 1-D with shape `[nnz(A)]`. The gradient with respect to the -// non-empty values of A. -// b_val_grad: 1-D with shape `[nnz(B)]`. The gradient with respect to the -// non-empty values of B. 
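(Aside, not part of the patch: the `Select` wrapper above documents its masking rule with Python-style pseudo-code; the sketch below shows the equivalent call through the Go bindings, assuming the usual `tensorflow/go/op` package. The helper name `buildSelect` is illustrative only.)

```go
package main

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// buildSelect wires up Select(condition, x, y): with a rank-1 condition and
// rank-2 x/y, whole rows are chosen, so the result here is [[1 2] [7 8]].
func buildSelect(s *op.Scope) tf.Output {
	condition := op.Const(s, []bool{true, false})
	x := op.Const(s, [][]int32{{1, 2}, {3, 4}})
	y := op.Const(s, [][]int32{{5, 6}, {7, 8}})
	return op.Select(s, condition, x, y)
}
```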
-func SparseAddGrad(scope *Scope, backprop_val_grad tf.Output, a_indices tf.Output, b_indices tf.Output, sum_indices tf.Output) (a_val_grad tf.Output, b_val_grad tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseAddGrad", - Input: []tf.Input{ - backprop_val_grad, a_indices, b_indices, sum_indices, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Removes keys and its associated values from a table. -// -// The tensor `keys` must of the same type as the keys of the table. Keys not -// already in the table are silently ignored. -// -// Arguments: -// table_handle: Handle to the table. -// keys: Any shape. Keys of the elements to remove. -// -// Returns the created operation. -func LookupTableRemoveV2(scope *Scope, table_handle tf.Output, keys tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "LookupTableRemoveV2", - Input: []tf.Input{ - table_handle, keys, - }, - } - return scope.AddOperation(opspec) -} - -// NotEqualAttr is an optional argument to NotEqual. -type NotEqualAttr func(optionalAttr) - -// NotEqualIncompatibleShapeError sets the optional incompatible_shape_error attribute to value. -// If not specified, defaults to true -func NotEqualIncompatibleShapeError(value bool) NotEqualAttr { - return func(m optionalAttr) { - m["incompatible_shape_error"] = value - } -} - -// Returns the truth value of (x != y) element-wise. -// -// *NOTE*: `NotEqual` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func NotEqual(scope *Scope, x tf.Output, y tf.Output, optional ...NotEqualAttr) (z tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "NotEqual", - Input: []tf.Input{ - x, y, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the complementary error function of `x` element-wise. -func Erfc(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Erfc", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RandomUniformIntAttr is an optional argument to RandomUniformInt. -type RandomUniformIntAttr func(optionalAttr) - -// RandomUniformIntSeed sets the optional seed attribute to value. -// -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func RandomUniformIntSeed(value int64) RandomUniformIntAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// RandomUniformIntSeed2 sets the optional seed2 attribute to value. -// -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomUniformIntSeed2(value int64) RandomUniformIntAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Outputs random integers from a uniform distribution. -// -// The generated values are uniform integers in the range `[minval, maxval)`. -// The lower bound `minval` is included in the range, while the upper bound -// `maxval` is excluded. -// -// The random integers are slightly biased unless `maxval - minval` is an exact -// power of two. 
The bias is small for values of `maxval - minval` significantly -// smaller than the range of the output (either `2^32` or `2^64`). -// -// Arguments: -// shape: The shape of the output tensor. -// minval: 0-D. Inclusive lower bound on the generated integers. -// maxval: 0-D. Exclusive upper bound on the generated integers. -// -// Returns A tensor of the specified shape filled with uniform random integers. -func RandomUniformInt(scope *Scope, shape tf.Output, minval tf.Output, maxval tf.Output, optional ...RandomUniformIntAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RandomUniformInt", - Input: []tf.Input{ - shape, minval, maxval, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// StridedSliceGradAttr is an optional argument to StridedSliceGrad. -type StridedSliceGradAttr func(optionalAttr) - -// StridedSliceGradBeginMask sets the optional begin_mask attribute to value. -// If not specified, defaults to 0 -func StridedSliceGradBeginMask(value int64) StridedSliceGradAttr { - return func(m optionalAttr) { - m["begin_mask"] = value - } -} - -// StridedSliceGradEndMask sets the optional end_mask attribute to value. -// If not specified, defaults to 0 -func StridedSliceGradEndMask(value int64) StridedSliceGradAttr { - return func(m optionalAttr) { - m["end_mask"] = value - } -} - -// StridedSliceGradEllipsisMask sets the optional ellipsis_mask attribute to value. -// If not specified, defaults to 0 -func StridedSliceGradEllipsisMask(value int64) StridedSliceGradAttr { - return func(m optionalAttr) { - m["ellipsis_mask"] = value - } -} - -// StridedSliceGradNewAxisMask sets the optional new_axis_mask attribute to value. -// If not specified, defaults to 0 -func StridedSliceGradNewAxisMask(value int64) StridedSliceGradAttr { - return func(m optionalAttr) { - m["new_axis_mask"] = value - } -} - -// StridedSliceGradShrinkAxisMask sets the optional shrink_axis_mask attribute to value. -// If not specified, defaults to 0 -func StridedSliceGradShrinkAxisMask(value int64) StridedSliceGradAttr { - return func(m optionalAttr) { - m["shrink_axis_mask"] = value - } -} - -// Returns the gradient of `StridedSlice`. -// -// Since `StridedSlice` cuts out pieces of its `input` which is size -// `shape`, its gradient will have the same shape (which is passed here -// as `shape`). The gradient will be zero in any element that the slice -// does not select. -// -// Arguments are the same as StridedSliceGrad with the exception that -// `dy` is the input gradient to be propagated and `shape` is the -// shape of `StridedSlice`'s `input`. -func StridedSliceGrad(scope *Scope, shape tf.Output, begin tf.Output, end tf.Output, strides tf.Output, dy tf.Output, optional ...StridedSliceGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StridedSliceGrad", - Input: []tf.Input{ - shape, begin, end, strides, dy, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceApplyFtrlAttr is an optional argument to ResourceApplyFtrl. -type ResourceApplyFtrlAttr func(optionalAttr) - -// ResourceApplyFtrlUseLocking sets the optional use_locking attribute to value. 
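(Aside, not part of the patch: a short sketch of the functional-options pattern shared by these wrappers, using the `RandomUniformInt` wrapper defined above. Optional attributes such as `seed` and `seed2` are passed as trailing `...Attr` values; assumes the usual `tensorflow/go/op` package, and the helper name is illustrative.)

```go
package main

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// buildRandomInts asks for four integers drawn uniformly from [0, 10),
// with both seeds fixed so the draw is reproducible across runs.
func buildRandomInts(s *op.Scope) tf.Output {
	shape := op.Const(s, []int32{4})
	minval := op.Const(s, int64(0))
	maxval := op.Const(s, int64(10))
	return op.RandomUniformInt(s, shape, minval, maxval,
		op.RandomUniformIntSeed(7), op.RandomUniformIntSeed2(11))
}
```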
-// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyFtrlUseLocking(value bool) ResourceApplyFtrlAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// ResourceApplyFtrlMultiplyLinearByLr sets the optional multiply_linear_by_lr attribute to value. -// If not specified, defaults to false -func ResourceApplyFtrlMultiplyLinearByLr(value bool) ResourceApplyFtrlAttr { - return func(m optionalAttr) { - m["multiply_linear_by_lr"] = value - } -} - -// Update '*var' according to the Ftrl-proximal scheme. -// -// accum_new = accum + grad * grad -// linear += grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var -// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 -// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 -// accum = accum_new -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// linear: Should be from a Variable(). -// grad: The gradient. -// lr: Scaling factor. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 regularization. Must be a scalar. -// lr_power: Scaling factor. Must be a scalar. -// -// Returns the created operation. -func ResourceApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyFtrl", - Input: []tf.Input{ - var_, accum, linear, grad, lr, l1, l2, lr_power, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Creates a dataset that contains the unique elements of `input_dataset`. -func ExperimentalUniqueDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "ExperimentalUniqueDataset", - Input: []tf.Input{ - input_dataset, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// StringFormatAttr is an optional argument to StringFormat. -type StringFormatAttr func(optionalAttr) - -// StringFormatTemplate sets the optional template attribute to value. -// -// value: A string, the template to format tensor summaries into. -// If not specified, defaults to "%s" -func StringFormatTemplate(value string) StringFormatAttr { - return func(m optionalAttr) { - m["template"] = value - } -} - -// StringFormatPlaceholder sets the optional placeholder attribute to value. -// -// value: A string, at each placeholder in the template a subsequent tensor summary will be inserted. -// If not specified, defaults to "%s" -func StringFormatPlaceholder(value string) StringFormatAttr { - return func(m optionalAttr) { - m["placeholder"] = value - } -} - -// StringFormatSummarize sets the optional summarize attribute to value. -// -// value: When formatting the tensor summaries print the first and last summarize entries of each tensor dimension. 
-// If not specified, defaults to 3 -func StringFormatSummarize(value int64) StringFormatAttr { - return func(m optionalAttr) { - m["summarize"] = value - } -} - -// Formats a string template using a list of tensors. -// -// Formats a string template using a list of tensors, pretty-printing tensor summaries. -// -// Arguments: -// inputs: The list of tensors to format into the placeholder string. -// -// Returns = The resulting string scalar. -func StringFormat(scope *Scope, inputs []tf.Output, optional ...StringFormatAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StringFormat", - Input: []tf.Input{ - tf.OutputList(inputs), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Converts a SparseTensor to a (possibly batched) CSRSparseMatrix. -// -// Arguments: -// indices: SparseTensor indices. -// values: SparseTensor values. -// dense_shape: SparseTensor dense shape. -// -// Returns A (possibly batched) CSRSparseMatrix. -func SparseTensorToCSRSparseMatrix(scope *Scope, indices tf.Output, values tf.Output, dense_shape tf.Output) (sparse_matrix tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseTensorToCSRSparseMatrix", - Input: []tf.Input{ - indices, values, dense_shape, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes fingerprints of the input strings. -// -// Arguments: -// input: vector of strings to compute fingerprints on. -// -// Returns a (N,2) shaped matrix where N is the number of elements in the input -// vector. Each row contains the low and high parts of the fingerprint. -func SdcaFprint(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SdcaFprint", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ParseSequenceExampleAttr is an optional argument to ParseSequenceExample. -type ParseSequenceExampleAttr func(optionalAttr) - -// ParseSequenceExampleNcontextSparse sets the optional Ncontext_sparse attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func ParseSequenceExampleNcontextSparse(value int64) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["Ncontext_sparse"] = value - } -} - -// ParseSequenceExampleNcontextDense sets the optional Ncontext_dense attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func ParseSequenceExampleNcontextDense(value int64) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["Ncontext_dense"] = value - } -} - -// ParseSequenceExampleNfeatureListSparse sets the optional Nfeature_list_sparse attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func ParseSequenceExampleNfeatureListSparse(value int64) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["Nfeature_list_sparse"] = value - } -} - -// ParseSequenceExampleNfeatureListDense sets the optional Nfeature_list_dense attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func ParseSequenceExampleNfeatureListDense(value int64) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["Nfeature_list_dense"] = value - } -} - -// ParseSequenceExampleContextSparseTypes sets the optional context_sparse_types attribute to value. 
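(Aside, not part of the patch: the `StringFormat` wrapper above combines a template containing placeholders with a list of tensors; the sketch below fills a single placeholder with a tensor summary. Assumes the usual `tensorflow/go/op` package; the helper name and the exact rendered string are illustrative.)

```go
package main

import (
	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

// buildFormatted produces a string scalar along the lines of
// "values: [1 2 3 ... 8 9 10]"; with summarize=3 only the first and last
// three entries of each dimension appear in the summary.
func buildFormatted(s *op.Scope) tf.Output {
	x := op.Const(s, []int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
	return op.StringFormat(s, []tf.Output{x},
		op.StringFormatTemplate("values: %s"),
		op.StringFormatSummarize(3))
}
```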
-// -// value: A list of Ncontext_sparse types; the data types of data in -// each context Feature given in context_sparse_keys. -// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList), -// DT_INT64 (Int64List), and DT_STRING (BytesList). -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleContextSparseTypes(value []tf.DataType) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["context_sparse_types"] = value - } -} - -// ParseSequenceExampleFeatureListDenseTypes sets the optional feature_list_dense_types attribute to value. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleFeatureListDenseTypes(value []tf.DataType) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["feature_list_dense_types"] = value - } -} - -// ParseSequenceExampleContextDenseShapes sets the optional context_dense_shapes attribute to value. -// -// value: A list of Ncontext_dense shapes; the shapes of data in -// each context Feature given in context_dense_keys. -// The number of elements in the Feature corresponding to context_dense_key[j] -// must always equal context_dense_shapes[j].NumEntries(). -// The shape of context_dense_values[j] will match context_dense_shapes[j]. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleContextDenseShapes(value []tf.Shape) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["context_dense_shapes"] = value - } -} - -// ParseSequenceExampleFeatureListSparseTypes sets the optional feature_list_sparse_types attribute to value. -// -// value: A list of Nfeature_list_sparse types; the data types -// of data in each FeatureList given in feature_list_sparse_keys. -// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList), -// DT_INT64 (Int64List), and DT_STRING (BytesList). -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleFeatureListSparseTypes(value []tf.DataType) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["feature_list_sparse_types"] = value - } -} - -// ParseSequenceExampleFeatureListDenseShapes sets the optional feature_list_dense_shapes attribute to value. -// -// value: A list of Nfeature_list_dense shapes; the shapes of -// data in each FeatureList given in feature_list_dense_keys. -// The shape of each Feature in the FeatureList corresponding to -// feature_list_dense_key[j] must always equal -// feature_list_dense_shapes[j].NumEntries(). -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleFeatureListDenseShapes(value []tf.Shape) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["feature_list_dense_shapes"] = value - } -} - -// Transforms a vector of brain.SequenceExample protos (as strings) into typed tensors. -// -// Arguments: -// serialized: A vector containing binary serialized SequenceExample protos. -// debug_name: A vector containing the names of the serialized protos. -// May contain, for example, table key (descriptive) name for the -// corresponding serialized proto. This is purely useful for debugging -// purposes, and the presence of values here has no effect on the output. -// May also be an empty vector if no name is available. -// context_dense_defaults: A list of Ncontext_dense Tensors (some may be empty). -// context_dense_defaults[j] provides default values -// when the SequenceExample's context map lacks context_dense_key[j]. 
-// If an empty Tensor is provided for context_dense_defaults[j], -// then the Feature context_dense_keys[j] is required. -// The input type is inferred from context_dense_defaults[j], even when it's -// empty. If context_dense_defaults[j] is not empty, its shape must match -// context_dense_shapes[j]. -// feature_list_dense_missing_assumed_empty: A vector listing the -// FeatureList keys which may be missing from the SequenceExamples. If the -// associated FeatureList is missing, it is treated as empty. By default, -// any FeatureList not listed in this vector must exist in the SequenceExamples. -// context_sparse_keys: A list of Ncontext_sparse string Tensors (scalars). -// The keys expected in the Examples' features associated with context_sparse -// values. -// context_dense_keys: A list of Ncontext_dense string Tensors (scalars). -// The keys expected in the SequenceExamples' context features associated with -// dense values. -// feature_list_sparse_keys: A list of Nfeature_list_sparse string Tensors -// (scalars). The keys expected in the FeatureLists associated with sparse -// values. -// feature_list_dense_keys: A list of Nfeature_list_dense string Tensors (scalars). -// The keys expected in the SequenceExamples' feature_lists associated -// with lists of dense values. -func ParseSequenceExample(scope *Scope, serialized tf.Output, debug_name tf.Output, context_dense_defaults []tf.Output, feature_list_dense_missing_assumed_empty []string, context_sparse_keys []string, context_dense_keys []string, feature_list_sparse_keys []string, feature_list_dense_keys []string, optional ...ParseSequenceExampleAttr) (context_sparse_indices []tf.Output, context_sparse_values []tf.Output, context_sparse_shapes []tf.Output, context_dense_values []tf.Output, feature_list_sparse_indices []tf.Output, feature_list_sparse_values []tf.Output, feature_list_sparse_shapes []tf.Output, feature_list_dense_values []tf.Output, feature_list_dense_lengths []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"feature_list_dense_missing_assumed_empty": feature_list_dense_missing_assumed_empty, "context_sparse_keys": context_sparse_keys, "context_dense_keys": context_dense_keys, "feature_list_sparse_keys": feature_list_sparse_keys, "feature_list_dense_keys": feature_list_dense_keys} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ParseSequenceExample", - Input: []tf.Input{ - serialized, debug_name, tf.OutputList(context_dense_defaults), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if context_sparse_indices, idx, err = makeOutputList(op, idx, "context_sparse_indices"); err != nil { - scope.UpdateErr("ParseSequenceExample", err) - return - } - if context_sparse_values, idx, err = makeOutputList(op, idx, "context_sparse_values"); err != nil { - scope.UpdateErr("ParseSequenceExample", err) - return - } - if context_sparse_shapes, idx, err = makeOutputList(op, idx, "context_sparse_shapes"); err != nil { - scope.UpdateErr("ParseSequenceExample", err) - return - } - if context_dense_values, idx, err = makeOutputList(op, idx, "context_dense_values"); err != nil { - scope.UpdateErr("ParseSequenceExample", err) - return - } - if feature_list_sparse_indices, idx, err = makeOutputList(op, idx, "feature_list_sparse_indices"); err != nil { - scope.UpdateErr("ParseSequenceExample", err) - return - } - if feature_list_sparse_values, idx, err = makeOutputList(op, idx, 
"feature_list_sparse_values"); err != nil { - scope.UpdateErr("ParseSequenceExample", err) - return - } - if feature_list_sparse_shapes, idx, err = makeOutputList(op, idx, "feature_list_sparse_shapes"); err != nil { - scope.UpdateErr("ParseSequenceExample", err) - return - } - if feature_list_dense_values, idx, err = makeOutputList(op, idx, "feature_list_dense_values"); err != nil { - scope.UpdateErr("ParseSequenceExample", err) - return - } - if feature_list_dense_lengths, idx, err = makeOutputList(op, idx, "feature_list_dense_lengths"); err != nil { - scope.UpdateErr("ParseSequenceExample", err) - return - } - return context_sparse_indices, context_sparse_values, context_sparse_shapes, context_dense_values, feature_list_sparse_indices, feature_list_sparse_values, feature_list_sparse_shapes, feature_list_dense_values, feature_list_dense_lengths -} - -// Returns true if queue is closed. -// -// This operation returns true if the queue is closed and false if the queue -// is open. -// -// Arguments: -// handle: The handle to a queue. -func QueueIsClosedV2(scope *Scope, handle tf.Output) (is_closed tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "QueueIsClosedV2", - Input: []tf.Input{ - handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Inverse 3D fast Fourier transform. -// -// Computes the inverse 3-dimensional discrete Fourier transform over the -// inner-most 3 dimensions of `input`. -// -// Arguments: -// input: A complex tensor. -// -// Returns A complex tensor of the same shape as `input`. The inner-most 3 -// dimensions of `input` are replaced with their inverse 3D Fourier transform. -// -// @compatibility(numpy) -// Equivalent to np.fft.ifftn with 3 dimensions. -// @end_compatibility -func IFFT3D(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "IFFT3D", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// QueueDequeueUpToV2Attr is an optional argument to QueueDequeueUpToV2. -type QueueDequeueUpToV2Attr func(optionalAttr) - -// QueueDequeueUpToV2TimeoutMs sets the optional timeout_ms attribute to value. -// -// value: If the queue has fewer than n elements, this operation -// will block for up to timeout_ms milliseconds. -// Note: This option is not supported yet. -// If not specified, defaults to -1 -func QueueDequeueUpToV2TimeoutMs(value int64) QueueDequeueUpToV2Attr { - return func(m optionalAttr) { - m["timeout_ms"] = value - } -} - -// Dequeues `n` tuples of one or more tensors from the given queue. -// -// This operation is not supported by all queues. If a queue does not support -// DequeueUpTo, then an Unimplemented error is returned. -// -// If the queue is closed and there are more than 0 but less than `n` -// elements remaining, then instead of returning an OutOfRange error like -// QueueDequeueMany, less than `n` elements are returned immediately. If -// the queue is closed and there are 0 elements left in the queue, then -// an OutOfRange error is returned just like in QueueDequeueMany. -// Otherwise the behavior is identical to QueueDequeueMany: -// -// This operation concatenates queue-element component tensors along the -// 0th dimension to make a single component tensor. All of the components -// in the dequeued tuple will have size n in the 0th dimension. 
-// -// This operation has `k` outputs, where `k` is the number of components in -// the tuples stored in the given queue, and output `i` is the ith -// component of the dequeued tuple. -// -// Arguments: -// handle: The handle to a queue. -// n: The number of tuples to dequeue. -// component_types: The type of each component in a tuple. -// -// Returns One or more tensors that were dequeued as a tuple. -func QueueDequeueUpToV2(scope *Scope, handle tf.Output, n tf.Output, component_types []tf.DataType, optional ...QueueDequeueUpToV2Attr) (components []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"component_types": component_types} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QueueDequeueUpToV2", - Input: []tf.Input{ - handle, n, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if components, idx, err = makeOutputList(op, idx, "components"); err != nil { - scope.UpdateErr("QueueDequeueUpToV2", err) - return - } - return components -} - -// Says whether the targets are in the top `K` predictions. -// -// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the -// prediction for the target class is among the top `k` predictions among -// all predictions for example `i`. Note that the behavior of `InTopK` differs -// from the `TopK` op in its handling of ties; if multiple classes have the -// same prediction value and straddle the top-`k` boundary, all of those -// classes are considered to be in the top `k`. -// -// More formally, let -// -// \\(predictions_i\\) be the predictions for all classes for example `i`, -// \\(targets_i\\) be the target class for example `i`, -// \\(out_i\\) be the output for example `i`, -// -// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$ -// -// Arguments: -// predictions: A `batch_size` x `classes` tensor. -// targets: A `batch_size` vector of class ids. -// k: Number of top elements to look at for computing precision. -// -// Returns Computed Precision at `k` as a `bool Tensor`. -func InTopK(scope *Scope, predictions tf.Output, targets tf.Output, k int64) (precision tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"k": k} - opspec := tf.OpSpec{ - Type: "InTopK", - Input: []tf.Input{ - predictions, targets, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns x - y element-wise. -// -// *NOTE*: `Subtract` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Sub(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Sub", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// FusedResizeAndPadConv2DAttr is an optional argument to FusedResizeAndPadConv2D. -type FusedResizeAndPadConv2DAttr func(optionalAttr) - -// FusedResizeAndPadConv2DResizeAlignCorners sets the optional resize_align_corners attribute to value. -// -// value: If true, the centers of the 4 corner pixels of the input and output tensors are -// aligned, preserving the values at the corner pixels. Defaults to false. 
-// If not specified, defaults to false -func FusedResizeAndPadConv2DResizeAlignCorners(value bool) FusedResizeAndPadConv2DAttr { - return func(m optionalAttr) { - m["resize_align_corners"] = value - } -} - -// Performs a resize and padding as a preprocess during a convolution. -// -// It's often possible to do spatial transformations more efficiently as part of -// the packing stage of a convolution, so this op allows for an optimized -// implementation where these stages are fused together. This prevents the need to -// write out the intermediate results as whole tensors, reducing memory pressure, -// and we can get some latency gains by merging the transformation calculations. -// The data_format attribute for Conv2D isn't supported by this op, and defaults to -// 'NHWC' order. -// Internally this op uses a single per-graph scratch buffer, which means that it -// will block if multiple versions are being run in parallel. This is because this -// operator is primarily an optimization to minimize memory usage. -// -// Arguments: -// input: 4-D with shape `[batch, in_height, in_width, in_channels]`. -// size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The -// new size for the images. -// paddings: A two-column matrix specifying the padding sizes. The number of -// rows must be the same as the rank of `input`. -// filter: 4-D with shape -// `[filter_height, filter_width, in_channels, out_channels]`. -// -// strides: 1-D of length 4. The stride of the sliding window for each dimension -// of `input`. Must be in the same order as the dimension specified with format. -// padding: The type of padding algorithm to use. -func FusedResizeAndPadConv2D(scope *Scope, input tf.Output, size tf.Output, paddings tf.Output, filter tf.Output, mode string, strides []int64, padding string, optional ...FusedResizeAndPadConv2DAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"mode": mode, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FusedResizeAndPadConv2D", - Input: []tf.Input{ - input, size, paddings, filter, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the product along segments of a tensor. -// -// Read -// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) -// for an explanation of segments. -// -// This operator is similar to the unsorted segment sum operator found -// [(here)](../../../api_docs/python/math_ops.md#UnsortedSegmentSum). -// Instead of computing the sum over segments, it computes the product of all -// entries belonging to a segment such that: -// -// \\(output_i = \prod_{j...} data[j...]\\) where the product is over tuples -// `j...` such that `segment_ids[j...] == i`. -// -// For example: -// -// ``` python -// c = tf.constant([[1,2,3,4], [5,6,7,8], [4,3,2,1]]) -// tf.unsorted_segment_prod(c, tf.constant([0, 1, 0]), num_segments=2) -// # ==> [[ 4, 6, 6, 4], -// # [5, 6, 7, 8]] -// ``` -// -// If there is no entry for a given segment ID `i`, it outputs 1. -// -// If the given segment ID `i` is negative, then the corresponding value is -// dropped, and will not be included in the result. -// -// Arguments: -// -// segment_ids: A tensor whose shape is a prefix of `data.shape`. -// -// -// Returns Has same shape as data, except for the first `segment_ids.rank` -// dimensions, which are replaced with a single dimension which has size -// `num_segments`. 
-func UnsortedSegmentProd(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "UnsortedSegmentProd", - Input: []tf.Input{ - data, segment_ids, num_segments, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// FakeQuantWithMinMaxVarsPerChannelAttr is an optional argument to FakeQuantWithMinMaxVarsPerChannel. -type FakeQuantWithMinMaxVarsPerChannelAttr func(optionalAttr) - -// FakeQuantWithMinMaxVarsPerChannelNumBits sets the optional num_bits attribute to value. -// If not specified, defaults to 8 -func FakeQuantWithMinMaxVarsPerChannelNumBits(value int64) FakeQuantWithMinMaxVarsPerChannelAttr { - return func(m optionalAttr) { - m["num_bits"] = value - } -} - -// FakeQuantWithMinMaxVarsPerChannelNarrowRange sets the optional narrow_range attribute to value. -// If not specified, defaults to false -func FakeQuantWithMinMaxVarsPerChannelNarrowRange(value bool) FakeQuantWithMinMaxVarsPerChannelAttr { - return func(m optionalAttr) { - m["narrow_range"] = value - } -} - -// Fake-quantize the 'inputs' tensor of type float via per-channel floats -// -// Fake-quantize the `inputs` tensor of type float per-channel and one of the -// shapes: `[d]`, `[b, d]` `[b, h, w, d]` via per-channel floats `min` and `max` -// of shape `[d]` to `outputs` tensor of same shape as `inputs`. -// -// Attributes -// -// * `[min; max]` define the clamping range for the `inputs` data. -// * `inputs` values are quantized into the quantization range ( -// `[0; 2^num_bits - 1]` when `narrow_range` is false and `[1; 2^num_bits - 1]` -// when it is true) and then de-quantized and output as floats in `[min; max]` -// interval. -// * `num_bits` is the bitwidth of the quantization; between 2 and 16, inclusive. -// -// Before quantization, `min` and `max` values are adjusted with the following -// logic. -// It is suggested to have `min <= 0 <= max`. If `0` is not in the range of values, -// the behavior can be unexpected: -// -// * If `0 < min < max`: `min_adj = 0` and `max_adj = max - min`. -// * If `min < max < 0`: `min_adj = min - max` and `max_adj = 0`. -// * If `min <= 0 <= max`: `scale = (max - min) / (2^num_bits - 1) `, -// `min_adj = scale * round(min / scale)` and `max_adj = max + min_adj - min`. -// -// This operation has a gradient and thus allows for training `min` and `max` -// values. -func FakeQuantWithMinMaxVarsPerChannel(scope *Scope, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsPerChannelAttr) (outputs tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FakeQuantWithMinMaxVarsPerChannel", - Input: []tf.Input{ - inputs, min, max, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Checks whether a resource handle-based variable has been initialized. -// -// Arguments: -// resource: the input resource handle. -// -// Returns a scalar boolean which is true if the variable has been -// initialized. -func VarIsInitializedOp(scope *Scope, resource tf.Output) (is_initialized tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "VarIsInitializedOp", - Input: []tf.Input{ - resource, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// AngleAttr is an optional argument to Angle. 
-type AngleAttr func(optionalAttr) - -// AngleTout sets the optional Tout attribute to value. -// If not specified, defaults to DT_FLOAT -func AngleTout(value tf.DataType) AngleAttr { - return func(m optionalAttr) { - m["Tout"] = value - } -} - -// Returns the argument of a complex number. -// -// Given a tensor `input` of complex numbers, this operation returns a tensor of -// type `float` that is the argument of each element in `input`. All elements in -// `input` must be complex numbers of the form \\(a + bj\\), where *a* -// is the real part and *b* is the imaginary part. -// -// The argument returned by this operation is of the form \\(atan2(b, a)\\). -// -// For example: -// -// ``` -// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] -// tf.angle(input) ==> [2.0132, 1.056] -// ``` -// -// @compatibility(numpy) -// Equivalent to np.angle. -// @end_compatibility -func Angle(scope *Scope, input tf.Output, optional ...AngleAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Angle", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes natural logarithm of x element-wise. -// -// I.e., \\(y = \log_e x\\). -// -// Example: -// -// ```python -// x = tf.constant([0, 0.5, 1, 5]) -// tf.math.log(x) ==> [-inf, -0.6931472, 0. , 1.609438] -// ``` -func Log(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Log", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// IRFFT2DAttr is an optional argument to IRFFT2D. -type IRFFT2DAttr func(optionalAttr) - -// IRFFT2DTreal sets the optional Treal attribute to value. -// If not specified, defaults to DT_FLOAT -func IRFFT2DTreal(value tf.DataType) IRFFT2DAttr { - return func(m optionalAttr) { - m["Treal"] = value - } -} - -// Inverse 2D real-valued fast Fourier transform. -// -// Computes the inverse 2-dimensional discrete Fourier transform of a real-valued -// signal over the inner-most 2 dimensions of `input`. -// -// The inner-most 2 dimensions of `input` are assumed to be the result of `RFFT2D`: -// The inner-most dimension contains the `fft_length / 2 + 1` unique components of -// the DFT of a real-valued signal. If `fft_length` is not provided, it is computed -// from the size of the inner-most 2 dimensions of `input`. If the FFT length used -// to compute `input` is odd, it should be provided since it cannot be inferred -// properly. -// -// Along each axis `IRFFT2D` is computed on, if `fft_length` (or -// `fft_length / 2 + 1` for the inner-most dimension) is smaller than the -// corresponding dimension of `input`, the dimension is cropped. If it is larger, -// the dimension is padded with zeros. -// -// Arguments: -// input: A complex tensor. -// fft_length: An int32 tensor of shape [2]. The FFT length for each dimension. -// -// Returns A float32 tensor of the same rank as `input`. The inner-most 2 -// dimensions of `input` are replaced with the `fft_length` samples of their -// inverse 2D Fourier transform. 
-// -// @compatibility(numpy) -// Equivalent to np.fft.irfft2 -// @end_compatibility -func IRFFT2D(scope *Scope, input tf.Output, fft_length tf.Output, optional ...IRFFT2DAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "IRFFT2D", - Input: []tf.Input{ - input, fft_length, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResizeBicubicAttr is an optional argument to ResizeBicubic. -type ResizeBicubicAttr func(optionalAttr) - -// ResizeBicubicAlignCorners sets the optional align_corners attribute to value. -// -// value: If true, the centers of the 4 corner pixels of the input and output tensors are -// aligned, preserving the values at the corner pixels. Defaults to false. -// If not specified, defaults to false -func ResizeBicubicAlignCorners(value bool) ResizeBicubicAttr { - return func(m optionalAttr) { - m["align_corners"] = value - } -} - -// ResizeBicubicHalfPixelCenters sets the optional half_pixel_centers attribute to value. -// If not specified, defaults to false -func ResizeBicubicHalfPixelCenters(value bool) ResizeBicubicAttr { - return func(m optionalAttr) { - m["half_pixel_centers"] = value - } -} - -// Resize `images` to `size` using bicubic interpolation. -// -// Input images can be of different types but output images are always float. -// -// Arguments: -// images: 4-D with shape `[batch, height, width, channels]`. -// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The -// new size for the images. -// -// Returns 4-D with shape -// `[batch, new_height, new_width, channels]`. -func ResizeBicubic(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeBicubicAttr) (resized_images tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResizeBicubic", - Input: []tf.Input{ - images, size, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the mean along sparse segments of a tensor. -// -// Like `SparseSegmentMean`, but allows missing ids in `segment_ids`. If an id is -// missing, the `output` tensor at that position will be zeroed. -// -// Read -// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) -// for an explanation of segments. -// -// Arguments: -// -// indices: A 1-D tensor. Has same rank as `segment_ids`. -// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. -// num_segments: Should equal the number of distinct segment IDs. -// -// Returns Has same shape as data, except for dimension 0 which has size -// `num_segments`. -func SparseSegmentMeanWithNumSegments(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSegmentMeanWithNumSegments", - Input: []tf.Input{ - data, indices, segment_ids, num_segments, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RFFT2DAttr is an optional argument to RFFT2D. -type RFFT2DAttr func(optionalAttr) - -// RFFT2DTcomplex sets the optional Tcomplex attribute to value. 
-// If not specified, defaults to DT_COMPLEX64 -func RFFT2DTcomplex(value tf.DataType) RFFT2DAttr { - return func(m optionalAttr) { - m["Tcomplex"] = value - } -} - -// 2D real-valued fast Fourier transform. -// -// Computes the 2-dimensional discrete Fourier transform of a real-valued signal -// over the inner-most 2 dimensions of `input`. -// -// Since the DFT of a real signal is Hermitian-symmetric, `RFFT2D` only returns the -// `fft_length / 2 + 1` unique components of the FFT for the inner-most dimension -// of `output`: the zero-frequency term, followed by the `fft_length / 2` -// positive-frequency terms. -// -// Along each axis `RFFT2D` is computed on, if `fft_length` is smaller than the -// corresponding dimension of `input`, the dimension is cropped. If it is larger, -// the dimension is padded with zeros. -// -// Arguments: -// input: A float32 tensor. -// fft_length: An int32 tensor of shape [2]. The FFT length for each dimension. -// -// Returns A complex64 tensor of the same rank as `input`. The inner-most 2 -// dimensions of `input` are replaced with their 2D Fourier transform. The -// inner-most dimension contains `fft_length / 2 + 1` unique frequency -// components. -// -// @compatibility(numpy) -// Equivalent to np.fft.rfft2 -// @end_compatibility -func RFFT2D(scope *Scope, input tf.Output, fft_length tf.Output, optional ...RFFT2DAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RFFT2D", - Input: []tf.Input{ - input, fft_length, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RFFTAttr is an optional argument to RFFT. -type RFFTAttr func(optionalAttr) - -// RFFTTcomplex sets the optional Tcomplex attribute to value. -// If not specified, defaults to DT_COMPLEX64 -func RFFTTcomplex(value tf.DataType) RFFTAttr { - return func(m optionalAttr) { - m["Tcomplex"] = value - } -} - -// Real-valued fast Fourier transform. -// -// Computes the 1-dimensional discrete Fourier transform of a real-valued signal -// over the inner-most dimension of `input`. -// -// Since the DFT of a real signal is Hermitian-symmetric, `RFFT` only returns the -// `fft_length / 2 + 1` unique components of the FFT: the zero-frequency term, -// followed by the `fft_length / 2` positive-frequency terms. -// -// Along the axis `RFFT` is computed on, if `fft_length` is smaller than the -// corresponding dimension of `input`, the dimension is cropped. If it is larger, -// the dimension is padded with zeros. -// -// Arguments: -// input: A float32 tensor. -// fft_length: An int32 tensor of shape [1]. The FFT length. -// -// Returns A complex64 tensor of the same rank as `input`. The inner-most -// dimension of `input` is replaced with the `fft_length / 2 + 1` unique -// frequency components of its 1D Fourier transform. -// -// @compatibility(numpy) -// Equivalent to np.fft.rfft -// @end_compatibility -func RFFT(scope *Scope, input tf.Output, fft_length tf.Output, optional ...RFFTAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RFFT", - Input: []tf.Input{ - input, fft_length, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// JPEG encode input image with provided compression quality. -// -// `image` is a 3-D uint8 Tensor of shape `[height, width, channels]`. 
-// `quality` is an int32 jpeg compression quality value between 0 and 100. -// -// -// Arguments: -// images: Images to adjust. At least 3-D. -// quality: An int quality to encode to. -// -// Returns 0-D. JPEG-encoded image. -func EncodeJpegVariableQuality(scope *Scope, images tf.Output, quality tf.Output) (contents tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "EncodeJpegVariableQuality", - Input: []tf.Input{ - images, quality, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// 3D fast Fourier transform. -// -// Computes the 3-dimensional discrete Fourier transform over the inner-most 3 -// dimensions of `input`. -// -// Arguments: -// input: A complex tensor. -// -// Returns A complex tensor of the same shape as `input`. The inner-most 3 -// dimensions of `input` are replaced with their 3D Fourier transform. -// -// @compatibility(numpy) -// Equivalent to np.fft.fftn with 3 dimensions. -// @end_compatibility -func FFT3D(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "FFT3D", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that passes a sliding window over `input_dataset`. -// -// Arguments: -// -// window_size: A scalar representing the number of elements in the -// sliding window. -// window_shift: A scalar representing the steps moving the sliding window -// forward in one iteration. It must be positive. -// window_stride: A scalar representing the stride of the input elements of the sliding window. -// It must be positive. -// -// -func SlidingWindowDataset(scope *Scope, input_dataset tf.Output, window_size tf.Output, window_shift tf.Output, window_stride tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "SlidingWindowDataset", - Input: []tf.Input{ - input_dataset, window_size, window_shift, window_stride, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Locks a mutex resource. The output is the lock. So long as the lock tensor -// -// is alive, any other request to use `MutexLock` with this mutex will wait. -// -// This is particularly useful for creating a critical section when used in -// conjunction with `MutexLockIdentity`: -// -// ```python -// -// mutex = mutex_v2( -// shared_name=handle_name, container=container, name=name) -// -// def execute_in_critical_section(fn, *args, **kwargs): -// lock = gen_resource_variable_ops.mutex_lock(mutex) -// -// with ops.control_dependencies([lock]): -// r = fn(*args, **kwargs) -// -// with ops.control_dependencies(nest.flatten(r)): -// with ops.colocate_with(mutex): -// ensure_lock_exists = mutex_lock_identity(lock) -// -// # Make sure that if any element of r is accessed, all of -// # them are executed together. -// r = nest.map_structure(tf.identity, r) -// -// with ops.control_dependencies([ensure_lock_exists]): -// return nest.map_structure(tf.identity, r) -// ``` -// -// While `fn` is running in the critical section, no other functions which wish to -// use this critical section may run. -// -// Often the use case is that two executions of the same graph, in parallel, -// wish to run `fn`; and we wish to ensure that only one of them executes -// at a time. 
This is especially important if `fn` modifies one or more -// variables at a time. -// -// It is also useful if two separate functions must share a resource, but we -// wish to ensure the usage is exclusive. -// -// Arguments: -// mutex: The mutex resource to lock. -// -// Returns A tensor that keeps a shared pointer to a lock on the mutex; -// when the Tensor is destroyed, the use count on the shared pointer is decreased -// by 1. When it reaches 0, the lock is released. -func MutexLock(scope *Scope, mutex tf.Output) (mutex_lock tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MutexLock", - Input: []tf.Input{ - mutex, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MaxPoolGradWithArgmaxAttr is an optional argument to MaxPoolGradWithArgmax. -type MaxPoolGradWithArgmaxAttr func(optionalAttr) - -// MaxPoolGradWithArgmaxIncludeBatchInIndex sets the optional include_batch_in_index attribute to value. -// -// value: Whether to include batch dimension in flattened index of `argmax`. -// If not specified, defaults to false -func MaxPoolGradWithArgmaxIncludeBatchInIndex(value bool) MaxPoolGradWithArgmaxAttr { - return func(m optionalAttr) { - m["include_batch_in_index"] = value - } -} - -// Computes gradients of the maxpooling function. -// -// Arguments: -// input: The original input. -// grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. the -// output of `max_pool`. -// argmax: The indices of the maximum values chosen for each output of `max_pool`. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. -// -// Returns Gradients w.r.t. the input of `max_pool`. -func MaxPoolGradWithArgmax(scope *Scope, input tf.Output, grad tf.Output, argmax tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolGradWithArgmaxAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MaxPoolGradWithArgmax", - Input: []tf.Input{ - input, grad, argmax, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// 2D fast Fourier transform. -// -// Computes the 2-dimensional discrete Fourier transform over the inner-most -// 2 dimensions of `input`. -// -// Arguments: -// input: A complex tensor. -// -// Returns A complex tensor of the same shape as `input`. The inner-most 2 -// dimensions of `input` are replaced with their 2D Fourier transform. -// -// @compatibility(numpy) -// Equivalent to np.fft.fft2 -// @end_compatibility -func FFT2D(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "FFT2D", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SdcaOptimizerAttr is an optional argument to SdcaOptimizer. -type SdcaOptimizerAttr func(optionalAttr) - -// SdcaOptimizerAdaptative sets the optional adaptative attribute to value. -// -// value: Whether to use Adaptive SDCA for the inner loop. 
-// If not specified, defaults to true
-func SdcaOptimizerAdaptative(value bool) SdcaOptimizerAttr {
-	return func(m optionalAttr) {
-		m["adaptative"] = value
-	}
-}
-
-// Distributed version of Stochastic Dual Coordinate Ascent (SDCA) optimizer for
-//
-// linear models with L1 + L2 regularization. As the global optimization objective
-// is strongly convex, the optimizer optimizes the dual objective at each step. The
-// optimizer applies each update one example at a time. Examples are sampled
-// uniformly, and the optimizer is learning-rate free and enjoys a linear
-// convergence rate.
-//
-// [Proximal Stochastic Dual Coordinate Ascent](http://arxiv.org/pdf/1211.2717v1.pdf).
-// Shai Shalev-Shwartz, Tong Zhang. 2012 -// -// $$Loss Objective = \sum f_{i} (wx_{i}) + (l2 / 2) * |w|^2 + l1 * |w|$$ -// -// [Adding vs. Averaging in Distributed Primal-Dual Optimization](http://arxiv.org/abs/1502.03508).
-// Chenxin Ma, Virginia Smith, Martin Jaggi, Michael I. Jordan, -// Peter Richtarik, Martin Takac. 2015 -// -// [Stochastic Dual Coordinate Ascent with Adaptive Probabilities](https://arxiv.org/abs/1502.08053).
-// Dominik Csiba, Zheng Qu, Peter Richtarik. 2015 -// -// Arguments: -// sparse_example_indices: a list of vectors which contain example indices. -// sparse_feature_indices: a list of vectors which contain feature indices. -// sparse_feature_values: a list of vectors which contains feature value -// associated with each feature group. -// dense_features: a list of matrices which contains the dense feature values. -// example_weights: a vector which contains the weight associated with each -// example. -// example_labels: a vector which contains the label/target associated with each -// example. -// sparse_indices: a list of vectors where each value is the indices which has -// corresponding weights in sparse_weights. This field maybe omitted for the -// dense approach. -// sparse_weights: a list of vectors where each value is the weight associated with -// a sparse feature group. -// dense_weights: a list of vectors where the values are the weights associated -// with a dense feature group. -// example_state_data: a list of vectors containing the example state data. -// loss_type: Type of the primal loss. Currently SdcaSolver supports logistic, -// squared and hinge losses. -// l1: Symmetric l1 regularization strength. -// l2: Symmetric l2 regularization strength. -// num_loss_partitions: Number of partitions of the global loss function. -// num_inner_iterations: Number of iterations per mini-batch. -// -// Returns: -// out_example_state_data: a list of vectors containing the updated example state -// data. -// out_delta_sparse_weights: a list of vectors where each value is the delta -// weights associated with a sparse feature group. -// out_delta_dense_weights: a list of vectors where the values are the delta -// weights associated with a dense feature group. 
-func SdcaOptimizer(scope *Scope, sparse_example_indices []tf.Output, sparse_feature_indices []tf.Output, sparse_feature_values []tf.Output, dense_features []tf.Output, example_weights tf.Output, example_labels tf.Output, sparse_indices []tf.Output, sparse_weights []tf.Output, dense_weights []tf.Output, example_state_data tf.Output, loss_type string, l1 float32, l2 float32, num_loss_partitions int64, num_inner_iterations int64, optional ...SdcaOptimizerAttr) (out_example_state_data tf.Output, out_delta_sparse_weights []tf.Output, out_delta_dense_weights []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"loss_type": loss_type, "l1": l1, "l2": l2, "num_loss_partitions": num_loss_partitions, "num_inner_iterations": num_inner_iterations} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SdcaOptimizer", - Input: []tf.Input{ - tf.OutputList(sparse_example_indices), tf.OutputList(sparse_feature_indices), tf.OutputList(sparse_feature_values), tf.OutputList(dense_features), example_weights, example_labels, tf.OutputList(sparse_indices), tf.OutputList(sparse_weights), tf.OutputList(dense_weights), example_state_data, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - out_example_state_data = op.Output(idx) - if out_delta_sparse_weights, idx, err = makeOutputList(op, idx, "out_delta_sparse_weights"); err != nil { - scope.UpdateErr("SdcaOptimizer", err) - return - } - if out_delta_dense_weights, idx, err = makeOutputList(op, idx, "out_delta_dense_weights"); err != nil { - scope.UpdateErr("SdcaOptimizer", err) - return - } - return out_example_state_data, out_delta_sparse_weights, out_delta_dense_weights -} - -// Inverse fast Fourier transform. -// -// Computes the inverse 1-dimensional discrete Fourier transform over the -// inner-most dimension of `input`. -// -// Arguments: -// input: A complex tensor. -// -// Returns A complex tensor of the same shape as `input`. The inner-most -// dimension of `input` is replaced with its inverse 1D Fourier transform. -// -// @compatibility(numpy) -// Equivalent to np.fft.ifft -// @end_compatibility -func IFFT(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "IFFT", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// CollectiveGatherAttr is an optional argument to CollectiveGather. -type CollectiveGatherAttr func(optionalAttr) - -// CollectiveGatherCommunicationHint sets the optional communication_hint attribute to value. -// If not specified, defaults to "auto" -func CollectiveGatherCommunicationHint(value string) CollectiveGatherAttr { - return func(m optionalAttr) { - m["communication_hint"] = value - } -} - -// CollectiveGatherTimeoutSeconds sets the optional timeout_seconds attribute to value. -// If not specified, defaults to 0 -func CollectiveGatherTimeoutSeconds(value float32) CollectiveGatherAttr { - return func(m optionalAttr) { - m["timeout_seconds"] = value - } -} - -// Mutually accumulates multiple tensors of identical type and shape. 
-func CollectiveGather(scope *Scope, input tf.Output, group_size int64, group_key int64, instance_key int64, shape tf.Shape, optional ...CollectiveGatherAttr) (data tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"group_size": group_size, "group_key": group_key, "instance_key": instance_key, "shape": shape} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CollectiveGather", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// L2 Loss. -// -// Computes half the L2 norm of a tensor without the `sqrt`: -// -// output = sum(t ** 2) / 2 -// -// Arguments: -// t: Typically 2-D, but may have any dimensions. -// -// Returns 0-D. -func L2Loss(scope *Scope, t tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "L2Loss", - Input: []tf.Input{ - t, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// An op that receives embedding activations on the TPU. -// -// The TPU system performs the embedding lookups and aggregations specified by -// the arguments to TPUEmbeddingEnqueue(Integer/Sparse/SparseTensor)Batch. The -// results of these aggregations are visible to the Tensorflow Graph as the -// outputs of a RecvTPUEmbeddingActivations op. This op returns a list containing -// one Tensor of activations per table specified in the model. There can be at -// most one RecvTPUEmbeddingActivations op in the TPU graph. -// -// Arguments: -// num_outputs: The number of output activation tensors, equal to the number of -// embedding tables in the model. -// config: Serialized TPUEmbeddingConfiguration proto. -// -// Returns A TensorList of embedding activations containing one Tensor per -// embedding table in the model. -func RecvTPUEmbeddingActivations(scope *Scope, num_outputs int64, config string) (outputs []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_outputs": num_outputs, "config": config} - opspec := tf.OpSpec{ - Type: "RecvTPUEmbeddingActivations", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { - scope.UpdateErr("RecvTPUEmbeddingActivations", err) - return - } - return outputs -} - -// Computes the derivative of a Gamma random sample w.r.t. `alpha`. -func RandomGammaGrad(scope *Scope, alpha tf.Output, sample tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "RandomGammaGrad", - Input: []tf.Input{ - alpha, sample, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// LRNGradAttr is an optional argument to LRNGrad. -type LRNGradAttr func(optionalAttr) - -// LRNGradDepthRadius sets the optional depth_radius attribute to value. -// -// value: A depth radius. -// If not specified, defaults to 5 -func LRNGradDepthRadius(value int64) LRNGradAttr { - return func(m optionalAttr) { - m["depth_radius"] = value - } -} - -// LRNGradBias sets the optional bias attribute to value. -// -// value: An offset (usually > 0 to avoid dividing by 0). -// If not specified, defaults to 1 -func LRNGradBias(value float32) LRNGradAttr { - return func(m optionalAttr) { - m["bias"] = value - } -} - -// LRNGradAlpha sets the optional alpha attribute to value. -// -// value: A scale factor, usually positive. 
-// If not specified, defaults to 1 -func LRNGradAlpha(value float32) LRNGradAttr { - return func(m optionalAttr) { - m["alpha"] = value - } -} - -// LRNGradBeta sets the optional beta attribute to value. -// -// value: An exponent. -// If not specified, defaults to 0.5 -func LRNGradBeta(value float32) LRNGradAttr { - return func(m optionalAttr) { - m["beta"] = value - } -} - -// Gradients for Local Response Normalization. -// -// Arguments: -// input_grads: 4-D with shape `[batch, height, width, channels]`. -// input_image: 4-D with shape `[batch, height, width, channels]`. -// output_image: 4-D with shape `[batch, height, width, channels]`. -// -// Returns The gradients for LRN. -func LRNGrad(scope *Scope, input_grads tf.Output, input_image tf.Output, output_image tf.Output, optional ...LRNGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LRNGrad", - Input: []tf.Input{ - input_grads, input_image, output_image, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// PrelinearizeAttr is an optional argument to Prelinearize. -type PrelinearizeAttr func(optionalAttr) - -// PrelinearizeShape sets the optional shape attribute to value. -// -// value: The shape of the tensor. -// If not specified, defaults to <> -func PrelinearizeShape(value tf.Shape) PrelinearizeAttr { - return func(m optionalAttr) { - m["shape"] = value - } -} - -// PrelinearizeLayout sets the optional layout attribute to value. -// -// value: A vector holding the requested layout in minor-to-major sequence. If a layout -// attribute is passed but its values are all -1 the layout will be computed by -// the infeed operation. -// If not specified, defaults to <> -func PrelinearizeLayout(value []int64) PrelinearizeAttr { - return func(m optionalAttr) { - m["layout"] = value - } -} - -// An op which linearizes one Tensor value to an opaque variant tensor. -// -// Arguments: -// input: A tensor that will be linearized. -func Prelinearize(scope *Scope, input tf.Output, optional ...PrelinearizeAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Prelinearize", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the sparse Cholesky decomposition of `input`. -// -// Computes the Sparse Cholesky decomposition of a sparse matrix, with the given -// fill-in reducing permutation. -// -// The input sparse matrix and the fill-in reducing permutation `permutation` must -// have compatible shapes. If the sparse matrix has rank 3; with the batch -// dimension `B`, then the `permutation` must be of rank 2; with the same batch -// dimension `B`. There is no support for broadcasting. -// -// Furthermore, each component vector of `permutation` must be of length `N`, -// containing each of the integers {0, 1, ..., N - 1} exactly once, where `N` is -// the number of rows of each component of the sparse matrix. -// -// Each component of the input sparse matrix must represent a symmetric positive -// definite (SPD) matrix; although only the lower triangular part of the matrix is -// read. If any individual component is not SPD, then an InvalidArgument error is -// thrown. -// -// The returned sparse matrix has the same dense shape as the input sparse matrix. 
-// For each component `A` of the input sparse matrix, the corresponding output -// sparse matrix represents `L`, the lower triangular Cholesky factor satisfying -// the following identity: -// -// ``` -// A = L * Lt -// ``` -// -// where Lt denotes the transpose of L (or its conjugate transpose, if `type` is -// `complex64` or `complex128`). -// -// The `type` parameter denotes the type of the matrix elements. The supported -// types are: `float32`, `float64`, `complex64` and `complex128`. -// -// Usage example: -// -// ```python -// from tensorflow.python.ops.linalg.sparse import sparse_csr_matrix_ops -// -// a_indices = np.array([[0, 0], [1, 1], [2, 1], [2, 2], [3, 3]]) -// a_values = np.array([1.0, 2.0, 1.0, 3.0, 4.0], np.float32) -// a_dense_shape = [4, 4] -// -// with tf.Session() as sess: -// # Define (COO format) SparseTensor over Numpy array. -// a_st = tf.sparse.SparseTensor(a_indices, a_values, a_dense_shape) -// -// # Convert SparseTensors to CSR SparseMatrix. -// a_sm = sparse_csr_matrix_ops.sparse_tensor_to_csr_sparse_matrix( -// a_st.indices, a_st.values, a_st.dense_shape) -// -// # Obtain the Sparse Cholesky factor using AMD Ordering for reducing zero -// # fill-in (number of structural non-zeros in the sparse Cholesky factor). -// ordering_amd = sparse_csr_matrix_ops.sparse_matrix_ordering_amd(sparse_matrix) -// cholesky_sparse_matrices = ( -// sparse_csr_matrix_ops.sparse_matrix_sparse_cholesky( -// sparse_matrix, ordering_amd, type=tf.float32)) -// -// # Convert the CSRSparseMatrix Cholesky factor to a dense Tensor -// dense_cholesky = sparse_csr_matrix_ops.csr_sparse_matrix_to_dense( -// cholesky_sparse_matrices, tf.float32) -// -// # Evaluate the dense Tensor value. -// dense_cholesky_value = sess.run(dense_cholesky) -// ``` -// -// `dense_cholesky_value` stores the dense Cholesky factor: -// -// ``` -// [[ 1. 0. 0. 0.] -// [ 0. 1.41 0. 0.] -// [ 0. 0.70 1.58 0.] -// [ 0. 0. 0. 2.]] -// ``` -// -// -// input: A `CSRSparseMatrix`. -// permutation: A `Tensor`. -// type: The type of `input`. -// -// Arguments: -// input: A `CSRSparseMatrix`. -// permutation: A fill-in reducing permutation matrix. -// -// -// Returns The sparse Cholesky decompsition of `input`. -func SparseMatrixSparseCholesky(scope *Scope, input tf.Output, permutation tf.Output, type_ tf.DataType) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"type": type_} - opspec := tf.OpSpec{ - Type: "SparseMatrixSparseCholesky", - Input: []tf.Input{ - input, permutation, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// StatefulTruncatedNormalAttr is an optional argument to StatefulTruncatedNormal. -type StatefulTruncatedNormalAttr func(optionalAttr) - -// StatefulTruncatedNormalDtype sets the optional dtype attribute to value. -// -// value: The type of the output. -// If not specified, defaults to DT_FLOAT -func StatefulTruncatedNormalDtype(value tf.DataType) StatefulTruncatedNormalAttr { - return func(m optionalAttr) { - m["dtype"] = value - } -} - -// Outputs random values from a truncated normal distribution. -// -// The generated values follow a normal distribution with mean 0 and standard -// deviation 1, except that values whose magnitude is more than 2 standard -// deviations from the mean are dropped and re-picked. -// -// Arguments: -// resource: The handle of the resource variable that stores the state of the RNG. -// algorithm: The RNG algorithm. -// shape: The shape of the output tensor. 
-// -// Returns Random values with specified shape. -func StatefulTruncatedNormal(scope *Scope, resource tf.Output, algorithm tf.Output, shape tf.Output, optional ...StatefulTruncatedNormalAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StatefulTruncatedNormal", - Input: []tf.Input{ - resource, algorithm, shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the rank of a tensor. -// -// This operation returns an integer representing the rank of `input`. -// -// For example: -// -// ``` -// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] -// # shape of tensor 't' is [2, 2, 3] -// rank(t) ==> 3 -// ``` -// -// **Note**: The rank of a tensor is not the same as the rank of a matrix. The rank -// of a tensor is the number of indices required to uniquely select each element -// of the tensor. Rank is also known as "order", "degree", or "ndims." -func Rank(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Rank", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// EncodeJpegAttr is an optional argument to EncodeJpeg. -type EncodeJpegAttr func(optionalAttr) - -// EncodeJpegFormat sets the optional format attribute to value. -// -// value: Per pixel image format. -// If not specified, defaults to "" -func EncodeJpegFormat(value string) EncodeJpegAttr { - return func(m optionalAttr) { - m["format"] = value - } -} - -// EncodeJpegQuality sets the optional quality attribute to value. -// -// value: Quality of the compression from 0 to 100 (higher is better and slower). -// If not specified, defaults to 95 -func EncodeJpegQuality(value int64) EncodeJpegAttr { - return func(m optionalAttr) { - m["quality"] = value - } -} - -// EncodeJpegProgressive sets the optional progressive attribute to value. -// -// value: If True, create a JPEG that loads progressively (coarse to fine). -// If not specified, defaults to false -func EncodeJpegProgressive(value bool) EncodeJpegAttr { - return func(m optionalAttr) { - m["progressive"] = value - } -} - -// EncodeJpegOptimizeSize sets the optional optimize_size attribute to value. -// -// value: If True, spend CPU/RAM to reduce size with no quality change. -// If not specified, defaults to false -func EncodeJpegOptimizeSize(value bool) EncodeJpegAttr { - return func(m optionalAttr) { - m["optimize_size"] = value - } -} - -// EncodeJpegChromaDownsampling sets the optional chroma_downsampling attribute to value. -// -// value: See http://en.wikipedia.org/wiki/Chroma_subsampling. -// If not specified, defaults to true -func EncodeJpegChromaDownsampling(value bool) EncodeJpegAttr { - return func(m optionalAttr) { - m["chroma_downsampling"] = value - } -} - -// EncodeJpegDensityUnit sets the optional density_unit attribute to value. -// -// value: Unit used to specify `x_density` and `y_density`: -// pixels per inch (`'in'`) or centimeter (`'cm'`). -// If not specified, defaults to "in" -func EncodeJpegDensityUnit(value string) EncodeJpegAttr { - return func(m optionalAttr) { - m["density_unit"] = value - } -} - -// EncodeJpegXDensity sets the optional x_density attribute to value. -// -// value: Horizontal pixels per density unit. 
-// If not specified, defaults to 300 -func EncodeJpegXDensity(value int64) EncodeJpegAttr { - return func(m optionalAttr) { - m["x_density"] = value - } -} - -// EncodeJpegYDensity sets the optional y_density attribute to value. -// -// value: Vertical pixels per density unit. -// If not specified, defaults to 300 -func EncodeJpegYDensity(value int64) EncodeJpegAttr { - return func(m optionalAttr) { - m["y_density"] = value - } -} - -// EncodeJpegXmpMetadata sets the optional xmp_metadata attribute to value. -// -// value: If not empty, embed this XMP metadata in the image header. -// If not specified, defaults to "" -func EncodeJpegXmpMetadata(value string) EncodeJpegAttr { - return func(m optionalAttr) { - m["xmp_metadata"] = value - } -} - -// JPEG-encode an image. -// -// `image` is a 3-D uint8 Tensor of shape `[height, width, channels]`. -// -// The attr `format` can be used to override the color format of the encoded -// output. Values can be: -// -// * `''`: Use a default format based on the number of channels in the image. -// * `grayscale`: Output a grayscale JPEG image. The `channels` dimension -// of `image` must be 1. -// * `rgb`: Output an RGB JPEG image. The `channels` dimension -// of `image` must be 3. -// -// If `format` is not specified or is the empty string, a default format is picked -// in function of the number of channels in `image`: -// -// * 1: Output a grayscale image. -// * 3: Output an RGB image. -// -// Arguments: -// image: 3-D with shape `[height, width, channels]`. -// -// Returns 0-D. JPEG-encoded image. -func EncodeJpeg(scope *Scope, image tf.Output, optional ...EncodeJpegAttr) (contents tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "EncodeJpeg", - Input: []tf.Input{ - image, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Checks whether a quantile stream has been initialized. -// -// An Op that checks if quantile stream resource is initialized. -// -// Arguments: -// quantile_stream_resource_handle: resource; The reference to quantile stream resource handle. -// -// Returns bool; True if the resource is initialized, False otherwise. -func IsBoostedTreesQuantileStreamResourceInitialized(scope *Scope, quantile_stream_resource_handle tf.Output) (is_initialized tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "IsBoostedTreesQuantileStreamResourceInitialized", - Input: []tf.Input{ - quantile_stream_resource_handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Fast Fourier transform. -// -// Computes the 1-dimensional discrete Fourier transform over the inner-most -// dimension of `input`. -// -// Arguments: -// input: A complex tensor. -// -// Returns A complex tensor of the same shape as `input`. The inner-most -// dimension of `input` is replaced with its 1D Fourier transform. -// -// @compatibility(numpy) -// Equivalent to np.fft.fft -// @end_compatibility -func FFT(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "FFT", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// UniqueV2Attr is an optional argument to UniqueV2. -type UniqueV2Attr func(optionalAttr) - -// UniqueV2OutIdx sets the optional out_idx attribute to value. 
-// If not specified, defaults to DT_INT32 -func UniqueV2OutIdx(value tf.DataType) UniqueV2Attr { - return func(m optionalAttr) { - m["out_idx"] = value - } -} - -// Finds unique elements along an axis of a tensor. -// -// This operation either returns a tensor `y` containing unique elements -// along the `axis` of a tensor. The returned unique elements is sorted -// in the same order as they occur along `axis` in `x`. -// This operation also returns a tensor `idx` that is the same size as -// the number of the elements in `x` along the `axis` dimension. It -// contains the index in the unique output `y`. -// In other words, for an `1-D` tensor `x` with `axis = None: -// -// `y[idx[i]] = x[i] for i in [0, 1,...,rank(x) - 1]` -// -// For example: -// -// ``` -// # tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8] -// y, idx = unique(x) -// y ==> [1, 2, 4, 7, 8] -// idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] -// ``` -// -// For an `2-D` tensor `x` with `axis = 0`: -// -// ``` -// # tensor 'x' is [[1, 0, 0], -// # [1, 0, 0], -// # [2, 0, 0]] -// y, idx = unique(x, axis=0) -// y ==> [[1, 0, 0], -// [2, 0, 0]] -// idx ==> [0, 0, 1] -// ``` -// -// For an `2-D` tensor `x` with `axis = 1`: -// -// ``` -// # tensor 'x' is [[1, 0, 0], -// # [1, 0, 0], -// # [2, 0, 0]] -// y, idx = unique(x, axis=1) -// y ==> [[1, 0], -// [1, 0], -// [2, 0]] -// idx ==> [0, 1, 1] -// ``` -// -// Arguments: -// x: A `Tensor`. -// axis: A `Tensor` of type `int32` (default: None). The axis of the Tensor to -// find the unique elements. -// -// Returns: -// y: A `Tensor`. Unique elements along the `axis` of `Tensor` x. -// idx: A 1-D Tensor. Has the same type as x that contains the index of each -// value of x in the output y. -func UniqueV2(scope *Scope, x tf.Output, axis tf.Output, optional ...UniqueV2Attr) (y tf.Output, idx tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "UniqueV2", - Input: []tf.Input{ - x, axis, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// DecodePaddedRawAttr is an optional argument to DecodePaddedRaw. -type DecodePaddedRawAttr func(optionalAttr) - -// DecodePaddedRawLittleEndian sets the optional little_endian attribute to value. -// -// value: Whether the input `input_bytes` is in little-endian order. Ignored for -// `out_type` values that are stored in a single byte, like `uint8` -// If not specified, defaults to true -func DecodePaddedRawLittleEndian(value bool) DecodePaddedRawAttr { - return func(m optionalAttr) { - m["little_endian"] = value - } -} - -// Reinterpret the bytes of a string as a vector of numbers. -// -// Arguments: -// input_bytes: Tensor of string to be decoded. -// fixed_length: Length in bytes for each element of the decoded output. Must be a multiple -// of the size of the output type. -// -// -// Returns A Tensor with one more dimension than the input `bytes`. The added dimension -// will have size equal to the length of the elements of `bytes` divided by the -// number of bytes to represent `out_type`. 
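// Illustrative sketch (editorial, not from the generated file): a plausible call to
// the wrapper below. NewScope and Const are the standard helpers from this package;
// tf.Int16 is assumed to be the int16 DataType constant from the tensorflow package,
// and the example function name is hypothetical.
func exampleDecodePaddedRaw() {
	s := NewScope()
	raw := Const(s, []string{"\x01\x00\x02\x00"})
	fixedLength := Const(s, int32(4)) // decode 4 bytes from each input string
	// With out_type int16, each 4-byte string yields two little-endian int16 values.
	_ = DecodePaddedRaw(s, raw, fixedLength, tf.Int16, DecodePaddedRawLittleEndian(true))
}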
-func DecodePaddedRaw(scope *Scope, input_bytes tf.Output, fixed_length tf.Output, out_type tf.DataType, optional ...DecodePaddedRawAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"out_type": out_type} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DecodePaddedRaw", - Input: []tf.Input{ - input_bytes, fixed_length, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RetrieveTPUEmbeddingADAMParametersAttr is an optional argument to RetrieveTPUEmbeddingADAMParameters. -type RetrieveTPUEmbeddingADAMParametersAttr func(optionalAttr) - -// RetrieveTPUEmbeddingADAMParametersTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func RetrieveTPUEmbeddingADAMParametersTableId(value int64) RetrieveTPUEmbeddingADAMParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// RetrieveTPUEmbeddingADAMParametersTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingADAMParametersTableName(value string) RetrieveTPUEmbeddingADAMParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// RetrieveTPUEmbeddingADAMParametersConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingADAMParametersConfig(value string) RetrieveTPUEmbeddingADAMParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Retrieve ADAM embedding parameters. -// -// An op that retrieves optimization parameters from embedding to host -// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up -// the correct embedding table configuration. For example, this op is -// used to retrieve updated parameters before saving a checkpoint. -// -// Returns: -// parameters: Parameter parameters updated by the ADAM optimization algorithm. -// momenta: Parameter momenta updated by the ADAM optimization algorithm. -// velocities: Parameter velocities updated by the ADAM optimization algorithm. -func RetrieveTPUEmbeddingADAMParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingADAMParametersAttr) (parameters tf.Output, momenta tf.Output, velocities tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RetrieveTPUEmbeddingADAMParameters", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// StatelessRandomBinomialAttr is an optional argument to StatelessRandomBinomial. -type StatelessRandomBinomialAttr func(optionalAttr) - -// StatelessRandomBinomialDtype sets the optional dtype attribute to value. -// -// value: The type of the output. -// If not specified, defaults to DT_INT64 -func StatelessRandomBinomialDtype(value tf.DataType) StatelessRandomBinomialAttr { - return func(m optionalAttr) { - m["dtype"] = value - } -} - -// Outputs deterministic pseudorandom random numbers from a binomial distribution. -// -// Outputs random values from a binomial distribution. -// -// The outputs are a deterministic function of `shape`, `seed`, `counts`, and `probs`. -// -// Arguments: -// shape: The shape of the output tensor. -// seed: 2 seeds (shape [2]). -// counts: The counts of the binomial distribution. 
Must be broadcastable with `probs`, -// and broadcastable with the rightmost dimensions of `shape`. -// probs: The probability of success for the binomial distribution. Must be broadcastable -// with `counts` and broadcastable with the rightmost dimensions of `shape`. -// -// Returns Random values with specified shape. -func StatelessRandomBinomial(scope *Scope, shape tf.Output, seed tf.Output, counts tf.Output, probs tf.Output, optional ...StatelessRandomBinomialAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StatelessRandomBinomial", - Input: []tf.Input{ - shape, seed, counts, probs, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RetrieveTPUEmbeddingAdagradParametersGradAccumDebugAttr is an optional argument to RetrieveTPUEmbeddingAdagradParametersGradAccumDebug. -type RetrieveTPUEmbeddingAdagradParametersGradAccumDebugAttr func(optionalAttr) - -// RetrieveTPUEmbeddingAdagradParametersGradAccumDebugTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func RetrieveTPUEmbeddingAdagradParametersGradAccumDebugTableId(value int64) RetrieveTPUEmbeddingAdagradParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// RetrieveTPUEmbeddingAdagradParametersGradAccumDebugTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingAdagradParametersGradAccumDebugTableName(value string) RetrieveTPUEmbeddingAdagradParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// RetrieveTPUEmbeddingAdagradParametersGradAccumDebugConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingAdagradParametersGradAccumDebugConfig(value string) RetrieveTPUEmbeddingAdagradParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Retrieve Adagrad embedding parameters with debug support. -// -// An op that retrieves optimization parameters from embedding to host -// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up -// the correct embedding table configuration. For example, this op is -// used to retrieve updated parameters before saving a checkpoint. -// -// Returns: -// parameters: Parameter parameters updated by the Adagrad optimization algorithm. -// accumulators: Parameter accumulators updated by the Adagrad optimization algorithm. -// gradient_accumulators: Parameter gradient_accumulators updated by the Adagrad optimization algorithm. -func RetrieveTPUEmbeddingAdagradParametersGradAccumDebug(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingAdagradParametersGradAccumDebugAttr) (parameters tf.Output, accumulators tf.Output, gradient_accumulators tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RetrieveTPUEmbeddingAdagradParametersGradAccumDebug", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// StatelessMultinomialAttr is an optional argument to StatelessMultinomial. 
-type StatelessMultinomialAttr func(optionalAttr) - -// StatelessMultinomialOutputDtype sets the optional output_dtype attribute to value. -// If not specified, defaults to DT_INT64 -func StatelessMultinomialOutputDtype(value tf.DataType) StatelessMultinomialAttr { - return func(m optionalAttr) { - m["output_dtype"] = value - } -} - -// Draws samples from a multinomial distribution. -// -// Arguments: -// logits: 2-D Tensor with shape `[batch_size, num_classes]`. Each slice `[i, :]` -// represents the unnormalized log probabilities for all classes. -// num_samples: 0-D. Number of independent samples to draw for each row slice. -// seed: 2 seeds (shape [2]). -// -// Returns 2-D Tensor with shape `[batch_size, num_samples]`. Each slice `[i, :]` -// contains the drawn class labels with range `[0, num_classes)`. -func StatelessMultinomial(scope *Scope, logits tf.Output, num_samples tf.Output, seed tf.Output, optional ...StatelessMultinomialAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StatelessMultinomial", - Input: []tf.Input{ - logits, num_samples, seed, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns a copy of the input tensor. -func Snapshot(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Snapshot", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Adds up a `SparseTensor` and a dense `Tensor`, producing a dense `Tensor`. -// -// This Op does not require `a_indices` be sorted in standard lexicographic order. -// -// Arguments: -// a_indices: 2-D. The `indices` of the `SparseTensor`, with shape `[nnz, ndims]`. -// a_values: 1-D. The `values` of the `SparseTensor`, with shape `[nnz]`. -// a_shape: 1-D. The `shape` of the `SparseTensor`, with shape `[ndims]`. -// b: `ndims`-D Tensor. With shape `a_shape`. -func SparseTensorDenseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseTensorDenseAdd", - Input: []tf.Input{ - a_indices, a_values, a_shape, b, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RaggedBincountAttr is an optional argument to RaggedBincount. -type RaggedBincountAttr func(optionalAttr) - -// RaggedBincountBinaryOutput sets the optional binary_output attribute to value. -// -// value: bool; Whether the kernel should count the appearance or number of occurrences. -// If not specified, defaults to false -func RaggedBincountBinaryOutput(value bool) RaggedBincountAttr { - return func(m optionalAttr) { - m["binary_output"] = value - } -} - -// Counts the number of occurrences of each value in an integer array. -// -// Outputs a vector with length `size` and the same dtype as `weights`. If -// `weights` are empty, then index `i` stores the number of times the value `i` is -// counted in `arr`. If `weights` are non-empty, then index `i` stores the sum of -// the value in `weights` at each index where the corresponding value in `arr` is -// `i`. -// -// Values in `arr` outside of the range [0, size) are ignored. -// -// Arguments: -// splits: 1D int64 `Tensor`. -// values: 2D int `Tensor`. -// size: non-negative int scalar `Tensor`. 
-// weights: is an int32, int64, float32, or float64 `Tensor` with the same -// shape as `input`, or a length-0 `Tensor`, in which case it acts as all weights -// equal to 1. -// -// Returns 1D `Tensor` with length equal to `size` or 2D `Tensor` with [batch_size, `size`]. -// The counts or summed weights for each value in the range [0, size). -func RaggedBincount(scope *Scope, splits tf.Output, values tf.Output, size tf.Output, weights tf.Output, optional ...RaggedBincountAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RaggedBincount", - Input: []tf.Input{ - splits, values, size, weights, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// StatelessRandomNormalAttr is an optional argument to StatelessRandomNormal. -type StatelessRandomNormalAttr func(optionalAttr) - -// StatelessRandomNormalDtype sets the optional dtype attribute to value. -// -// value: The type of the output. -// If not specified, defaults to DT_FLOAT -func StatelessRandomNormalDtype(value tf.DataType) StatelessRandomNormalAttr { - return func(m optionalAttr) { - m["dtype"] = value - } -} - -// Outputs deterministic pseudorandom values from a normal distribution. -// -// The generated values will have mean 0 and standard deviation 1. -// -// The outputs are a deterministic function of `shape` and `seed`. -// -// Arguments: -// shape: The shape of the output tensor. -// seed: 2 seeds (shape [2]). -// -// Returns Random values with specified shape. -func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomNormalAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StatelessRandomNormal", - Input: []tf.Input{ - shape, seed, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the sum along sparse segments of a tensor divided by the sqrt of N. -// -// N is the size of the segment being reduced. -// -// See `tf.sparse.segment_sum` for usage examples. -// -// -// Arguments: -// -// indices: A 1-D tensor. Has same rank as `segment_ids`. -// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SparseSegmentSqrtN(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSegmentSqrtN", - Input: []tf.Input{ - data, indices, segment_ids, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// UnicodeDecodeWithOffsetsAttr is an optional argument to UnicodeDecodeWithOffsets. -type UnicodeDecodeWithOffsetsAttr func(optionalAttr) - -// UnicodeDecodeWithOffsetsErrors sets the optional errors attribute to value. -// -// value: Error handling policy when there is invalid formatting found in the input. -// The value of 'strict' will cause the operation to produce a InvalidArgument -// error on any invalid input formatting. A value of 'replace' (the default) will -// cause the operation to replace any invalid formatting in the input with the -// `replacement_char` codepoint. 
A value of 'ignore' will cause the operation to -// skip any invalid formatting in the input and produce no corresponding output -// character. -// If not specified, defaults to "replace" -func UnicodeDecodeWithOffsetsErrors(value string) UnicodeDecodeWithOffsetsAttr { - return func(m optionalAttr) { - m["errors"] = value - } -} - -// UnicodeDecodeWithOffsetsReplacementChar sets the optional replacement_char attribute to value. -// -// value: The replacement character codepoint to be used in place of any invalid -// formatting in the input when `errors='replace'`. Any valid unicode codepoint may -// be used. The default value is the default unicode replacement character is -// 0xFFFD or U+65533.) -// If not specified, defaults to 65533 -func UnicodeDecodeWithOffsetsReplacementChar(value int64) UnicodeDecodeWithOffsetsAttr { - return func(m optionalAttr) { - m["replacement_char"] = value - } -} - -// UnicodeDecodeWithOffsetsReplaceControlCharacters sets the optional replace_control_characters attribute to value. -// -// value: Whether to replace the C0 control characters (00-1F) with the -// `replacement_char`. Default is false. -// If not specified, defaults to false -func UnicodeDecodeWithOffsetsReplaceControlCharacters(value bool) UnicodeDecodeWithOffsetsAttr { - return func(m optionalAttr) { - m["replace_control_characters"] = value - } -} - -// UnicodeDecodeWithOffsetsTsplits sets the optional Tsplits attribute to value. -// If not specified, defaults to DT_INT64 -func UnicodeDecodeWithOffsetsTsplits(value tf.DataType) UnicodeDecodeWithOffsetsAttr { - return func(m optionalAttr) { - m["Tsplits"] = value - } -} - -// Decodes each string in `input` into a sequence of Unicode code points. -// -// The character codepoints for all strings are returned using a single vector -// `char_values`, with strings expanded to characters in row-major order. -// Similarly, the character start byte offsets are returned using a single vector -// `char_to_byte_starts`, with strings expanded in row-major order. -// -// The `row_splits` tensor indicates where the codepoints and start offsets for -// each input string begin and end within the `char_values` and -// `char_to_byte_starts` tensors. In particular, the values for the `i`th -// string (in row-major order) are stored in the slice -// `[row_splits[i]:row_splits[i+1]]`. Thus: -// -// * `char_values[row_splits[i]+j]` is the Unicode codepoint for the `j`th -// character in the `i`th string (in row-major order). -// * `char_to_bytes_starts[row_splits[i]+j]` is the start byte offset for the `j`th -// character in the `i`th string (in row-major order). -// * `row_splits[i+1] - row_splits[i]` is the number of characters in the `i`th -// string (in row-major order). -// -// Arguments: -// input: The text to be decoded. Can have any shape. Note that the output is flattened -// to a vector of char values. -// input_encoding: Text encoding of the input strings. This is any of the encodings supported -// by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`. -// -// Returns: -// row_splits: A 1D int32 tensor containing the row splits. -// char_values: A 1D int32 Tensor containing the decoded codepoints. -// char_to_byte_starts: A 1D int32 Tensor containing the byte index in the input string where each -// character in `char_values` starts. 
-func UnicodeDecodeWithOffsets(scope *Scope, input tf.Output, input_encoding string, optional ...UnicodeDecodeWithOffsetsAttr) (row_splits tf.Output, char_values tf.Output, char_to_byte_starts tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"input_encoding": input_encoding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "UnicodeDecodeWithOffsets", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// This op is used as a placeholder in If branch functions. It doesn't provide a -// valid output when run, so must either be removed (e.g. replaced with a -// function input) or guaranteed not to be used (e.g. if mirroring an -// intermediate output needed for the gradient computation of the other branch). -// -// Arguments: -// dtype: The type of the output. -// shape: The purported shape of the output. This is only used for shape inference; -// the output will not necessarily have this shape. Can be a partial shape. -// -// Returns \"Fake\" output value. This should not be consumed by another op. -func FakeParam(scope *Scope, dtype tf.DataType, shape tf.Shape) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype, "shape": shape} - opspec := tf.OpSpec{ - Type: "FakeParam", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// UnicodeTranscodeAttr is an optional argument to UnicodeTranscode. -type UnicodeTranscodeAttr func(optionalAttr) - -// UnicodeTranscodeErrors sets the optional errors attribute to value. -// -// value: Error handling policy when there is invalid formatting found in the input. -// The value of 'strict' will cause the operation to produce a InvalidArgument -// error on any invalid input formatting. A value of 'replace' (the default) will -// cause the operation to replace any invalid formatting in the input with the -// `replacement_char` codepoint. A value of 'ignore' will cause the operation to -// skip any invalid formatting in the input and produce no corresponding output -// character. -// If not specified, defaults to "replace" -func UnicodeTranscodeErrors(value string) UnicodeTranscodeAttr { - return func(m optionalAttr) { - m["errors"] = value - } -} - -// UnicodeTranscodeReplacementChar sets the optional replacement_char attribute to value. -// -// value: The replacement character codepoint to be used in place of any invalid -// formatting in the input when `errors='replace'`. Any valid unicode codepoint may -// be used. The default value is the default unicode replacement character is -// 0xFFFD or U+65533.) -// -// Note that for UTF-8, passing a replacement character expressible in 1 byte, such -// as ' ', will preserve string alignment to the source since invalid bytes will be -// replaced with a 1-byte replacement. For UTF-16-BE and UTF-16-LE, any 1 or 2 byte -// replacement character will preserve byte alignment to the source. -// If not specified, defaults to 65533 -func UnicodeTranscodeReplacementChar(value int64) UnicodeTranscodeAttr { - return func(m optionalAttr) { - m["replacement_char"] = value - } -} - -// UnicodeTranscodeReplaceControlCharacters sets the optional replace_control_characters attribute to value. -// -// value: Whether to replace the C0 control characters (00-1F) with the -// `replacement_char`. Default is false. 
-// If not specified, defaults to false -func UnicodeTranscodeReplaceControlCharacters(value bool) UnicodeTranscodeAttr { - return func(m optionalAttr) { - m["replace_control_characters"] = value - } -} - -// Transcode the input text from a source encoding to a destination encoding. -// -// The input is a string tensor of any shape. The output is a string tensor of -// the same shape containing the transcoded strings. Output strings are always -// valid unicode. If the input contains invalid encoding positions, the -// `errors` attribute sets the policy for how to deal with them. If the default -// error-handling policy is used, invalid formatting will be substituted in the -// output by the `replacement_char`. If the errors policy is to `ignore`, any -// invalid encoding positions in the input are skipped and not included in the -// output. If it set to `strict` then any invalid formatting will result in an -// InvalidArgument error. -// -// This operation can be used with `output_encoding = input_encoding` to enforce -// correct formatting for inputs even if they are already in the desired encoding. -// -// If the input is prefixed by a Byte Order Mark needed to determine encoding -// (e.g. if the encoding is UTF-16 and the BOM indicates big-endian), then that -// BOM will be consumed and not emitted into the output. If the input encoding -// is marked with an explicit endianness (e.g. UTF-16-BE), then the BOM is -// interpreted as a non-breaking-space and is preserved in the output (including -// always for UTF-8). -// -// The end result is that if the input is marked as an explicit endianness the -// transcoding is faithful to all codepoints in the source. If it is not marked -// with an explicit endianness, the BOM is not considered part of the string itself -// but as metadata, and so is not preserved in the output. -// -// Examples: -// -// >>> tf.strings.unicode_transcode(["Hello", "TensorFlow", "2.x"], "UTF-8", "UTF-16-BE") -// -// >>> tf.strings.unicode_transcode(["A", "B", "C"], "US ASCII", "UTF-8").numpy() -// array([b'A', b'B', b'C'], dtype=object) -// -// Arguments: -// input: The text to be processed. Can have any shape. -// input_encoding: Text encoding of the input strings. This is any of the encodings supported -// by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`. -// output_encoding: The unicode encoding to use in the output. Must be one of -// `"UTF-8", "UTF-16-BE", "UTF-32-BE"`. Multi-byte encodings will be big-endian. -// -// Returns A string tensor containing unicode text encoded using `output_encoding`. -func UnicodeTranscode(scope *Scope, input tf.Output, input_encoding string, output_encoding string, optional ...UnicodeTranscodeAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"input_encoding": input_encoding, "output_encoding": output_encoding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "UnicodeTranscode", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns x // y element-wise. -// -// *NOTE*: `FloorDiv` supports broadcasting. 
More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func FloorDiv(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "FloorDiv", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// An Op to permute tensors across replicated TPU instances. -// -// Each instance supplies its own input. -// -// For example, suppose there are 4 TPU instances: `[A, B, C, D]`. Passing -// source_target_pairs=`[[0,1],[1,2],[2,3],[3,0]]` gets the outputs: -// `[D, A, B, C]`. -// -// Arguments: -// input: The local input to be permuted. Currently only supports float and -// bfloat16. -// source_target_pairs: A tensor with shape [num_pairs, 2]. -// -// Returns The permuted input. -func CollectivePermute(scope *Scope, input tf.Output, source_target_pairs tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "CollectivePermute", - Input: []tf.Input{ - input, source_target_pairs, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// QuantizedReluXAttr is an optional argument to QuantizedReluX. -type QuantizedReluXAttr func(optionalAttr) - -// QuantizedReluXOutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_QUINT8 -func QuantizedReluXOutType(value tf.DataType) QuantizedReluXAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// Computes Quantized Rectified Linear X: `min(max(features, 0), max_value)` -// -// Arguments: -// -// -// min_features: The float value that the lowest quantized value represents. -// max_features: The float value that the highest quantized value represents. -// -// Returns: -// activations: Has the same output shape as "features". -// min_activations: The float value that the lowest quantized value represents. -// max_activations: The float value that the highest quantized value represents. -func QuantizedReluX(scope *Scope, features tf.Output, max_value tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedReluXAttr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizedReluX", - Input: []tf.Input{ - features, max_value, min_features, max_features, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Forwards `data` to the output port determined by `pred`. -// -// If `pred` is true, the `data` input is forwarded to `output_true`. Otherwise, -// the data goes to `output_false`. -// -// See also `RefSwitch` and `Merge`. -// -// Arguments: -// data: The tensor to be forwarded to the appropriate output. -// pred: A scalar that specifies which output port will receive data. -// -// Returns: -// output_false: If `pred` is false, data will be forwarded to this output. -// output_true: If `pred` is true, data will be forwarded to this output. 
-func Switch(scope *Scope, data tf.Output, pred tf.Output) (output_false tf.Output, output_true tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Switch", - Input: []tf.Input{ - data, pred, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// RetrieveTPUEmbeddingFTRLParametersGradAccumDebugAttr is an optional argument to RetrieveTPUEmbeddingFTRLParametersGradAccumDebug. -type RetrieveTPUEmbeddingFTRLParametersGradAccumDebugAttr func(optionalAttr) - -// RetrieveTPUEmbeddingFTRLParametersGradAccumDebugTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func RetrieveTPUEmbeddingFTRLParametersGradAccumDebugTableId(value int64) RetrieveTPUEmbeddingFTRLParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// RetrieveTPUEmbeddingFTRLParametersGradAccumDebugTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingFTRLParametersGradAccumDebugTableName(value string) RetrieveTPUEmbeddingFTRLParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// RetrieveTPUEmbeddingFTRLParametersGradAccumDebugConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingFTRLParametersGradAccumDebugConfig(value string) RetrieveTPUEmbeddingFTRLParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Retrieve FTRL embedding parameters with debug support. -// -// An op that retrieves optimization parameters from embedding to host -// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up -// the correct embedding table configuration. For example, this op is -// used to retrieve updated parameters before saving a checkpoint. -// -// Returns: -// parameters: Parameter parameters updated by the FTRL optimization algorithm. -// accumulators: Parameter accumulators updated by the FTRL optimization algorithm. -// linears: Parameter linears updated by the FTRL optimization algorithm. -// gradient_accumulators: Parameter gradient_accumulators updated by the FTRL optimization algorithm. -func RetrieveTPUEmbeddingFTRLParametersGradAccumDebug(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingFTRLParametersGradAccumDebugAttr) (parameters tf.Output, accumulators tf.Output, linears tf.Output, gradient_accumulators tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RetrieveTPUEmbeddingFTRLParametersGradAccumDebug", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3) -} - -// UnicodeEncodeAttr is an optional argument to UnicodeEncode. -type UnicodeEncodeAttr func(optionalAttr) - -// UnicodeEncodeErrors sets the optional errors attribute to value. -// -// value: Error handling policy when there is invalid formatting found in the input. -// The value of 'strict' will cause the operation to produce a InvalidArgument -// error on any invalid input formatting. A value of 'replace' (the default) will -// cause the operation to replace any invalid formatting in the input with the -// `replacement_char` codepoint. 
A value of 'ignore' will cause the operation to -// skip any invalid formatting in the input and produce no corresponding output -// character. -// If not specified, defaults to "replace" -func UnicodeEncodeErrors(value string) UnicodeEncodeAttr { - return func(m optionalAttr) { - m["errors"] = value - } -} - -// UnicodeEncodeReplacementChar sets the optional replacement_char attribute to value. -// -// value: The replacement character codepoint to be used in place of any invalid -// formatting in the input when `errors='replace'`. Any valid unicode codepoint may -// be used. The default value is the default unicode replacement character is -// 0xFFFD (U+65533). -// If not specified, defaults to 65533 -func UnicodeEncodeReplacementChar(value int64) UnicodeEncodeAttr { - return func(m optionalAttr) { - m["replacement_char"] = value - } -} - -// Encode a tensor of ints into unicode strings. -// -// Returns a vector of strings, where `output[i]` is constructed by encoding the -// Unicode codepoints in `input_values[input_splits[i]:input_splits[i+1]]` -// using `output_encoding`. -// -// --- -// -// Example: -// -// ``` -// input_values = [72, 101, 108, 108, 111, 87, 111, 114, 108, 100] -// input_splits = [0, 5, 10] -// output_encoding = 'UTF-8' -// -// output = ['Hello', 'World'] -// ``` -// -// Arguments: -// input_values: A 1D tensor containing the unicode codepoints that should be encoded. -// input_splits: A 1D tensor specifying how the unicode codepoints should be split into strings. -// In particular, `output[i]` is constructed by encoding the codepoints in the -// slice `input_values[input_splits[i]:input_splits[i+1]]`. -// output_encoding: Unicode encoding of the output strings. Valid encodings are: `"UTF-8", -// "UTF-16-BE", and "UTF-32-BE"`. -// -// Returns The 1-D Tensor of strings encoded from the provided unicode codepoints. -func UnicodeEncode(scope *Scope, input_values tf.Output, input_splits tf.Output, output_encoding string, optional ...UnicodeEncodeAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_encoding": output_encoding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "UnicodeEncode", - Input: []tf.Input{ - input_values, input_splits, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// PrelinearizeTupleAttr is an optional argument to PrelinearizeTuple. -type PrelinearizeTupleAttr func(optionalAttr) - -// PrelinearizeTupleLayouts sets the optional layouts attribute to value. -// -// value: A vector holding the requested layout in minor-to-major sequence for all the -// tuple shapes in the order the shapes appear in the "shapes" input. The layout -// elements for a sub-shape can be set to -1 in which case the corresponding layout -// will be computed by the infeed operation. -// If not specified, defaults to <> -func PrelinearizeTupleLayouts(value []int64) PrelinearizeTupleAttr { - return func(m optionalAttr) { - m["layouts"] = value - } -} - -// An op which linearizes multiple Tensor values to an opaque variant tensor. -// -// Arguments: -// inputs: A list of tensors that will be provided using the infeed mechanism. -// shapes: The shapes of each tensor in `inputs`. 
-func PrelinearizeTuple(scope *Scope, inputs []tf.Output, shapes []tf.Shape, optional ...PrelinearizeTupleAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"shapes": shapes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "PrelinearizeTuple", - Input: []tf.Input{ - tf.OutputList(inputs), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the LSTM cell backward propagation for the entire time sequence. -// -// This implementation is to be used in conjunction of LSTMBlock. -// -// Arguments: -// seq_len_max: Maximum time length actually used by this input. Outputs are padded -// with zeros beyond this length. -// x: The sequence input to the LSTM, shape (timelen, batch_size, num_inputs). -// cs_prev: Value of the initial cell state. -// h_prev: Initial output of cell (to be used for peephole). -// w: The weight matrix. -// wci: The weight matrix for input gate peephole connection. -// wcf: The weight matrix for forget gate peephole connection. -// wco: The weight matrix for output gate peephole connection. -// b: The bias vector. -// i: The input gate over the whole time sequence. -// cs: The cell state before the tanh over the whole time sequence. -// f: The forget gate over the whole time sequence. -// o: The output gate over the whole time sequence. -// ci: The cell input over the whole time sequence. -// co: The cell after the tanh over the whole time sequence. -// h: The output h vector over the whole time sequence. -// cs_grad: The current gradient of cs. -// h_grad: The gradient of h vector. -// use_peephole: Whether to use peephole weights. -// -// Returns: -// x_grad: The gradient of x to be back-propped. -// cs_prev_grad: The gradient of cs_prev to be back-propped. -// h_prev_grad: The gradient of h_prev to be back-propped. -// w_grad: The gradient for w to be back-propped. -// wci_grad: The gradient for wci to be back-propped. -// wcf_grad: The gradient for wcf to be back-propped. -// wco_grad: The gradient for wco to be back-propped. -// b_grad: The gradient for w to be back-propped. -func BlockLSTMGrad(scope *Scope, seq_len_max tf.Output, x tf.Output, cs_prev tf.Output, h_prev tf.Output, w tf.Output, wci tf.Output, wcf tf.Output, wco tf.Output, b tf.Output, i tf.Output, cs tf.Output, f tf.Output, o tf.Output, ci tf.Output, co tf.Output, h tf.Output, cs_grad tf.Output, h_grad tf.Output, use_peephole bool) (x_grad tf.Output, cs_prev_grad tf.Output, h_prev_grad tf.Output, w_grad tf.Output, wci_grad tf.Output, wcf_grad tf.Output, wco_grad tf.Output, b_grad tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"use_peephole": use_peephole} - opspec := tf.OpSpec{ - Type: "BlockLSTMGrad", - Input: []tf.Input{ - seq_len_max, x, cs_prev, h_prev, w, wci, wcf, wco, b, i, cs, f, o, ci, co, h, cs_grad, h_grad, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6), op.Output(7) -} - -// OutfeedDequeueTupleAttr is an optional argument to OutfeedDequeueTuple. -type OutfeedDequeueTupleAttr func(optionalAttr) - -// OutfeedDequeueTupleDeviceOrdinal sets the optional device_ordinal attribute to value. -// -// value: The TPU device to use. This should be -1 when the Op -// is running on a TPU device, and >= 0 when the Op is running on the CPU -// device. 
-// If not specified, defaults to -1 -func OutfeedDequeueTupleDeviceOrdinal(value int64) OutfeedDequeueTupleAttr { - return func(m optionalAttr) { - m["device_ordinal"] = value - } -} - -// Retrieve multiple values from the computation outfeed. -// -// This operation will block indefinitely until data is available. Output `i` -// corresponds to XLA tuple element `i`. -// -// Arguments: -// dtypes: The element types of each element in `outputs`. -// shapes: The shapes of each tensor in `outputs`. -// -// Returns A list of tensors that will be read from the outfeed. -func OutfeedDequeueTuple(scope *Scope, dtypes []tf.DataType, shapes []tf.Shape, optional ...OutfeedDequeueTupleAttr) (outputs []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes, "shapes": shapes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "OutfeedDequeueTuple", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { - scope.UpdateErr("OutfeedDequeueTuple", err) - return - } - return outputs -} - -// DecodeCompressedAttr is an optional argument to DecodeCompressed. -type DecodeCompressedAttr func(optionalAttr) - -// DecodeCompressedCompressionType sets the optional compression_type attribute to value. -// -// value: A scalar containing either (i) the empty string (no -// compression), (ii) "ZLIB", or (iii) "GZIP". -// If not specified, defaults to "" -func DecodeCompressedCompressionType(value string) DecodeCompressedAttr { - return func(m optionalAttr) { - m["compression_type"] = value - } -} - -// Decompress strings. -// -// This op decompresses each element of the `bytes` input `Tensor`, which -// is assumed to be compressed using the given `compression_type`. -// -// The `output` is a string `Tensor` of the same shape as `bytes`, -// each element containing the decompressed data from the corresponding -// element in `bytes`. -// -// Arguments: -// bytes: A Tensor of string which is compressed. -// -// Returns A Tensor with the same shape as input `bytes`, uncompressed -// from bytes. -func DecodeCompressed(scope *Scope, bytes tf.Output, optional ...DecodeCompressedAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DecodeCompressed", - Input: []tf.Input{ - bytes, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Converts each string in the input Tensor to its hash mod by a number of buckets. -// -// The hash function is deterministic on the content of the string within the -// process. The hash function is a keyed hash function, where attribute `key` -// defines the key of the hash function. `key` is an array of 2 elements. -// -// A strong hash is important when inputs may be malicious, e.g. URLs with -// additional components. Adversaries could try to make their inputs hash to the -// same bucket for a denial-of-service attack or to skew the results. A strong -// hash can be used to make it difficult to find inputs with a skewed hash value -// distribution over buckets. This requires that the hash function is -// seeded by a high-entropy (random) "key" unknown to the adversary. -// -// The additional robustness comes at a cost of roughly 4x higher compute -// time than `tf.string_to_hash_bucket_fast`. 
-// -// Examples: -// -// >>> tf.strings.to_hash_bucket_strong(["Hello", "TF"], 3, [1, 2]).numpy() -// array([2, 0]) -// -// Arguments: -// input: The strings to assign a hash bucket. -// num_buckets: The number of buckets. -// key: The key used to seed the hash function, passed as a list of two uint64 -// elements. -// -// Returns A Tensor of the same shape as the input `string_tensor`. -func StringToHashBucketStrong(scope *Scope, input tf.Output, num_buckets int64, key []int64) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_buckets": num_buckets, "key": key} - opspec := tf.OpSpec{ - Type: "StringToHashBucketStrong", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Deserialize bucket boundaries and ready flag into current QuantileAccumulator. -// -// An op that deserializes bucket boundaries and are boundaries ready flag into current QuantileAccumulator. -// -// Arguments: -// quantile_stream_resource_handle: resource handle referring to a QuantileStreamResource. -// bucket_boundaries: float; List of Rank 1 Tensors each containing the bucket boundaries for a feature. -// -// Returns the created operation. -func BoostedTreesQuantileStreamResourceDeserialize(scope *Scope, quantile_stream_resource_handle tf.Output, bucket_boundaries []tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BoostedTreesQuantileStreamResourceDeserialize", - Input: []tf.Input{ - quantile_stream_resource_handle, tf.OutputList(bucket_boundaries), - }, - } - return scope.AddOperation(opspec) -} - -// ResourceApplyAdadeltaAttr is an optional argument to ResourceApplyAdadelta. -type ResourceApplyAdadeltaAttr func(optionalAttr) - -// ResourceApplyAdadeltaUseLocking sets the optional use_locking attribute to value. -// -// value: If True, updating of the var, accum and update_accum tensors will be protected by -// a lock; otherwise the behavior is undefined, but may exhibit less contention. -// If not specified, defaults to false -func ResourceApplyAdadeltaUseLocking(value bool) ResourceApplyAdadeltaAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' according to the adadelta scheme. -// -// accum = rho() * accum + (1 - rho()) * grad.square(); -// update = (update_accum + epsilon).sqrt() * (accum + epsilon()).rsqrt() * grad; -// update_accum = rho() * update_accum + (1 - rho()) * update.square(); -// var -= update; -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// accum_update: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// rho: Decay factor. Must be a scalar. -// epsilon: Constant factor. Must be a scalar. -// grad: The gradient. -// -// Returns the created operation. -func ResourceApplyAdadelta(scope *Scope, var_ tf.Output, accum tf.Output, accum_update tf.Output, lr tf.Output, rho tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdadeltaAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyAdadelta", - Input: []tf.Input{ - var_, accum, accum_update, lr, rho, epsilon, grad, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Converts each string in the input Tensor to its hash mod by a number of buckets. 
-// -// The hash function is deterministic on the content of the string within the -// process and will never change. However, it is not suitable for cryptography. -// This function may be used when CPU time is scarce and inputs are trusted or -// unimportant. There is a risk of adversaries constructing inputs that all hash -// to the same bucket. To prevent this problem, use a strong hash function with -// `tf.string_to_hash_bucket_strong`. -// -// Examples: -// -// >>> tf.strings.to_hash_bucket_fast(["Hello", "TensorFlow", "2.x"], 3).numpy() -// array([0, 2, 2]) -// -// Arguments: -// input: The strings to assign a hash bucket. -// num_buckets: The number of buckets. -// -// Returns A Tensor of the same shape as the input `string_tensor`. -func StringToHashBucketFast(scope *Scope, input tf.Output, num_buckets int64) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_buckets": num_buckets} - opspec := tf.OpSpec{ - Type: "StringToHashBucketFast", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// StringJoinAttr is an optional argument to StringJoin. -type StringJoinAttr func(optionalAttr) - -// StringJoinSeparator sets the optional separator attribute to value. -// -// value: string, an optional join separator. -// If not specified, defaults to "" -func StringJoinSeparator(value string) StringJoinAttr { - return func(m optionalAttr) { - m["separator"] = value - } -} - -// Joins the strings in the given list of string tensors into one tensor; -// -// with the given separator (default is an empty separator). -// -// Examples: -// -// >>> s = ["hello", "world", "tensorflow"] -// >>> tf.strings.join(s, " ") -// -// -// Arguments: -// inputs: A list of string tensors. The tensors must all have the same shape, -// or be scalars. Scalars may be mixed in; these will be broadcast to the shape -// of non-scalar inputs. -func StringJoin(scope *Scope, inputs []tf.Output, optional ...StringJoinAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StringJoin", - Input: []tf.Input{ - tf.OutputList(inputs), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Replaces the contents of the table with the specified keys and values. -// -// The tensor `keys` must be of the same type as the keys of the table. -// The tensor `values` must be of the type of the table values. -// -// Arguments: -// table_handle: Handle to the table. -// keys: Any shape. Keys to look up. -// values: Values to associate with keys. -// -// Returns the created operation. -func LookupTableImportV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "LookupTableImportV2", - Input: []tf.Input{ - table_handle, keys, values, - }, - } - return scope.AddOperation(opspec) -} - -// LoadTPUEmbeddingMomentumParametersAttr is an optional argument to LoadTPUEmbeddingMomentumParameters. -type LoadTPUEmbeddingMomentumParametersAttr func(optionalAttr) - -// LoadTPUEmbeddingMomentumParametersTableId sets the optional table_id attribute to value. 
-// If not specified, defaults to -1 -func LoadTPUEmbeddingMomentumParametersTableId(value int64) LoadTPUEmbeddingMomentumParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// LoadTPUEmbeddingMomentumParametersTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingMomentumParametersTableName(value string) LoadTPUEmbeddingMomentumParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// LoadTPUEmbeddingMomentumParametersConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingMomentumParametersConfig(value string) LoadTPUEmbeddingMomentumParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Load Momentum embedding parameters. -// -// An op that loads optimization parameters into HBM for embedding. Must be -// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -// embedding table configuration. For example, this op is used to install -// parameters that are loaded from a checkpoint before a training loop is -// executed. -// -// Arguments: -// parameters: Value of parameters used in the Momentum optimization algorithm. -// momenta: Value of momenta used in the Momentum optimization algorithm. -// -// -// -// Returns the created operation. -func LoadTPUEmbeddingMomentumParameters(scope *Scope, parameters tf.Output, momenta tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingMomentumParametersAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LoadTPUEmbeddingMomentumParameters", - Input: []tf.Input{ - parameters, momenta, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// SkipgramAttr is an optional argument to Skipgram. -type SkipgramAttr func(optionalAttr) - -// SkipgramWindowSize sets the optional window_size attribute to value. -// -// value: The number of words to predict to the left and right of the target. -// If not specified, defaults to 5 -func SkipgramWindowSize(value int64) SkipgramAttr { - return func(m optionalAttr) { - m["window_size"] = value - } -} - -// SkipgramMinCount sets the optional min_count attribute to value. -// -// value: The minimum number of word occurrences for it to be included in the -// vocabulary. -// If not specified, defaults to 5 -func SkipgramMinCount(value int64) SkipgramAttr { - return func(m optionalAttr) { - m["min_count"] = value - } -} - -// SkipgramSubsample sets the optional subsample attribute to value. -// -// value: Threshold for word occurrence. Words that appear with higher -// frequency will be randomly down-sampled. Set to 0 to disable. -// If not specified, defaults to 0.001 -func SkipgramSubsample(value float32) SkipgramAttr { - return func(m optionalAttr) { - m["subsample"] = value - } -} - -// Parses a text file and creates a batch of examples. -// -// DEPRECATED at GraphDef version 19: Moving word2vec into tensorflow_models/tutorials and deprecating its ops here as a result -// -// Arguments: -// filename: The corpus's text file name. -// batch_size: The size of produced batch. -// -// Returns: -// vocab_word: A vector of words in the corpus. -// vocab_freq: Frequencies of words. Sorted in the non-ascending order. -// words_per_epoch: Number of words per epoch in the data file. 
-// current_epoch: The current epoch number. -// total_words_processed: The total number of words processed so far. -// examples: A vector of word ids. -// labels: A vector of word ids. -func Skipgram(scope *Scope, filename string, batch_size int64, optional ...SkipgramAttr) (vocab_word tf.Output, vocab_freq tf.Output, words_per_epoch tf.Output, current_epoch tf.Output, total_words_processed tf.Output, examples tf.Output, labels tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"filename": filename, "batch_size": batch_size} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Skipgram", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6) -} - -// StaticRegexReplaceAttr is an optional argument to StaticRegexReplace. -type StaticRegexReplaceAttr func(optionalAttr) - -// StaticRegexReplaceReplaceGlobal sets the optional replace_global attribute to value. -// -// value: If True, the replacement is global, otherwise the replacement -// is done only on the first match. -// If not specified, defaults to true -func StaticRegexReplaceReplaceGlobal(value bool) StaticRegexReplaceAttr { - return func(m optionalAttr) { - m["replace_global"] = value - } -} - -// Replaces the match of pattern in input with rewrite. -// -// It follows the re2 syntax (https://github.com/google/re2/wiki/Syntax) -// -// Arguments: -// input: The text to be processed. -// pattern: The regular expression to match the input. -// rewrite: The rewrite to be applied to the matched expression. -// -// Returns The text after applying pattern and rewrite. -func StaticRegexReplace(scope *Scope, input tf.Output, pattern string, rewrite string, optional ...StaticRegexReplaceAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"pattern": pattern, "rewrite": rewrite} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StaticRegexReplace", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns which elements of x are finite. -// -// @compatibility(numpy) -// Equivalent to np.isfinite -// @end_compatibility -// -// Example: -// -// ```python -// x = tf.constant([5.0, 4.8, 6.8, np.inf, np.nan]) -// tf.math.is_finite(x) ==> [True, True, True, False, False] -// ``` -func IsFinite(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "IsFinite", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns a tensor of zeros with the same shape and type as x. -// -// Arguments: -// x: a tensor of type T. -// -// Returns a tensor of the same shape and type as x but filled with zeros. -func ZerosLike(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ZerosLike", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RetrieveTPUEmbeddingAdadeltaParametersAttr is an optional argument to RetrieveTPUEmbeddingAdadeltaParameters. -type RetrieveTPUEmbeddingAdadeltaParametersAttr func(optionalAttr) - -// RetrieveTPUEmbeddingAdadeltaParametersTableId sets the optional table_id attribute to value. 
-// If not specified, defaults to -1 -func RetrieveTPUEmbeddingAdadeltaParametersTableId(value int64) RetrieveTPUEmbeddingAdadeltaParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// RetrieveTPUEmbeddingAdadeltaParametersTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingAdadeltaParametersTableName(value string) RetrieveTPUEmbeddingAdadeltaParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// RetrieveTPUEmbeddingAdadeltaParametersConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingAdadeltaParametersConfig(value string) RetrieveTPUEmbeddingAdadeltaParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Retrieve Adadelta embedding parameters. -// -// An op that retrieves optimization parameters from embedding to host -// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up -// the correct embedding table configuration. For example, this op is -// used to retrieve updated parameters before saving a checkpoint. -// -// Returns: -// parameters: Parameter parameters updated by the Adadelta optimization algorithm. -// accumulators: Parameter accumulators updated by the Adadelta optimization algorithm. -// updates: Parameter updates updated by the Adadelta optimization algorithm. -func RetrieveTPUEmbeddingAdadeltaParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingAdadeltaParametersAttr) (parameters tf.Output, accumulators tf.Output, updates tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RetrieveTPUEmbeddingAdadeltaParameters", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// RegexReplaceAttr is an optional argument to RegexReplace. -type RegexReplaceAttr func(optionalAttr) - -// RegexReplaceReplaceGlobal sets the optional replace_global attribute to value. -// -// value: If True, the replacement is global (that is, all matches of the `pattern` regular -// expression in each input string are rewritten), otherwise the `rewrite` -// substitution is only made for the first `pattern` match. -// If not specified, defaults to true -func RegexReplaceReplaceGlobal(value bool) RegexReplaceAttr { - return func(m optionalAttr) { - m["replace_global"] = value - } -} - -// Replaces matches of the `pattern` regular expression in `input` with the -// replacement string provided in `rewrite`. -// -// It follows the re2 syntax (https://github.com/google/re2/wiki/Syntax) -// -// Arguments: -// input: The text to be processed. -// pattern: The regular expression to be matched in the `input` strings. -// rewrite: The rewrite string to be substituted for the `pattern` expression where it is -// matched in the `input` strings. -// -// Returns The text after applying pattern match and rewrite substitution. 
-func RegexReplace(scope *Scope, input tf.Output, pattern tf.Output, rewrite tf.Output, optional ...RegexReplaceAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RegexReplace", - Input: []tf.Input{ - input, pattern, rewrite, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ExperimentalRebatchDatasetAttr is an optional argument to ExperimentalRebatchDataset. -type ExperimentalRebatchDatasetAttr func(optionalAttr) - -// ExperimentalRebatchDatasetUseFallback sets the optional use_fallback attribute to value. -// If not specified, defaults to true -func ExperimentalRebatchDatasetUseFallback(value bool) ExperimentalRebatchDatasetAttr { - return func(m optionalAttr) { - m["use_fallback"] = value - } -} - -// Creates a dataset that changes the batch size. -// -// Creates a dataset that changes the batch size of the dataset to current batch -// size // num_replicas. -// -// Arguments: -// input_dataset: A variant tensor representing the input dataset. -// num_replicas: A scalar representing the number of replicas to distribute this batch across. As -// a result of this transformation the current batch size would end up being -// divided by this parameter. -// -// -func ExperimentalRebatchDataset(scope *Scope, input_dataset tf.Output, num_replicas tf.Output, output_types []tf.DataType, output_shapes []tf.Shape, optional ...ExperimentalRebatchDatasetAttr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ExperimentalRebatchDataset", - Input: []tf.Input{ - input_dataset, num_replicas, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Concatenates tensors along one dimension. -// -// Arguments: -// concat_dim: 0-D. The dimension along which to concatenate. Must be in the -// range [0, rank(values)). -// values: The `N` Tensors to concatenate. Their ranks and types must match, -// and their sizes must match in all dimensions except `concat_dim`. -// -// Returns A `Tensor` with the concatenation of values stacked along the -// `concat_dim` dimension. This tensor's shape matches that of `values` except -// in `concat_dim` where it has the sum of the sizes. -func Concat(scope *Scope, concat_dim tf.Output, values []tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Concat", - Input: []tf.Input{ - concat_dim, tf.OutputList(values), - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceApplyPowerSignAttr is an optional argument to ResourceApplyPowerSign. -type ResourceApplyPowerSignAttr func(optionalAttr) - -// ResourceApplyPowerSignUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var and m tensors is -// protected by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyPowerSignUseLocking(value bool) ResourceApplyPowerSignAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' according to the AddSign update. 
-// -// m_t <- beta1 * m_{t-1} + (1 - beta1) * g -// update <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g -// variable <- variable - lr_t * update -// -// Arguments: -// var_: Should be from a Variable(). -// m: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// logbase: Must be a scalar. -// sign_decay: Must be a scalar. -// beta: Must be a scalar. -// grad: The gradient. -// -// Returns the created operation. -func ResourceApplyPowerSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Output, logbase tf.Output, sign_decay tf.Output, beta tf.Output, grad tf.Output, optional ...ResourceApplyPowerSignAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyPowerSign", - Input: []tf.Input{ - var_, m, lr, logbase, sign_decay, beta, grad, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Converts a tensor to a scalar predicate. -// -// Converts a tensor to a scalar predicate with the following rules: -// -// - For 0D tensors, truthiness is determined by comparing against a "zero" -// value. For numerical types it is the obvious zero. For strings it is the -// empty string. -// -// - For >0D tensors, truthiness is determined by looking at the number of -// elements. If has zero elements, then the result is false. Otherwise the -// result is true. -// -// This matches the behavior of If and While for determining if a tensor counts -// as true/false for a branch condition. -func ToBool(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ToBool", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// GenerateBoundingBoxProposalsAttr is an optional argument to GenerateBoundingBoxProposals. -type GenerateBoundingBoxProposalsAttr func(optionalAttr) - -// GenerateBoundingBoxProposalsPostNmsTopn sets the optional post_nms_topn attribute to value. -// -// value: An integer. Maximum number of rois in the output. -// If not specified, defaults to 300 -func GenerateBoundingBoxProposalsPostNmsTopn(value int64) GenerateBoundingBoxProposalsAttr { - return func(m optionalAttr) { - m["post_nms_topn"] = value - } -} - -// This op produces Region of Interests from given bounding boxes(bbox_deltas) encoded wrt anchors according to eq.2 in arXiv:1506.01497 -// -// The op selects top `pre_nms_topn` scoring boxes, decodes them with respect to anchors, -// applies non-maximal suppression on overlapping boxes with higher than -// `nms_threshold` intersection-over-union (iou) value, discarding boxes where shorter -// side is less than `min_size`. -// Inputs: -// `scores`: A 4D tensor of shape [Batch, Height, Width, Num Anchors] containing the scores per anchor at given position -// `bbox_deltas`: is a tensor of shape [Batch, Height, Width, 4 x Num Anchors] boxes encoded to each anchor -// `anchors`: A 1D tensor of shape [4 x Num Anchors], representing the anchors. -// Outputs: -// `rois`: output RoIs, a 3D tensor of shape [Batch, post_nms_topn, 4], padded by 0 if less than post_nms_topn candidates found. -// `roi_probabilities`: probability scores of each roi in 'rois', a 2D tensor of shape [Batch,post_nms_topn], padded with 0 if needed, sorted by scores. 
-//
-// Arguments:
-// scores: A 4-D float tensor of shape `[num_images, height, width, num_anchors]` containing scores of the boxes for given anchors, can be unsorted.
-// bbox_deltas: A 4-D float tensor of shape `[num_images, height, width, 4 x num_anchors]` encoding boxes with respect to each anchor.
-// Coordinates are given in the form [dy, dx, dh, dw].
-// image_info: A 2-D float tensor of shape `[num_images, 5]` containing image information Height, Width, Scale.
-// anchors: A 2-D float tensor of shape `[num_anchors, 4]` describing the anchor boxes. Boxes are formatted in the form [y1, x1, y2, x2].
-// nms_threshold: A scalar float tensor for non-maximal-suppression threshold.
-// pre_nms_topn: A scalar int tensor for the number of top scoring boxes to be used as input.
-// min_size: A scalar float tensor. Any box that has a smaller size than min_size will be discarded.
-//
-// Returns:
-// rois: A 3-D float tensor of shape `[num_images,post_nms_topn,4]` representing the selected
-// region of interest boxes. Sorted in descending order in scores.
-// roi_probabilities: A 2-D float tensor of shape `[num_images, post_nms_topn]` representing the score of the
-// region of interest box in `rois` tensor at the same index.
-func GenerateBoundingBoxProposals(scope *Scope, scores tf.Output, bbox_deltas tf.Output, image_info tf.Output, anchors tf.Output, nms_threshold tf.Output, pre_nms_topn tf.Output, min_size tf.Output, optional ...GenerateBoundingBoxProposalsAttr) (rois tf.Output, roi_probabilities tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "GenerateBoundingBoxProposals",
-		Input: []tf.Input{
-			scores, bbox_deltas, image_info, anchors, nms_threshold, pre_nms_topn, min_size,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1)
-}
-
-// InitializeTableFromTextFileV2Attr is an optional argument to InitializeTableFromTextFileV2.
-type InitializeTableFromTextFileV2Attr func(optionalAttr)
-
-// InitializeTableFromTextFileV2VocabSize sets the optional vocab_size attribute to value.
-//
-// value: Number of elements of the file, use -1 if unknown.
-// If not specified, defaults to -1
-//
-// REQUIRES: value >= -1
-func InitializeTableFromTextFileV2VocabSize(value int64) InitializeTableFromTextFileV2Attr {
-	return func(m optionalAttr) {
-		m["vocab_size"] = value
-	}
-}
-
-// InitializeTableFromTextFileV2Delimiter sets the optional delimiter attribute to value.
-//
-// value: Delimiter to separate fields in a line.
-// If not specified, defaults to "\t"
-func InitializeTableFromTextFileV2Delimiter(value string) InitializeTableFromTextFileV2Attr {
-	return func(m optionalAttr) {
-		m["delimiter"] = value
-	}
-}
-
-// Initializes a table from a text file.
-//
-// It inserts one key-value pair into the table for each line of the file.
-// The key and value are extracted from the whole line content, elements from the
-// split line based on `delimiter` or the line number (starting from zero).
-// Where to extract the key and value from a line is specified by `key_index` and
-// `value_index`.
-//
-// - A value of -1 means use the line number (starting from zero), expects `int64`.
-// - A value of -2 means use the whole line content, expects `string`.
-// - A value >= 0 means use the index (starting at zero) of the split line based
-// on `delimiter`.
-//
-// Arguments:
-// table_handle: Handle to a table which will be initialized.
-// filename: Filename of a vocabulary text file. -// key_index: Column index in a line to get the table `key` values from. -// value_index: Column index that represents information of a line to get the table -// `value` values from. -// -// Returns the created operation. -func InitializeTableFromTextFileV2(scope *Scope, table_handle tf.Output, filename tf.Output, key_index int64, value_index int64, optional ...InitializeTableFromTextFileV2Attr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"key_index": key_index, "value_index": value_index} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "InitializeTableFromTextFileV2", - Input: []tf.Input{ - table_handle, filename, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Returns an element-wise indication of the sign of a number. -// -// `y = sign(x) = -1` if `x < 0`; 0 if `x == 0`; 1 if `x > 0`. -// -// For complex numbers, `y = sign(x) = x / |x|` if `x != 0`, otherwise `y = 0`. -// -// Example usage: -// >>> tf.math.sign([0., 2., -3.]) -// -func Sign(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Sign", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceApplyAddSignAttr is an optional argument to ResourceApplyAddSign. -type ResourceApplyAddSignAttr func(optionalAttr) - -// ResourceApplyAddSignUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var and m tensors is -// protected by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyAddSignUseLocking(value bool) ResourceApplyAddSignAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' according to the AddSign update. -// -// m_t <- beta1 * m_{t-1} + (1 - beta1) * g -// update <- (alpha + sign_decay * sign(g) *sign(m)) * g -// variable <- variable - lr_t * update -// -// Arguments: -// var_: Should be from a Variable(). -// m: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// alpha: Must be a scalar. -// sign_decay: Must be a scalar. -// beta: Must be a scalar. -// grad: The gradient. -// -// Returns the created operation. -func ResourceApplyAddSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Output, alpha tf.Output, sign_decay tf.Output, beta tf.Output, grad tf.Output, optional ...ResourceApplyAddSignAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyAddSign", - Input: []tf.Input{ - var_, m, lr, alpha, sign_decay, beta, grad, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Returns the number of work units this Reader has finished processing. -// -// Arguments: -// reader_handle: Handle to a Reader. -func ReaderNumWorkUnitsCompletedV2(scope *Scope, reader_handle tf.Output) (units_completed tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReaderNumWorkUnitsCompletedV2", - Input: []tf.Input{ - reader_handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// FractionalMaxPoolAttr is an optional argument to FractionalMaxPool. -type FractionalMaxPoolAttr func(optionalAttr) - -// FractionalMaxPoolPseudoRandom sets the optional pseudo_random attribute to value. 
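A Go equivalent of the tf.math.sign example shown for the Sign wrapper above would look roughly like the sketch below (illustrative only, not part of this patch; it assumes the standard tensorflow/go session workflow).

package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Same values as the tf.math.sign example in the comment above.
	y := op.Sign(s, op.Const(s, []float32{0, 2, -3}))
	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()
	out, err := sess.Run(nil, []tf.Output{y}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // [0 1 -1]
}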
-// -// value: When set to True, generates the pooling sequence in a -// pseudorandom fashion, otherwise, in a random fashion. Check paper [Benjamin -// Graham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071) for -// difference between pseudorandom and random. -// If not specified, defaults to false -func FractionalMaxPoolPseudoRandom(value bool) FractionalMaxPoolAttr { - return func(m optionalAttr) { - m["pseudo_random"] = value - } -} - -// FractionalMaxPoolOverlapping sets the optional overlapping attribute to value. -// -// value: When set to True, it means when pooling, the values at the boundary -// of adjacent pooling cells are used by both cells. For example: -// -// `index 0 1 2 3 4` -// -// `value 20 5 16 3 7` -// -// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice. -// The result would be [20, 16] for fractional max pooling. -// If not specified, defaults to false -func FractionalMaxPoolOverlapping(value bool) FractionalMaxPoolAttr { - return func(m optionalAttr) { - m["overlapping"] = value - } -} - -// FractionalMaxPoolDeterministic sets the optional deterministic attribute to value. -// -// value: When set to True, a fixed pooling region will be used when -// iterating over a FractionalMaxPool node in the computation graph. Mainly used -// in unit test to make FractionalMaxPool deterministic. -// If not specified, defaults to false -func FractionalMaxPoolDeterministic(value bool) FractionalMaxPoolAttr { - return func(m optionalAttr) { - m["deterministic"] = value - } -} - -// FractionalMaxPoolSeed sets the optional seed attribute to value. -// -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func FractionalMaxPoolSeed(value int64) FractionalMaxPoolAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// FractionalMaxPoolSeed2 sets the optional seed2 attribute to value. -// -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func FractionalMaxPoolSeed2(value int64) FractionalMaxPoolAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Performs fractional max pooling on the input. -// -// Fractional max pooling is slightly different than regular max pooling. In -// regular max pooling, you downsize an input set by taking the maximum value of -// smaller N x N subsections of the set (often 2x2), and try to reduce the set by -// a factor of N, where N is an integer. Fractional max pooling, as you might -// expect from the word "fractional", means that the overall reduction ratio N -// does not have to be an integer. -// -// The sizes of the pooling regions are generated randomly but are fairly uniform. -// For example, let's look at the height dimension, and the constraints on the -// list of rows that will be pool boundaries. -// -// First we define the following: -// -// 1. input_row_length : the number of rows from the input set -// 2. output_row_length : which will be smaller than the input -// 3. alpha = input_row_length / output_row_length : our reduction ratio -// 4. K = floor(alpha) -// 5. row_pooling_sequence : this is the result list of pool boundary rows -// -// Then, row_pooling_sequence should satisfy: -// -// 1. a[0] = 0 : the first value of the sequence is 0 -// 2. a[end] = input_row_length : the last value of the sequence is the size -// 3. K <= (a[i+1] - a[i]) <= K+1 : all intervals are K or K+1 size -// 4. 
length(row_pooling_sequence) = output_row_length+1 -// -// For more details on fractional max pooling, see this paper: -// [Benjamin Graham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071) -// -// Arguments: -// value: 4-D with shape `[batch, height, width, channels]`. -// pooling_ratio: Pooling ratio for each dimension of `value`, currently only -// supports row and col dimension and should be >= 1.0. For example, a valid -// pooling ratio looks like [1.0, 1.44, 1.73, 1.0]. The first and last elements -// must be 1.0 because we don't allow pooling on batch and channels -// dimensions. 1.44 and 1.73 are pooling ratio on height and width dimensions -// respectively. -// -// Returns: -// output: output tensor after fractional max pooling. -// row_pooling_sequence: row pooling sequence, needed to calculate gradient. -// col_pooling_sequence: column pooling sequence, needed to calculate gradient. -func FractionalMaxPool(scope *Scope, value tf.Output, pooling_ratio []float32, optional ...FractionalMaxPoolAttr) (output tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"pooling_ratio": pooling_ratio} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FractionalMaxPool", - Input: []tf.Input{ - value, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Computes the reciprocal of x element-wise. -// -// I.e., \\(y = 1 / x\\). -func Reciprocal(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Reciprocal", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// LoadTPUEmbeddingAdagradParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingAdagradParametersGradAccumDebug. -type LoadTPUEmbeddingAdagradParametersGradAccumDebugAttr func(optionalAttr) - -// LoadTPUEmbeddingAdagradParametersGradAccumDebugTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func LoadTPUEmbeddingAdagradParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingAdagradParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// LoadTPUEmbeddingAdagradParametersGradAccumDebugTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingAdagradParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingAdagradParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// LoadTPUEmbeddingAdagradParametersGradAccumDebugConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingAdagradParametersGradAccumDebugConfig(value string) LoadTPUEmbeddingAdagradParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Load Adagrad embedding parameters with debug support. -// -// An op that loads optimization parameters into HBM for embedding. Must be -// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -// embedding table configuration. For example, this op is used to install -// parameters that are loaded from a checkpoint before a training loop is -// executed. -// -// Arguments: -// parameters: Value of parameters used in the Adagrad optimization algorithm. 
-// accumulators: Value of accumulators used in the Adagrad optimization algorithm. -// gradient_accumulators: Value of gradient_accumulators used in the Adagrad optimization algorithm. -// -// -// -// Returns the created operation. -func LoadTPUEmbeddingAdagradParametersGradAccumDebug(scope *Scope, parameters tf.Output, accumulators tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingAdagradParametersGradAccumDebugAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LoadTPUEmbeddingAdagradParametersGradAccumDebug", - Input: []tf.Input{ - parameters, accumulators, gradient_accumulators, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Strip leading and trailing whitespaces from the Tensor. -// -// Arguments: -// input: A string `Tensor` of any shape. -// -// Returns A string `Tensor` of the same shape as the input. -// -// Examples: -// -// >>> tf.strings.strip(["\nTensorFlow", " The python library "]).numpy() -// array([b'TensorFlow', b'The python library'], dtype=object) -func StringStrip(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "StringStrip", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the minimum along segments of a tensor. -// -// Read -// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) -// for an explanation of segments. -// -// Computes a tensor such that -// \\(output_i = \min_j(data_j)\\) where `min` is over `j` such -// that `segment_ids[j] == i`. -// -// If the min is empty for a given segment ID `i`, `output[i] = 0`. -// -//
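As a concrete illustration of the segment-wise minimum described above, the following Go sketch (illustrative only, not part of this patch, assuming the standard tensorflow/go session workflow) evaluates the same example that the comment below walks through for the SegmentMin wrapper.

package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	data := op.Const(s, [][]int32{{1, 2, 3, 4}, {4, 3, 2, 1}, {5, 6, 7, 8}})
	// Rows 0 and 1 form segment 0; row 2 forms segment 1.
	ids := op.Const(s, []int32{0, 0, 1})
	minOut := op.SegmentMin(s, data, ids)
	graph, err := s.Finalize()
	if err != nil {
		panic(err)
	}
	sess, err := tf.NewSession(graph, nil)
	if err != nil {
		panic(err)
	}
	defer sess.Close()
	out, err := sess.Run(nil, []tf.Output{minOut}, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(out[0].Value()) // [[1 2 2 1] [5 6 7 8]]
}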
-// -// For example: -// -// ``` -// c = tf.constant([[1,2,3,4], [4, 3, 2, 1], [5,6,7,8]]) -// tf.segment_min(c, tf.constant([0, 0, 1])) -// # ==> [[1, 2, 2, 1], -// # [5, 6, 7, 8]] -// ``` -// -// Arguments: -// -// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s -// first dimension. Values should be sorted and can be repeated. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SegmentMin(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SegmentMin", - Input: []tf.Input{ - data, segment_ids, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Execute a sub graph on a remote processor. -// -// The graph specifications(such as graph itself, input tensors and output names) -// are stored as a serialized protocol buffer of RemoteFusedGraphExecuteInfo -// as serialized_remote_fused_graph_execute_info. -// The specifications will be passed to a dedicated registered -// remote fused graph executor. The executor will send the graph specifications -// to a remote processor and execute that graph. The execution results -// will be passed to consumer nodes as outputs of this node. -// -// Arguments: -// inputs: Arbitrary number of tensors with arbitrary data types -// -// serialized_remote_fused_graph_execute_info: Serialized protocol buffer -// of RemoteFusedGraphExecuteInfo which contains graph specifications. -// -// Returns Arbitrary number of tensors with arbitrary data types -func RemoteFusedGraphExecute(scope *Scope, inputs []tf.Output, Toutputs []tf.DataType, serialized_remote_fused_graph_execute_info string) (outputs []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"Toutputs": Toutputs, "serialized_remote_fused_graph_execute_info": serialized_remote_fused_graph_execute_info} - opspec := tf.OpSpec{ - Type: "RemoteFusedGraphExecute", - Input: []tf.Input{ - tf.OutputList(inputs), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { - scope.UpdateErr("RemoteFusedGraphExecute", err) - return - } - return outputs -} - -// LoadTPUEmbeddingMDLAdagradLightParametersAttr is an optional argument to LoadTPUEmbeddingMDLAdagradLightParameters. -type LoadTPUEmbeddingMDLAdagradLightParametersAttr func(optionalAttr) - -// LoadTPUEmbeddingMDLAdagradLightParametersTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func LoadTPUEmbeddingMDLAdagradLightParametersTableId(value int64) LoadTPUEmbeddingMDLAdagradLightParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// LoadTPUEmbeddingMDLAdagradLightParametersTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingMDLAdagradLightParametersTableName(value string) LoadTPUEmbeddingMDLAdagradLightParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// LoadTPUEmbeddingMDLAdagradLightParametersConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingMDLAdagradLightParametersConfig(value string) LoadTPUEmbeddingMDLAdagradLightParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Load MDL Adagrad Light embedding parameters. 
-// -// An op that loads optimization parameters into HBM for embedding. Must be -// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -// embedding table configuration. For example, this op is used to install -// parameters that are loaded from a checkpoint before a training loop is -// executed. -// -// Arguments: -// parameters: Value of parameters used in the MDL Adagrad Light optimization algorithm. -// accumulators: Value of accumulators used in the MDL Adagrad Light optimization algorithm. -// weights: Value of weights used in the MDL Adagrad Light optimization algorithm. -// benefits: Value of benefits used in the MDL Adagrad Light optimization algorithm. -// -// -// -// Returns the created operation. -func LoadTPUEmbeddingMDLAdagradLightParameters(scope *Scope, parameters tf.Output, accumulators tf.Output, weights tf.Output, benefits tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingMDLAdagradLightParametersAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LoadTPUEmbeddingMDLAdagradLightParameters", - Input: []tf.Input{ - parameters, accumulators, weights, benefits, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// MapPeekAttr is an optional argument to MapPeek. -type MapPeekAttr func(optionalAttr) - -// MapPeekCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapPeekCapacity(value int64) MapPeekAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// MapPeekMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapPeekMemoryLimit(value int64) MapPeekAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// MapPeekContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func MapPeekContainer(value string) MapPeekAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// MapPeekSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func MapPeekSharedName(value string) MapPeekAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op peeks at the values at the specified key. If the -// -// underlying container does not contain this key -// this op will block until it does. -func MapPeek(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...MapPeekAttr) (values []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MapPeek", - Input: []tf.Input{ - key, indices, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if values, idx, err = makeOutputList(op, idx, "values"); err != nil { - scope.UpdateErr("MapPeek", err) - return - } - return values -} - -// RetrieveTPUEmbeddingCenteredRMSPropParametersAttr is an optional argument to RetrieveTPUEmbeddingCenteredRMSPropParameters. -type RetrieveTPUEmbeddingCenteredRMSPropParametersAttr func(optionalAttr) - -// RetrieveTPUEmbeddingCenteredRMSPropParametersTableId sets the optional table_id attribute to value. 
-// If not specified, defaults to -1 -func RetrieveTPUEmbeddingCenteredRMSPropParametersTableId(value int64) RetrieveTPUEmbeddingCenteredRMSPropParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// RetrieveTPUEmbeddingCenteredRMSPropParametersTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingCenteredRMSPropParametersTableName(value string) RetrieveTPUEmbeddingCenteredRMSPropParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// RetrieveTPUEmbeddingCenteredRMSPropParametersConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingCenteredRMSPropParametersConfig(value string) RetrieveTPUEmbeddingCenteredRMSPropParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Retrieve centered RMSProp embedding parameters. -// -// An op that retrieves optimization parameters from embedding to host -// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up -// the correct embedding table configuration. For example, this op is -// used to retrieve updated parameters before saving a checkpoint. -// -// Returns: -// parameters: Parameter parameters updated by the centered RMSProp optimization algorithm. -// ms: Parameter ms updated by the centered RMSProp optimization algorithm. -// mom: Parameter mom updated by the centered RMSProp optimization algorithm. -// mg: Parameter mg updated by the centered RMSProp optimization algorithm. -func RetrieveTPUEmbeddingCenteredRMSPropParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingCenteredRMSPropParametersAttr) (parameters tf.Output, ms tf.Output, mom tf.Output, mg tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RetrieveTPUEmbeddingCenteredRMSPropParameters", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3) -} - -// Transforms a vector of brain.Example protos (as strings) into typed tensors. -// -// Arguments: -// serialized: A vector containing a batch of binary serialized Example protos. -// names: A vector containing the names of the serialized protos. -// May contain, for example, table key (descriptive) names for the -// corresponding serialized protos. These are purely useful for debugging -// purposes, and the presence of values here has no effect on the output. -// May also be an empty vector if no names are available. -// If non-empty, this vector must be the same length as "serialized". -// sparse_keys: A list of Nsparse string Tensors (scalars). -// The keys expected in the Examples' features associated with sparse values. -// dense_keys: A list of Ndense string Tensors (scalars). -// The keys expected in the Examples' features associated with dense values. -// dense_defaults: A list of Ndense Tensors (some may be empty). -// dense_defaults[j] provides default values -// when the example's feature_map lacks dense_key[j]. If an empty Tensor is -// provided for dense_defaults[j], then the Feature dense_keys[j] is required. -// The input type is inferred from dense_defaults[j], even when it's empty. -// If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined, -// then the shape of dense_defaults[j] must match that of dense_shapes[j]. 
-// If dense_shapes[j] has an undefined major dimension (variable strides dense -// feature), dense_defaults[j] must contain a single element: -// the padding element. -// sparse_types: A list of Nsparse types; the data types of data in each Feature -// given in sparse_keys. -// Currently the ParseExample supports DT_FLOAT (FloatList), -// DT_INT64 (Int64List), and DT_STRING (BytesList). -// dense_shapes: A list of Ndense shapes; the shapes of data in each Feature -// given in dense_keys. -// The number of elements in the Feature corresponding to dense_key[j] -// must always equal dense_shapes[j].NumEntries(). -// If dense_shapes[j] == (D0, D1, ..., DN) then the shape of output -// Tensor dense_values[j] will be (|serialized|, D0, D1, ..., DN): -// The dense outputs are just the inputs row-stacked by batch. -// This works for dense_shapes[j] = (-1, D1, ..., DN). In this case -// the shape of the output Tensor dense_values[j] will be -// (|serialized|, M, D1, .., DN), where M is the maximum number of blocks -// of elements of length D1 * .... * DN, across all minibatch entries -// in the input. Any minibatch entry with less than M blocks of elements of -// length D1 * ... * DN will be padded with the corresponding default_value -// scalar element along the second dimension. -func ParseExample(scope *Scope, serialized tf.Output, names tf.Output, sparse_keys []tf.Output, dense_keys []tf.Output, dense_defaults []tf.Output, sparse_types []tf.DataType, dense_shapes []tf.Shape) (sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shapes []tf.Output, dense_values []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"sparse_types": sparse_types, "dense_shapes": dense_shapes} - opspec := tf.OpSpec{ - Type: "ParseExample", - Input: []tf.Input{ - serialized, names, tf.OutputList(sparse_keys), tf.OutputList(dense_keys), tf.OutputList(dense_defaults), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if sparse_indices, idx, err = makeOutputList(op, idx, "sparse_indices"); err != nil { - scope.UpdateErr("ParseExample", err) - return - } - if sparse_values, idx, err = makeOutputList(op, idx, "sparse_values"); err != nil { - scope.UpdateErr("ParseExample", err) - return - } - if sparse_shapes, idx, err = makeOutputList(op, idx, "sparse_shapes"); err != nil { - scope.UpdateErr("ParseExample", err) - return - } - if dense_values, idx, err = makeOutputList(op, idx, "dense_values"); err != nil { - scope.UpdateErr("ParseExample", err) - return - } - return sparse_indices, sparse_values, sparse_shapes, dense_values -} - -// DatasetToGraphAttr is an optional argument to DatasetToGraph. -type DatasetToGraphAttr func(optionalAttr) - -// DatasetToGraphStatefulWhitelist sets the optional stateful_whitelist attribute to value. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func DatasetToGraphStatefulWhitelist(value []string) DatasetToGraphAttr { - return func(m optionalAttr) { - m["stateful_whitelist"] = value - } -} - -// DatasetToGraphAllowStateful sets the optional allow_stateful attribute to value. -// If not specified, defaults to false -func DatasetToGraphAllowStateful(value bool) DatasetToGraphAttr { - return func(m optionalAttr) { - m["allow_stateful"] = value - } -} - -// DatasetToGraphStripDeviceAssignment sets the optional strip_device_assignment attribute to value. 
-// If not specified, defaults to false -func DatasetToGraphStripDeviceAssignment(value bool) DatasetToGraphAttr { - return func(m optionalAttr) { - m["strip_device_assignment"] = value - } -} - -// Returns a serialized GraphDef representing `input_dataset`. -// -// Returns a graph representation for `input_dataset`. -// -// Arguments: -// input_dataset: A variant tensor representing the dataset to return the graph representation for. -// -// Returns The graph representation of the dataset (as serialized GraphDef). -func DatasetToGraph(scope *Scope, input_dataset tf.Output, optional ...DatasetToGraphAttr) (graph tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DatasetToGraph", - Input: []tf.Input{ - input_dataset, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceSparseApplyAdadeltaAttr is an optional argument to ResourceSparseApplyAdadelta. -type ResourceSparseApplyAdadeltaAttr func(optionalAttr) - -// ResourceSparseApplyAdadeltaUseLocking sets the optional use_locking attribute to value. -// -// value: If True, updating of the var and accum tensors will be protected by -// a lock; otherwise the behavior is undefined, but may exhibit less contention. -// If not specified, defaults to false -func ResourceSparseApplyAdadeltaUseLocking(value bool) ResourceSparseApplyAdadeltaAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// var: Should be from a Variable(). -// -// Arguments: -// -// accum: Should be from a Variable(). -// accum_update: : Should be from a Variable(). -// lr: Learning rate. Must be a scalar. -// rho: Decay factor. Must be a scalar. -// epsilon: Constant factor. Must be a scalar. -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. -// -// Returns the created operation. -func ResourceSparseApplyAdadelta(scope *Scope, var_ tf.Output, accum tf.Output, accum_update tf.Output, lr tf.Output, rho tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyAdadeltaAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceSparseApplyAdadelta", - Input: []tf.Input{ - var_, accum, accum_update, lr, rho, epsilon, grad, indices, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Computes sigmoid of `x` element-wise. -// -// Specifically, `y = 1 / (1 + exp(-x))`. -func Sigmoid(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Sigmoid", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RetrieveTPUEmbeddingADAMParametersGradAccumDebugAttr is an optional argument to RetrieveTPUEmbeddingADAMParametersGradAccumDebug. -type RetrieveTPUEmbeddingADAMParametersGradAccumDebugAttr func(optionalAttr) - -// RetrieveTPUEmbeddingADAMParametersGradAccumDebugTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func RetrieveTPUEmbeddingADAMParametersGradAccumDebugTableId(value int64) RetrieveTPUEmbeddingADAMParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// RetrieveTPUEmbeddingADAMParametersGradAccumDebugTableName sets the optional table_name attribute to value. 
-// If not specified, defaults to "" -func RetrieveTPUEmbeddingADAMParametersGradAccumDebugTableName(value string) RetrieveTPUEmbeddingADAMParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// RetrieveTPUEmbeddingADAMParametersGradAccumDebugConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingADAMParametersGradAccumDebugConfig(value string) RetrieveTPUEmbeddingADAMParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Retrieve ADAM embedding parameters with debug support. -// -// An op that retrieves optimization parameters from embedding to host -// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up -// the correct embedding table configuration. For example, this op is -// used to retrieve updated parameters before saving a checkpoint. -// -// Returns: -// parameters: Parameter parameters updated by the ADAM optimization algorithm. -// momenta: Parameter momenta updated by the ADAM optimization algorithm. -// velocities: Parameter velocities updated by the ADAM optimization algorithm. -// gradient_accumulators: Parameter gradient_accumulators updated by the ADAM optimization algorithm. -func RetrieveTPUEmbeddingADAMParametersGradAccumDebug(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingADAMParametersGradAccumDebugAttr) (parameters tf.Output, momenta tf.Output, velocities tf.Output, gradient_accumulators tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RetrieveTPUEmbeddingADAMParametersGradAccumDebug", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3) -} - -// ResourceApplyAdamAttr is an optional argument to ResourceApplyAdam. -type ResourceApplyAdamAttr func(optionalAttr) - -// ResourceApplyAdamUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var, m, and v tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyAdamUseLocking(value bool) ResourceApplyAdamAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// ResourceApplyAdamUseNesterov sets the optional use_nesterov attribute to value. -// -// value: If `True`, uses the nesterov update. -// If not specified, defaults to false -func ResourceApplyAdamUseNesterov(value bool) ResourceApplyAdamAttr { - return func(m optionalAttr) { - m["use_nesterov"] = value - } -} - -// Update '*var' according to the Adam algorithm. -// -// $$\text{lr}_t := \mathrm{learning_rate} * \sqrt{1 - \beta_2^t} / (1 - \beta_1^t)$$ -// $$m_t := \beta_1 * m_{t-1} + (1 - \beta_1) * g$$ -// $$v_t := \beta_2 * v_{t-1} + (1 - \beta_2) * g * g$$ -// $$\text{variable} := \text{variable} - \text{lr}_t * m_t / (\sqrt{v_t} + \epsilon)$$ -// -// Arguments: -// var_: Should be from a Variable(). -// m: Should be from a Variable(). -// v: Should be from a Variable(). -// beta1_power: Must be a scalar. -// beta2_power: Must be a scalar. -// lr: Scaling factor. Must be a scalar. -// beta1: Momentum factor. Must be a scalar. -// beta2: Momentum factor. Must be a scalar. -// epsilon: Ridge term. Must be a scalar. -// grad: The gradient. 
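For intuition, the Adam update equations above can be traced by hand. The plain-Go sketch below (illustrative only, not part of this patch; the scalar values are made up) applies a single update step to one weight.

package main

import (
	"fmt"
	"math"
)

func main() {
	// Made-up scalar values, just to trace the formulas above.
	var (
		w, m, v             = 1.0, 0.0, 0.0 // variable and its first/second moments
		g                   = 1.0           // gradient
		alpha, beta1, beta2 = 0.001, 0.9, 0.999
		eps                 = 1e-8
		t                   = 1.0 // step number, so beta1_power = beta1^t, beta2_power = beta2^t
	)
	lr := alpha * math.Sqrt(1-math.Pow(beta2, t)) / (1 - math.Pow(beta1, t))
	m = beta1*m + (1-beta1)*g
	v = beta2*v + (1-beta2)*g*g
	w -= lr * m / (math.Sqrt(v) + eps)
	fmt.Println(w) // ~0.999: the first step moves w by roughly alpha
}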
-// -// Returns the created operation. -func ResourceApplyAdam(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output, beta1_power tf.Output, beta2_power tf.Output, lr tf.Output, beta1 tf.Output, beta2 tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdamAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyAdam", - Input: []tf.Input{ - var_, m, v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// CumsumAttr is an optional argument to Cumsum. -type CumsumAttr func(optionalAttr) - -// CumsumExclusive sets the optional exclusive attribute to value. -// -// value: If `True`, perform exclusive cumsum. -// If not specified, defaults to false -func CumsumExclusive(value bool) CumsumAttr { - return func(m optionalAttr) { - m["exclusive"] = value - } -} - -// CumsumReverse sets the optional reverse attribute to value. -// -// value: A `bool` (default: False). -// If not specified, defaults to false -func CumsumReverse(value bool) CumsumAttr { - return func(m optionalAttr) { - m["reverse"] = value - } -} - -// Compute the cumulative sum of the tensor `x` along `axis`. -// -// By default, this op performs an inclusive cumsum, which means that the first -// element of the input is identical to the first element of the output: -// -// ```python -// tf.cumsum([a, b, c]) # => [a, a + b, a + b + c] -// ``` -// -// By setting the `exclusive` kwarg to `True`, an exclusive cumsum is -// performed instead: -// -// ```python -// tf.cumsum([a, b, c], exclusive=True) # => [0, a, a + b] -// ``` -// -// By setting the `reverse` kwarg to `True`, the cumsum is performed in the -// opposite direction: -// -// ```python -// tf.cumsum([a, b, c], reverse=True) # => [a + b + c, b + c, c] -// ``` -// -// This is more efficient than using separate `tf.reverse` ops. -// -// The `reverse` and `exclusive` kwargs can also be combined: -// -// ```python -// tf.cumsum([a, b, c], exclusive=True, reverse=True) # => [b + c, c, 0] -// ``` -// -// Arguments: -// x: A `Tensor`. Must be one of the following types: `float32`, `float64`, -// `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, -// `complex128`, `qint8`, `quint8`, `qint32`, `half`. -// axis: A `Tensor` of type `int32` (default: 0). Must be in the range -// `[-rank(x), rank(x))`. -func Cumsum(scope *Scope, x tf.Output, axis tf.Output, optional ...CumsumAttr) (out tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Cumsum", - Input: []tf.Input{ - x, axis, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Performs gradient updates of embedding tables. -// -// Arguments: -// inputs: A TensorList of gradients with which to update embedding tables. -// This argument has the same length and shapes as the return value of -// RecvTPUEmbeddingActivations, but contains gradients of the model's loss -// with respect to the embedding activations. The embedding tables are updated -// from these gradients via the optimizer specified in the TPU embedding -// configuration given to tpu.initialize_system. 
-// learning_rates: A TensorList of float32 scalars, one for each dynamic learning -// rate tag: see the comments in -// //third_party/tensorflow/core/protobuf/tpu/optimization_parameters.proto. -// Multiple tables can share the same dynamic learning rate tag as specified -// in the configuration. If the learning rates for all tables are constant, -// this list should be empty. -// config: Serialized TPUEmbeddingConfiguration proto. -// -// Returns the created operation. -func SendTPUEmbeddingGradients(scope *Scope, inputs []tf.Output, learning_rates []tf.Output, config string) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"config": config} - opspec := tf.OpSpec{ - Type: "SendTPUEmbeddingGradients", - Input: []tf.Input{ - tf.OutputList(inputs), tf.OutputList(learning_rates), - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// ResourceApplyKerasMomentumAttr is an optional argument to ResourceApplyKerasMomentum. -type ResourceApplyKerasMomentumAttr func(optionalAttr) - -// ResourceApplyKerasMomentumUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyKerasMomentumUseLocking(value bool) ResourceApplyKerasMomentumAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// ResourceApplyKerasMomentumUseNesterov sets the optional use_nesterov attribute to value. -// -// value: If `True`, the tensor passed to compute grad will be -// var + momentum * accum, so in the end, the var you get is actually -// var + momentum * accum. -// If not specified, defaults to false -func ResourceApplyKerasMomentumUseNesterov(value bool) ResourceApplyKerasMomentumAttr { - return func(m optionalAttr) { - m["use_nesterov"] = value - } -} - -// Update '*var' according to the momentum scheme. -// -// Set use_nesterov = True if you want to use Nesterov momentum. -// -// accum = accum * momentum - lr * grad -// var += accum -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// grad: The gradient. -// momentum: Momentum. Must be a scalar. -// -// Returns the created operation. -func ResourceApplyKerasMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, momentum tf.Output, optional ...ResourceApplyKerasMomentumAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyKerasMomentum", - Input: []tf.Input{ - var_, accum, lr, grad, momentum, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// TensorArrayConcatV2Attr is an optional argument to TensorArrayConcatV2. -type TensorArrayConcatV2Attr func(optionalAttr) - -// TensorArrayConcatV2ElementShapeExcept0 sets the optional element_shape_except0 attribute to value. -// If not specified, defaults to -func TensorArrayConcatV2ElementShapeExcept0(value tf.Shape) TensorArrayConcatV2Attr { - return func(m optionalAttr) { - m["element_shape_except0"] = value - } -} - -// Deprecated. 
Use TensorArrayConcatV3 -func TensorArrayConcatV2(scope *Scope, handle tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayConcatV2Attr) (value tf.Output, lengths tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TensorArrayConcatV2", - Input: []tf.Input{ - handle, flow_in, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// MatrixSolveAttr is an optional argument to MatrixSolve. -type MatrixSolveAttr func(optionalAttr) - -// MatrixSolveAdjoint sets the optional adjoint attribute to value. -// -// value: Boolean indicating whether to solve with `matrix` or its (block-wise) -// adjoint. -// If not specified, defaults to false -func MatrixSolveAdjoint(value bool) MatrixSolveAttr { - return func(m optionalAttr) { - m["adjoint"] = value - } -} - -// Solves systems of linear equations. -// -// `Matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions -// form square matrices. `Rhs` is a tensor of shape `[..., M, K]`. The `output` is -// a tensor shape `[..., M, K]`. If `adjoint` is `False` then each output matrix -// satisfies `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`. -// If `adjoint` is `True` then each output matrix satisfies -// `adjoint(matrix[..., :, :]) * output[..., :, :] = rhs[..., :, :]`. -// -// Arguments: -// matrix: Shape is `[..., M, M]`. -// rhs: Shape is `[..., M, K]`. -// -// Returns Shape is `[..., M, K]`. -func MatrixSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...MatrixSolveAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MatrixSolve", - Input: []tf.Input{ - matrix, rhs, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Writes contents to the file at input filename. Creates file and recursively -// -// creates directory if not existing. -// -// Arguments: -// filename: scalar. The name of the file to which we write the contents. -// contents: scalar. The content to be written to the output file. -// -// Returns the created operation. -func WriteFile(scope *Scope, filename tf.Output, contents tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "WriteFile", - Input: []tf.Input{ - filename, contents, - }, - } - return scope.AddOperation(opspec) -} - -// ResourceSparseApplyMomentumAttr is an optional argument to ResourceSparseApplyMomentum. -type ResourceSparseApplyMomentumAttr func(optionalAttr) - -// ResourceSparseApplyMomentumUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceSparseApplyMomentumUseLocking(value bool) ResourceSparseApplyMomentumAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// ResourceSparseApplyMomentumUseNesterov sets the optional use_nesterov attribute to value. -// -// value: If `True`, the tensor passed to compute grad will be -// var - lr * momentum * accum, so in the end, the var you get is actually -// var - lr * momentum * accum. 
-// If not specified, defaults to false -func ResourceSparseApplyMomentumUseNesterov(value bool) ResourceSparseApplyMomentumAttr { - return func(m optionalAttr) { - m["use_nesterov"] = value - } -} - -// Update relevant entries in '*var' and '*accum' according to the momentum scheme. -// -// Set use_nesterov = True if you want to use Nesterov momentum. -// -// That is for rows we have grad for, we update var and accum as follows: -// -// accum = accum * momentum + grad -// var -= lr * accum -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Learning rate. Must be a scalar. -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. -// momentum: Momentum. Must be a scalar. -// -// Returns the created operation. -func ResourceSparseApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, momentum tf.Output, optional ...ResourceSparseApplyMomentumAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceSparseApplyMomentum", - Input: []tf.Input{ - var_, accum, lr, grad, indices, momentum, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// RecvAttr is an optional argument to Recv. -type RecvAttr func(optionalAttr) - -// RecvClientTerminated sets the optional client_terminated attribute to value. -// -// value: If set to true, this indicates that the node was added -// to the graph as a result of a client-side feed or fetch of Tensor data, -// in which case the corresponding send or recv is expected to be managed -// locally by the caller. -// If not specified, defaults to false -func RecvClientTerminated(value bool) RecvAttr { - return func(m optionalAttr) { - m["client_terminated"] = value - } -} - -// Receives the named tensor from send_device on recv_device. -// -// Arguments: -// -// tensor_name: The name of the tensor to receive. -// send_device: The name of the device sending the tensor. -// send_device_incarnation: The current incarnation of send_device. -// recv_device: The name of the device receiving the tensor. -// -// Returns The tensor to receive. -func Recv(scope *Scope, tensor_type tf.DataType, tensor_name string, send_device string, send_device_incarnation int64, recv_device string, optional ...RecvAttr) (tensor tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"tensor_type": tensor_type, "tensor_name": tensor_name, "send_device": send_device, "send_device_incarnation": send_device_incarnation, "recv_device": recv_device} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Recv", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// OrderedMapStageAttr is an optional argument to OrderedMapStage. -type OrderedMapStageAttr func(optionalAttr) - -// OrderedMapStageCapacity sets the optional capacity attribute to value. -// -// value: Maximum number of elements in the Staging Area. If > 0, inserts -// on the container will block when the capacity is reached. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapStageCapacity(value int64) OrderedMapStageAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// OrderedMapStageMemoryLimit sets the optional memory_limit attribute to value. 
-// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapStageMemoryLimit(value int64) OrderedMapStageAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// OrderedMapStageContainer sets the optional container attribute to value. -// -// value: If non-empty, this queue is placed in the given container. Otherwise, -// a default container is used. -// If not specified, defaults to "" -func OrderedMapStageContainer(value string) OrderedMapStageAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// OrderedMapStageSharedName sets the optional shared_name attribute to value. -// -// value: It is necessary to match this name to the matching Unstage Op. -// If not specified, defaults to "" -func OrderedMapStageSharedName(value string) OrderedMapStageAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Stage (key, values) in the underlying container which behaves like a ordered -// -// associative container. Elements are ordered by key. -// -// Arguments: -// key: int64 -// -// values: a list of tensors -// dtypes A list of data types that inserted values should adhere to. -// -// -// Returns the created operation. -func OrderedMapStage(scope *Scope, key tf.Output, indices tf.Output, values []tf.Output, dtypes []tf.DataType, optional ...OrderedMapStageAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "OrderedMapStage", - Input: []tf.Input{ - key, indices, tf.OutputList(values), - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// TPUReplicateMetadataAttr is an optional argument to TPUReplicateMetadata. -type TPUReplicateMetadataAttr func(optionalAttr) - -// TPUReplicateMetadataNumCoresPerReplica sets the optional num_cores_per_replica attribute to value. -// -// value: Number of cores per replica. Used for model parallelism. -// If not specified, defaults to 1 -func TPUReplicateMetadataNumCoresPerReplica(value int64) TPUReplicateMetadataAttr { - return func(m optionalAttr) { - m["num_cores_per_replica"] = value - } -} - -// TPUReplicateMetadataTopology sets the optional topology attribute to value. -// -// value: TopologyProto indicating the topology of the TPU pod slice. -// If not specified, defaults to "" -func TPUReplicateMetadataTopology(value string) TPUReplicateMetadataAttr { - return func(m optionalAttr) { - m["topology"] = value - } -} - -// TPUReplicateMetadataUseTpu sets the optional use_tpu attribute to value. -// -// value: Whether to place the computation on the TPU. -// If not specified, defaults to true -func TPUReplicateMetadataUseTpu(value bool) TPUReplicateMetadataAttr { - return func(m optionalAttr) { - m["use_tpu"] = value - } -} - -// TPUReplicateMetadataDeviceAssignment sets the optional device_assignment attribute to value. -// -// value: The assignment of devices for the computation. -// If not specified, defaults to <> -func TPUReplicateMetadataDeviceAssignment(value []int64) TPUReplicateMetadataAttr { - return func(m optionalAttr) { - m["device_assignment"] = value - } -} - -// TPUReplicateMetadataComputationShape sets the optional computation_shape attribute to value. -// -// value: DEPRECATED. Use num_cores_per_replica instead. 
-// If not specified, defaults to <> -func TPUReplicateMetadataComputationShape(value []int64) TPUReplicateMetadataAttr { - return func(m optionalAttr) { - m["computation_shape"] = value - } -} - -// TPUReplicateMetadataHostComputeCore sets the optional host_compute_core attribute to value. -// If not specified, defaults to <> -func TPUReplicateMetadataHostComputeCore(value []string) TPUReplicateMetadataAttr { - return func(m optionalAttr) { - m["host_compute_core"] = value - } -} - -// TPUReplicateMetadataPaddingMap sets the optional padding_map attribute to value. -// If not specified, defaults to <> -func TPUReplicateMetadataPaddingMap(value []string) TPUReplicateMetadataAttr { - return func(m optionalAttr) { - m["padding_map"] = value - } -} - -// TPUReplicateMetadataStepMarkerLocation sets the optional step_marker_location attribute to value. -// If not specified, defaults to "STEP_MARK_AT_ENTRY" -func TPUReplicateMetadataStepMarkerLocation(value string) TPUReplicateMetadataAttr { - return func(m optionalAttr) { - m["step_marker_location"] = value - } -} - -// TPUReplicateMetadataAllowSoftPlacement sets the optional allow_soft_placement attribute to value. -// If not specified, defaults to false -func TPUReplicateMetadataAllowSoftPlacement(value bool) TPUReplicateMetadataAttr { - return func(m optionalAttr) { - m["allow_soft_placement"] = value - } -} - -// Metadata indicating how the TPU computation should be replicated. -// -// This operation holds the metadata common to operations of a `tpu.replicate()` computation subgraph. -// -// Arguments: -// num_replicas: Number of replicas of the computation -// -// Returns the created operation. -func TPUReplicateMetadata(scope *Scope, num_replicas int64, optional ...TPUReplicateMetadataAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_replicas": num_replicas} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TPUReplicateMetadata", - - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// TensorListConcatAttr is an optional argument to TensorListConcat. -type TensorListConcatAttr func(optionalAttr) - -// TensorListConcatElementShape sets the optional element_shape attribute to value. -// If not specified, defaults to -func TensorListConcatElementShape(value tf.Shape) TensorListConcatAttr { - return func(m optionalAttr) { - m["element_shape"] = value - } -} - -// Concats all tensors in the list along the 0th dimension. -// -// Requires that all tensors have the same shape except the first dimension. -// -// input_handle: The input list. -// tensor: The concated result. -// lengths: Output tensor containing sizes of the 0th dimension of tensors in the list, used for computing the gradient. -// -func TensorListConcat(scope *Scope, input_handle tf.Output, element_dtype tf.DataType, optional ...TensorListConcatAttr) (tensor tf.Output, lengths tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"element_dtype": element_dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TensorListConcat", - Input: []tf.Input{ - input_handle, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// LoadTPUEmbeddingRMSPropParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingRMSPropParametersGradAccumDebug. 
-type LoadTPUEmbeddingRMSPropParametersGradAccumDebugAttr func(optionalAttr) - -// LoadTPUEmbeddingRMSPropParametersGradAccumDebugTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func LoadTPUEmbeddingRMSPropParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingRMSPropParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// LoadTPUEmbeddingRMSPropParametersGradAccumDebugTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingRMSPropParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingRMSPropParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// LoadTPUEmbeddingRMSPropParametersGradAccumDebugConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingRMSPropParametersGradAccumDebugConfig(value string) LoadTPUEmbeddingRMSPropParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Load RMSProp embedding parameters with debug support. -// -// An op that loads optimization parameters into HBM for embedding. Must be -// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -// embedding table configuration. For example, this op is used to install -// parameters that are loaded from a checkpoint before a training loop is -// executed. -// -// Arguments: -// parameters: Value of parameters used in the RMSProp optimization algorithm. -// ms: Value of ms used in the RMSProp optimization algorithm. -// mom: Value of mom used in the RMSProp optimization algorithm. -// gradient_accumulators: Value of gradient_accumulators used in the RMSProp optimization algorithm. -// -// -// -// Returns the created operation. -func LoadTPUEmbeddingRMSPropParametersGradAccumDebug(scope *Scope, parameters tf.Output, ms tf.Output, mom tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingRMSPropParametersGradAccumDebugAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LoadTPUEmbeddingRMSPropParametersGradAccumDebug", - Input: []tf.Input{ - parameters, ms, mom, gradient_accumulators, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Concatenates tensors along one dimension. -// -// Arguments: -// values: List of `N` Tensors to concatenate. Their ranks and types must match, -// and their sizes must match in all dimensions except `concat_dim`. -// axis: 0-D. The dimension along which to concatenate. Must be in the -// range [-rank(values), rank(values)). -// -// Returns A `Tensor` with the concatenation of values stacked along the -// `concat_dim` dimension. This tensor's shape matches that of `values` except -// in `concat_dim` where it has the sum of the sizes. -func ConcatV2(scope *Scope, values []tf.Output, axis tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ConcatV2", - Input: []tf.Input{ - tf.OutputList(values), axis, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// LoadTPUEmbeddingFTRLParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingFTRLParametersGradAccumDebug. 
-type LoadTPUEmbeddingFTRLParametersGradAccumDebugAttr func(optionalAttr) - -// LoadTPUEmbeddingFTRLParametersGradAccumDebugTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func LoadTPUEmbeddingFTRLParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingFTRLParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// LoadTPUEmbeddingFTRLParametersGradAccumDebugTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingFTRLParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingFTRLParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// LoadTPUEmbeddingFTRLParametersGradAccumDebugConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingFTRLParametersGradAccumDebugConfig(value string) LoadTPUEmbeddingFTRLParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Load FTRL embedding parameters with debug support. -// -// An op that loads optimization parameters into HBM for embedding. Must be -// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -// embedding table configuration. For example, this op is used to install -// parameters that are loaded from a checkpoint before a training loop is -// executed. -// -// Arguments: -// parameters: Value of parameters used in the FTRL optimization algorithm. -// accumulators: Value of accumulators used in the FTRL optimization algorithm. -// linears: Value of linears used in the FTRL optimization algorithm. -// gradient_accumulators: Value of gradient_accumulators used in the FTRL optimization algorithm. -// -// -// -// Returns the created operation. -func LoadTPUEmbeddingFTRLParametersGradAccumDebug(scope *Scope, parameters tf.Output, accumulators tf.Output, linears tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingFTRLParametersGradAccumDebugAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LoadTPUEmbeddingFTRLParametersGradAccumDebug", - Input: []tf.Input{ - parameters, accumulators, linears, gradient_accumulators, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// LoadTPUEmbeddingAdadeltaParametersAttr is an optional argument to LoadTPUEmbeddingAdadeltaParameters. -type LoadTPUEmbeddingAdadeltaParametersAttr func(optionalAttr) - -// LoadTPUEmbeddingAdadeltaParametersTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func LoadTPUEmbeddingAdadeltaParametersTableId(value int64) LoadTPUEmbeddingAdadeltaParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// LoadTPUEmbeddingAdadeltaParametersTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingAdadeltaParametersTableName(value string) LoadTPUEmbeddingAdadeltaParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// LoadTPUEmbeddingAdadeltaParametersConfig sets the optional config attribute to value. 
-// If not specified, defaults to "" -func LoadTPUEmbeddingAdadeltaParametersConfig(value string) LoadTPUEmbeddingAdadeltaParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Load Adadelta embedding parameters. -// -// An op that loads optimization parameters into HBM for embedding. Must be -// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -// embedding table configuration. For example, this op is used to install -// parameters that are loaded from a checkpoint before a training loop is -// executed. -// -// Arguments: -// parameters: Value of parameters used in the Adadelta optimization algorithm. -// accumulators: Value of accumulators used in the Adadelta optimization algorithm. -// updates: Value of updates used in the Adadelta optimization algorithm. -// -// -// -// Returns the created operation. -func LoadTPUEmbeddingAdadeltaParameters(scope *Scope, parameters tf.Output, accumulators tf.Output, updates tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingAdadeltaParametersAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LoadTPUEmbeddingAdadeltaParameters", - Input: []tf.Input{ - parameters, accumulators, updates, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// ResourceSparseApplyFtrlV2Attr is an optional argument to ResourceSparseApplyFtrlV2. -type ResourceSparseApplyFtrlV2Attr func(optionalAttr) - -// ResourceSparseApplyFtrlV2UseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceSparseApplyFtrlV2UseLocking(value bool) ResourceSparseApplyFtrlV2Attr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// ResourceSparseApplyFtrlV2MultiplyLinearByLr sets the optional multiply_linear_by_lr attribute to value. -// If not specified, defaults to false -func ResourceSparseApplyFtrlV2MultiplyLinearByLr(value bool) ResourceSparseApplyFtrlV2Attr { - return func(m optionalAttr) { - m["multiply_linear_by_lr"] = value - } -} - -// Update relevant entries in '*var' according to the Ftrl-proximal scheme. -// -// That is for rows we have grad for, we update var, accum and linear as follows: -// grad_with_shrinkage = grad + 2 * l2_shrinkage * var -// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage -// linear += grad_with_shrinkage + -// (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var -// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 -// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 -// accum = accum_new -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// linear: Should be from a Variable(). -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. -// lr: Scaling factor. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 shrinkage regularization. Must be a scalar. -// -// lr_power: Scaling factor. Must be a scalar. -// -// Returns the created operation. 
-func ResourceSparseApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceSparseApplyFtrlV2Attr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceSparseApplyFtrlV2", - Input: []tf.Input{ - var_, accum, linear, grad, indices, lr, l1, l2, l2_shrinkage, lr_power, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// ResourceSparseApplyAdagradV2Attr is an optional argument to ResourceSparseApplyAdagradV2. -type ResourceSparseApplyAdagradV2Attr func(optionalAttr) - -// ResourceSparseApplyAdagradV2UseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceSparseApplyAdagradV2UseLocking(value bool) ResourceSparseApplyAdagradV2Attr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// ResourceSparseApplyAdagradV2UpdateSlots sets the optional update_slots attribute to value. -// If not specified, defaults to true -func ResourceSparseApplyAdagradV2UpdateSlots(value bool) ResourceSparseApplyAdagradV2Attr { - return func(m optionalAttr) { - m["update_slots"] = value - } -} - -// Update relevant entries in '*var' and '*accum' according to the adagrad scheme. -// -// That is for rows we have grad for, we update var and accum as follows: -// accum += grad * grad -// var -= lr * grad * (1 / sqrt(accum)) -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Learning rate. Must be a scalar. -// epsilon: Constant factor. Must be a scalar. -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. -// -// Returns the created operation. -func ResourceSparseApplyAdagradV2(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyAdagradV2Attr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceSparseApplyAdagradV2", - Input: []tf.Input{ - var_, accum, lr, epsilon, grad, indices, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Restore a Reader to its initial clean state. -// -// Arguments: -// reader_handle: Handle to a Reader. -// -// Returns the created operation. -func ReaderResetV2(scope *Scope, reader_handle tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReaderResetV2", - Input: []tf.Input{ - reader_handle, - }, - } - return scope.AddOperation(opspec) -} - -// FakeQuantWithMinMaxVarsPerChannelGradientAttr is an optional argument to FakeQuantWithMinMaxVarsPerChannelGradient. -type FakeQuantWithMinMaxVarsPerChannelGradientAttr func(optionalAttr) - -// FakeQuantWithMinMaxVarsPerChannelGradientNumBits sets the optional num_bits attribute to value. -// -// value: The bitwidth of the quantization; between 2 and 16, inclusive. 
-// If not specified, defaults to 8 -func FakeQuantWithMinMaxVarsPerChannelGradientNumBits(value int64) FakeQuantWithMinMaxVarsPerChannelGradientAttr { - return func(m optionalAttr) { - m["num_bits"] = value - } -} - -// FakeQuantWithMinMaxVarsPerChannelGradientNarrowRange sets the optional narrow_range attribute to value. -// -// value: Whether to quantize into 2^num_bits - 1 distinct values. -// If not specified, defaults to false -func FakeQuantWithMinMaxVarsPerChannelGradientNarrowRange(value bool) FakeQuantWithMinMaxVarsPerChannelGradientAttr { - return func(m optionalAttr) { - m["narrow_range"] = value - } -} - -// Compute gradients for a FakeQuantWithMinMaxVarsPerChannel operation. -// -// Arguments: -// gradients: Backpropagated gradients above the FakeQuantWithMinMaxVars operation, -// shape one of: `[d]`, `[b, d]`, `[b, h, w, d]`. -// inputs: Values passed as inputs to the FakeQuantWithMinMaxVars operation, shape -// same as `gradients`. -// min, max: Quantization interval, floats of shape `[d]`. -// -// -// -// Returns: -// backprops_wrt_input: Backpropagated gradients w.r.t. inputs, shape same as -// `inputs`: -// `gradients * (inputs >= min && inputs <= max)`. -// backprop_wrt_min: Backpropagated gradients w.r.t. min parameter, shape `[d]`: -// `sum_per_d(gradients * (inputs < min))`. -// backprop_wrt_max: Backpropagated gradients w.r.t. max parameter, shape `[d]`: -// `sum_per_d(gradients * (inputs > max))`. -func FakeQuantWithMinMaxVarsPerChannelGradient(scope *Scope, gradients tf.Output, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsPerChannelGradientAttr) (backprops_wrt_input tf.Output, backprop_wrt_min tf.Output, backprop_wrt_max tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FakeQuantWithMinMaxVarsPerChannelGradient", - Input: []tf.Input{ - gradients, inputs, min, max, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// PrintV2Attr is an optional argument to PrintV2. -type PrintV2Attr func(optionalAttr) - -// PrintV2OutputStream sets the optional output_stream attribute to value. -// -// value: A string specifying the output stream or logging level to print to. -// If not specified, defaults to "stderr" -func PrintV2OutputStream(value string) PrintV2Attr { - return func(m optionalAttr) { - m["output_stream"] = value - } -} - -// PrintV2End sets the optional end attribute to value. -// If not specified, defaults to "\n" -func PrintV2End(value string) PrintV2Attr { - return func(m optionalAttr) { - m["end"] = value - } -} - -// Prints a string scalar. -// -// Prints a string scalar to the desired output_stream. -// -// Arguments: -// input: The string scalar to print. -// -// Returns the created operation. -func PrintV2(scope *Scope, input tf.Output, optional ...PrintV2Attr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "PrintV2", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Inserts a dimension of 1 into a tensor's shape. -// -// Given a tensor `input`, this operation inserts a dimension of 1 at the -// dimension index `axis` of `input`'s shape. The dimension index `axis` starts at -// zero; if you specify a negative number for `axis` it is counted backward from -// the end. 
-// -// This operation is useful if you want to add a batch dimension to a single -// element. For example, if you have a single image of shape `[height, width, -// channels]`, you can make it a batch of 1 image with `expand_dims(image, 0)`, -// which will make the shape `[1, height, width, channels]`. -// -// Other examples: -// -// ``` -// # 't' is a tensor of shape [2] -// shape(expand_dims(t, 0)) ==> [1, 2] -// shape(expand_dims(t, 1)) ==> [2, 1] -// shape(expand_dims(t, -1)) ==> [2, 1] -// -// # 't2' is a tensor of shape [2, 3, 5] -// shape(expand_dims(t2, 0)) ==> [1, 2, 3, 5] -// shape(expand_dims(t2, 2)) ==> [2, 3, 1, 5] -// shape(expand_dims(t2, 3)) ==> [2, 3, 5, 1] -// ``` -// -// This operation requires that: -// -// `-1-input.dims() <= dim <= input.dims()` -// -// This operation is related to `squeeze()`, which removes dimensions of -// size 1. -// -// Arguments: -// -// axis: 0-D (scalar). Specifies the dimension index at which to -// expand the shape of `input`. Must be in the range -// `[-rank(input) - 1, rank(input)]`. -// -// Returns Contains the same data as `input`, but its shape has an additional -// dimension of size 1 added. -func ExpandDims(scope *Scope, input tf.Output, axis tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ExpandDims", - Input: []tf.Input{ - input, axis, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceSparseApplyProximalGradientDescentAttr is an optional argument to ResourceSparseApplyProximalGradientDescent. -type ResourceSparseApplyProximalGradientDescentAttr func(optionalAttr) - -// ResourceSparseApplyProximalGradientDescentUseLocking sets the optional use_locking attribute to value. -// -// value: If True, the subtraction will be protected by a lock; -// otherwise the behavior is undefined, but may exhibit less contention. -// If not specified, defaults to false -func ResourceSparseApplyProximalGradientDescentUseLocking(value bool) ResourceSparseApplyProximalGradientDescentAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Sparse update '*var' as FOBOS algorithm with fixed learning rate. -// -// That is for rows we have grad for, we update var as follows: -// prox_v = var - alpha * grad -// var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0} -// -// Arguments: -// var_: Should be from a Variable(). -// alpha: Scaling factor. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 regularization. Must be a scalar. -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. -// -// Returns the created operation. -func ResourceSparseApplyProximalGradientDescent(scope *Scope, var_ tf.Output, alpha tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyProximalGradientDescentAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceSparseApplyProximalGradientDescent", - Input: []tf.Input{ - var_, alpha, l1, l2, grad, indices, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// SparseMatrixTransposeAttr is an optional argument to SparseMatrixTranspose. -type SparseMatrixTransposeAttr func(optionalAttr) - -// SparseMatrixTransposeConjugate sets the optional conjugate attribute to value. -// -// value: Indicates whether `input` should be conjugated. 
-// If not specified, defaults to false -func SparseMatrixTransposeConjugate(value bool) SparseMatrixTransposeAttr { - return func(m optionalAttr) { - m["conjugate"] = value - } -} - -// Transposes the inner (matrix) dimensions of a CSRSparseMatrix. -// -// Transposes the inner (matrix) dimensions of a SparseMatrix and optionally -// conjugates its values. -// -// Arguments: -// input: A CSRSparseMatrix. -// -// -// Returns A CSRSparseMatrix. -func SparseMatrixTranspose(scope *Scope, input tf.Output, type_ tf.DataType, optional ...SparseMatrixTransposeAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"type": type_} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SparseMatrixTranspose", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// LoadTPUEmbeddingProximalAdagradParametersAttr is an optional argument to LoadTPUEmbeddingProximalAdagradParameters. -type LoadTPUEmbeddingProximalAdagradParametersAttr func(optionalAttr) - -// LoadTPUEmbeddingProximalAdagradParametersTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func LoadTPUEmbeddingProximalAdagradParametersTableId(value int64) LoadTPUEmbeddingProximalAdagradParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// LoadTPUEmbeddingProximalAdagradParametersTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingProximalAdagradParametersTableName(value string) LoadTPUEmbeddingProximalAdagradParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// LoadTPUEmbeddingProximalAdagradParametersConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingProximalAdagradParametersConfig(value string) LoadTPUEmbeddingProximalAdagradParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Load proximal Adagrad embedding parameters. -// -// An op that loads optimization parameters into HBM for embedding. Must be -// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -// embedding table configuration. For example, this op is used to install -// parameters that are loaded from a checkpoint before a training loop is -// executed. -// -// Arguments: -// parameters: Value of parameters used in the proximal Adagrad optimization algorithm. -// accumulators: Value of accumulators used in the proximal Adagrad optimization algorithm. -// -// -// -// Returns the created operation. -func LoadTPUEmbeddingProximalAdagradParameters(scope *Scope, parameters tf.Output, accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingProximalAdagradParametersAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LoadTPUEmbeddingProximalAdagradParameters", - Input: []tf.Input{ - parameters, accumulators, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// ResourceSparseApplyAdagradDAAttr is an optional argument to ResourceSparseApplyAdagradDA. -type ResourceSparseApplyAdagradDAAttr func(optionalAttr) - -// ResourceSparseApplyAdagradDAUseLocking sets the optional use_locking attribute to value. 
-// -// value: If True, updating of the var and accum tensors will be protected by -// a lock; otherwise the behavior is undefined, but may exhibit less contention. -// If not specified, defaults to false -func ResourceSparseApplyAdagradDAUseLocking(value bool) ResourceSparseApplyAdagradDAAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update entries in '*var' and '*accum' according to the proximal adagrad scheme. -// -// Arguments: -// var_: Should be from a Variable(). -// gradient_accumulator: Should be from a Variable(). -// gradient_squared_accumulator: Should be from a Variable(). -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. -// lr: Learning rate. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 regularization. Must be a scalar. -// global_step: Training step number. Must be a scalar. -// -// Returns the created operation. -func ResourceSparseApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumulator tf.Output, gradient_squared_accumulator tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, global_step tf.Output, optional ...ResourceSparseApplyAdagradDAAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceSparseApplyAdagradDA", - Input: []tf.Input{ - var_, gradient_accumulator, gradient_squared_accumulator, grad, indices, lr, l1, l2, global_step, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Applies softmax to a batched N-D `SparseTensor`. -// -// The inputs represent an N-D SparseTensor with logical shape `[..., B, C]` -// (where `N >= 2`), and with indices sorted in the canonical lexicographic order. -// -// This op is equivalent to applying the normal `tf.nn.softmax()` to each innermost -// logical submatrix with shape `[B, C]`, but with the catch that *the implicitly -// zero elements do not participate*. Specifically, the algorithm is equivalent -// to the following: -// -// (1) Applies `tf.nn.softmax()` to a densified view of each innermost submatrix -// with shape `[B, C]`, along the size-C dimension; -// (2) Masks out the original implicitly-zero locations; -// (3) Renormalizes the remaining elements. -// -// Hence, the `SparseTensor` result has exactly the same non-zero indices and -// shape. -// -// Arguments: -// sp_indices: 2-D. `NNZ x R` matrix with the indices of non-empty values in a -// SparseTensor, in canonical ordering. -// sp_values: 1-D. `NNZ` non-empty values corresponding to `sp_indices`. -// sp_shape: 1-D. Shape of the input SparseTensor. -// -// Returns 1-D. The `NNZ` values for the result `SparseTensor`. -func SparseSoftmax(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSoftmax", - Input: []tf.Input{ - sp_indices, sp_values, sp_shape, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes hyperbolic cosine of x element-wise. -// -// Given an input tensor, this function computes hyperbolic cosine of every -// element in the tensor. Input range is `[-inf, inf]` and output range -// is `[1, inf]`. 
-// -// ```python -// x = tf.constant([-float("inf"), -9, -0.5, 1, 1.2, 2, 10, float("inf")]) -// tf.math.cosh(x) ==> [inf 4.0515420e+03 1.1276259e+00 1.5430807e+00 1.8106556e+00 3.7621956e+00 1.1013233e+04 inf] -// ``` -func Cosh(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Cosh", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// CollectiveReduceAttr is an optional argument to CollectiveReduce. -type CollectiveReduceAttr func(optionalAttr) - -// CollectiveReduceWaitFor sets the optional wait_for attribute to value. -// If not specified, defaults to <> -func CollectiveReduceWaitFor(value []int64) CollectiveReduceAttr { - return func(m optionalAttr) { - m["wait_for"] = value - } -} - -// CollectiveReduceCommunicationHint sets the optional communication_hint attribute to value. -// If not specified, defaults to "auto" -func CollectiveReduceCommunicationHint(value string) CollectiveReduceAttr { - return func(m optionalAttr) { - m["communication_hint"] = value - } -} - -// CollectiveReduceTimeoutSeconds sets the optional timeout_seconds attribute to value. -// If not specified, defaults to 0 -func CollectiveReduceTimeoutSeconds(value float32) CollectiveReduceAttr { - return func(m optionalAttr) { - m["timeout_seconds"] = value - } -} - -// Mutually reduces multiple tensors of identical type and shape. -func CollectiveReduce(scope *Scope, input tf.Output, group_size int64, group_key int64, instance_key int64, merge_op string, final_op string, subdiv_offsets []int64, optional ...CollectiveReduceAttr) (data tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"group_size": group_size, "group_key": group_key, "instance_key": instance_key, "merge_op": merge_op, "final_op": final_op, "subdiv_offsets": subdiv_offsets} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CollectiveReduce", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceApplyAdaMaxAttr is an optional argument to ResourceApplyAdaMax. -type ResourceApplyAdaMaxAttr func(optionalAttr) - -// ResourceApplyAdaMaxUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var, m, and v tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyAdaMaxUseLocking(value bool) ResourceApplyAdaMaxAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' according to the AdaMax algorithm. -// -// m_t <- beta1 * m_{t-1} + (1 - beta1) * g -// v_t <- max(beta2 * v_{t-1}, abs(g)) -// variable <- variable - learning_rate / (1 - beta1^t) * m_t / (v_t + epsilon) -// -// Arguments: -// var_: Should be from a Variable(). -// m: Should be from a Variable(). -// v: Should be from a Variable(). -// beta1_power: Must be a scalar. -// lr: Scaling factor. Must be a scalar. -// beta1: Momentum factor. Must be a scalar. -// beta2: Momentum factor. Must be a scalar. -// epsilon: Ridge term. Must be a scalar. -// grad: The gradient. -// -// Returns the created operation. 
-func ResourceApplyAdaMax(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output, beta1_power tf.Output, lr tf.Output, beta1 tf.Output, beta2 tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdaMaxAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyAdaMax", - Input: []tf.Input{ - var_, m, v, beta1_power, lr, beta1, beta2, epsilon, grad, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Computes the LSTM cell backward propagation for the entire time sequence. -// -// This implementation is to be used in conjunction of BlockLSTMV2. -// -// Arguments: -// seq_len_max: Maximum time length actually used by this input. Outputs are padded -// with zeros beyond this length. -// x: The sequence input to the LSTM, shape (timelen, batch_size, num_inputs). -// cs_prev: Value of the initial cell state. -// h_prev: Initial output of cell (to be used for peephole). -// w: The weight matrix. -// wci: The weight matrix for input gate peephole connection. -// wcf: The weight matrix for forget gate peephole connection. -// wco: The weight matrix for output gate peephole connection. -// b: The bias vector. -// i: The input gate over the whole time sequence. -// cs: The cell state before the tanh over the whole time sequence. -// f: The forget gate over the whole time sequence. -// o: The output gate over the whole time sequence. -// ci: The cell input over the whole time sequence. -// co: The cell after the tanh over the whole time sequence. -// h: The output h vector over the whole time sequence. -// cs_grad: The current gradient of cs. -// h_grad: The gradient of h vector. -// use_peephole: Whether to use peephole weights. -// -// Returns: -// x_grad: The gradient of x to be back-propped. -// cs_prev_grad: The gradient of cs_prev to be back-propped. -// h_prev_grad: The gradient of h_prev to be back-propped. -// w_grad: The gradient for w to be back-propped. -// wci_grad: The gradient for wci to be back-propped. -// wcf_grad: The gradient for wcf to be back-propped. -// wco_grad: The gradient for wco to be back-propped. -// b_grad: The gradient for w to be back-propped. -func BlockLSTMGradV2(scope *Scope, seq_len_max tf.Output, x tf.Output, cs_prev tf.Output, h_prev tf.Output, w tf.Output, wci tf.Output, wcf tf.Output, wco tf.Output, b tf.Output, i tf.Output, cs tf.Output, f tf.Output, o tf.Output, ci tf.Output, co tf.Output, h tf.Output, cs_grad tf.Output, h_grad tf.Output, use_peephole bool) (x_grad tf.Output, cs_prev_grad tf.Output, h_prev_grad tf.Output, w_grad tf.Output, wci_grad tf.Output, wcf_grad tf.Output, wco_grad tf.Output, b_grad tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"use_peephole": use_peephole} - opspec := tf.OpSpec{ - Type: "BlockLSTMGradV2", - Input: []tf.Input{ - seq_len_max, x, cs_prev, h_prev, w, wci, wcf, wco, b, i, cs, f, o, ci, co, h, cs_grad, h_grad, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6), op.Output(7) -} - -// Returns the element-wise max of two SparseTensors. -// -// Assumes the two SparseTensors have the same shape, i.e., no broadcasting. -// -// Arguments: -// a_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, in the canonical lexicographic ordering. -// a_values: 1-D. 
`N` non-empty values corresponding to `a_indices`. -// a_shape: 1-D. Shape of the input SparseTensor. -// b_indices: counterpart to `a_indices` for the other operand. -// b_values: counterpart to `a_values` for the other operand; must be of the same dtype. -// b_shape: counterpart to `a_shape` for the other operand; the two shapes must be equal. -// -// Returns: -// output_indices: 2-D. The indices of the output SparseTensor. -// output_values: 1-D. The values of the output SparseTensor. -func SparseSparseMaximum(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output) (output_indices tf.Output, output_values tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSparseMaximum", - Input: []tf.Input{ - a_indices, a_values, a_shape, b_indices, b_values, b_shape, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Computes the Bessel i1e function of `x` element-wise. -// -// Exponentially scaled modified Bessel function of order 0 defined as -// `bessel_i1e(x) = exp(-abs(x)) bessel_i1(x)`. -// -// This function is faster and numerically stabler than `bessel_i1(x)`. -func BesselI1e(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BesselI1e", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// LSTMBlockCellAttr is an optional argument to LSTMBlockCell. -type LSTMBlockCellAttr func(optionalAttr) - -// LSTMBlockCellForgetBias sets the optional forget_bias attribute to value. -// -// value: The forget gate bias. -// If not specified, defaults to 1 -func LSTMBlockCellForgetBias(value float32) LSTMBlockCellAttr { - return func(m optionalAttr) { - m["forget_bias"] = value - } -} - -// LSTMBlockCellCellClip sets the optional cell_clip attribute to value. -// -// value: Value to clip the 'cs' value to. -// If not specified, defaults to 3 -func LSTMBlockCellCellClip(value float32) LSTMBlockCellAttr { - return func(m optionalAttr) { - m["cell_clip"] = value - } -} - -// LSTMBlockCellUsePeephole sets the optional use_peephole attribute to value. -// -// value: Whether to use peephole weights. -// If not specified, defaults to false -func LSTMBlockCellUsePeephole(value bool) LSTMBlockCellAttr { - return func(m optionalAttr) { - m["use_peephole"] = value - } -} - -// Computes the LSTM cell forward propagation for 1 time step. -// -// This implementation uses 1 weight matrix and 1 bias vector, and there's an -// optional peephole connection. -// -// This kernel op implements the following mathematical equations: -// -// ```python -// xh = [x, h_prev] -// [i, f, ci, o] = xh * w + b -// f = f + forget_bias -// -// if not use_peephole: -// wci = wcf = wco = 0 -// -// i = sigmoid(cs_prev * wci + i) -// f = sigmoid(cs_prev * wcf + f) -// ci = tanh(ci) -// -// cs = ci .* i + cs_prev .* f -// cs = clip(cs, cell_clip) -// -// o = sigmoid(cs * wco + o) -// co = tanh(cs) -// h = co .* o -// ``` -// -// Arguments: -// x: The input to the LSTM cell, shape (batch_size, num_inputs). -// cs_prev: Value of the cell state at previous time step. -// h_prev: Output of the previous cell at previous time step. -// w: The weight matrix. -// wci: The weight matrix for input gate peephole connection. -// wcf: The weight matrix for forget gate peephole connection. -// wco: The weight matrix for output gate peephole connection. -// b: The bias vector. 
-// -// Returns: -// i: The input gate. -// cs: The cell state before the tanh. -// f: The forget gate. -// o: The output gate. -// ci: The cell input. -// co: The cell after the tanh. -// h: The output h vector. -func LSTMBlockCell(scope *Scope, x tf.Output, cs_prev tf.Output, h_prev tf.Output, w tf.Output, wci tf.Output, wcf tf.Output, wco tf.Output, b tf.Output, optional ...LSTMBlockCellAttr) (i tf.Output, cs tf.Output, f tf.Output, o tf.Output, ci tf.Output, co tf.Output, h tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LSTMBlockCell", - Input: []tf.Input{ - x, cs_prev, h_prev, w, wci, wcf, wco, b, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6) -} - -// A TPU core selector Op. -// -// This Op produces a set of TPU cores (for warm-up) or a single TPU core -// (for regular inference) to execute the TPU program on. The output is -// consumed by TPUPartitionedCall. -// -// Returns A vector 1 or more TPU cores. -func TPUOrdinalSelector(scope *Scope) (device_ordinals tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TPUOrdinalSelector", - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SubstrAttr is an optional argument to Substr. -type SubstrAttr func(optionalAttr) - -// SubstrUnit sets the optional unit attribute to value. -// -// value: The unit that is used to create the substring. One of: `"BYTE"` (for -// defining position and length by bytes) or `"UTF8_CHAR"` (for the UTF-8 -// encoded Unicode code points). The default is `"BYTE"`. Results are undefined if -// `unit=UTF8_CHAR` and the `input` strings do not contain structurally valid -// UTF-8. -// If not specified, defaults to "BYTE" -func SubstrUnit(value string) SubstrAttr { - return func(m optionalAttr) { - m["unit"] = value - } -} - -// Return substrings from `Tensor` of strings. -// -// For each string in the input `Tensor`, creates a substring starting at index -// `pos` with a total length of `len`. -// -// If `len` defines a substring that would extend beyond the length of the input -// string, or if `len` is negative, then as many characters as possible are used. -// -// A negative `pos` indicates distance within the string backwards from the end. -// -// If `pos` specifies an index which is out of range for any of the input strings, -// then an `InvalidArgumentError` is thrown. -// -// `pos` and `len` must have the same shape, otherwise a `ValueError` is thrown on -// Op creation. -// -// *NOTE*: `Substr` supports broadcasting up to two dimensions. 
More about -// broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -// -// --- -// -// Examples -// -// Using scalar `pos` and `len`: -// -// ```python -// input = [b'Hello', b'World'] -// position = 1 -// length = 3 -// -// output = [b'ell', b'orl'] -// ``` -// -// Using `pos` and `len` with same shape as `input`: -// -// ```python -// input = [[b'ten', b'eleven', b'twelve'], -// [b'thirteen', b'fourteen', b'fifteen'], -// [b'sixteen', b'seventeen', b'eighteen']] -// position = [[1, 2, 3], -// [1, 2, 3], -// [1, 2, 3]] -// length = [[2, 3, 4], -// [4, 3, 2], -// [5, 5, 5]] -// -// output = [[b'en', b'eve', b'lve'], -// [b'hirt', b'urt', b'te'], -// [b'ixtee', b'vente', b'hteen']] -// ``` -// -// Broadcasting `pos` and `len` onto `input`: -// -// ``` -// input = [[b'ten', b'eleven', b'twelve'], -// [b'thirteen', b'fourteen', b'fifteen'], -// [b'sixteen', b'seventeen', b'eighteen'], -// [b'nineteen', b'twenty', b'twentyone']] -// position = [1, 2, 3] -// length = [1, 2, 3] -// -// output = [[b'e', b'ev', b'lve'], -// [b'h', b'ur', b'tee'], -// [b'i', b've', b'hte'], -// [b'i', b'en', b'nty']] -// ``` -// -// Broadcasting `input` onto `pos` and `len`: -// -// ``` -// input = b'thirteen' -// position = [1, 5, 7] -// length = [3, 2, 1] -// -// output = [b'hir', b'ee', b'n'] -// ``` -// -// Raises: -// -// * `ValueError`: If the first argument cannot be converted to a -// Tensor of `dtype string`. -// * `InvalidArgumentError`: If indices are out of range. -// * `ValueError`: If `pos` and `len` are not the same shape. -// -// -// Arguments: -// input: Tensor of strings -// pos: Scalar defining the position of first character in each substring -// len: Scalar defining the number of characters to include in each substring -// -// Returns Tensor of substrings -func Substr(scope *Scope, input tf.Output, pos tf.Output, len tf.Output, optional ...SubstrAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Substr", - Input: []tf.Input{ - input, pos, len, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Convert JSON-encoded Example records to binary protocol buffer strings. -// -// This op translates a tensor containing Example records, encoded using -// the [standard JSON -// mapping](https://developers.google.com/protocol-buffers/docs/proto3#json), -// into a tensor containing the same records encoded as binary protocol -// buffers. The resulting tensor can then be fed to any of the other -// Example-parsing ops. -// -// Arguments: -// json_examples: Each string is a JSON object serialized according to the JSON -// mapping of the Example proto. -// -// Returns Each string is a binary Example protocol buffer corresponding -// to the respective element of `json_examples`. -func DecodeJSONExample(scope *Scope, json_examples tf.Output) (binary_examples tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DecodeJSONExample", - Input: []tf.Input{ - json_examples, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Elementwise computes the bitwise AND of `x` and `y`. -// -// The result will have those bits set, that are set in both `x` and `y`. The -// computation is performed on the underlying representations of `x` and `y`. 
-// -// For example: -// -// ```python -// import tensorflow as tf -// from tensorflow.python.ops import bitwise_ops -// dtype_list = [tf.int8, tf.int16, tf.int32, tf.int64, -// tf.uint8, tf.uint16, tf.uint32, tf.uint64] -// -// for dtype in dtype_list: -// lhs = tf.constant([0, 5, 3, 14], dtype=dtype) -// rhs = tf.constant([5, 0, 7, 11], dtype=dtype) -// exp = tf.constant([0, 0, 3, 10], dtype=tf.float32) -// -// res = bitwise_ops.bitwise_and(lhs, rhs) -// tf.assert_equal(tf.cast(res, tf.float32), exp) # TRUE -// ``` -// -func BitwiseAnd(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BitwiseAnd", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// DecodeCSVAttr is an optional argument to DecodeCSV. -type DecodeCSVAttr func(optionalAttr) - -// DecodeCSVFieldDelim sets the optional field_delim attribute to value. -// -// value: char delimiter to separate fields in a record. -// If not specified, defaults to "," -func DecodeCSVFieldDelim(value string) DecodeCSVAttr { - return func(m optionalAttr) { - m["field_delim"] = value - } -} - -// DecodeCSVUseQuoteDelim sets the optional use_quote_delim attribute to value. -// -// value: If false, treats double quotation marks as regular -// characters inside of the string fields (ignoring RFC 4180, Section 2, -// Bullet 5). -// If not specified, defaults to true -func DecodeCSVUseQuoteDelim(value bool) DecodeCSVAttr { - return func(m optionalAttr) { - m["use_quote_delim"] = value - } -} - -// DecodeCSVNaValue sets the optional na_value attribute to value. -// -// value: Additional string to recognize as NA/NaN. -// If not specified, defaults to "" -func DecodeCSVNaValue(value string) DecodeCSVAttr { - return func(m optionalAttr) { - m["na_value"] = value - } -} - -// DecodeCSVSelectCols sets the optional select_cols attribute to value. -// If not specified, defaults to <> -func DecodeCSVSelectCols(value []int64) DecodeCSVAttr { - return func(m optionalAttr) { - m["select_cols"] = value - } -} - -// Convert CSV records to tensors. Each column maps to one tensor. -// -// RFC 4180 format is expected for the CSV records. -// (https://tools.ietf.org/html/rfc4180) -// Note that we allow leading and trailing spaces with int or float field. -// -// Arguments: -// records: Each string is a record/row in the csv and all records should have -// the same format. -// record_defaults: One tensor per column of the input record, with either a -// scalar default value for that column or an empty vector if the column is -// required. -// -// Returns Each tensor will have the same shape as records. -func DecodeCSV(scope *Scope, records tf.Output, record_defaults []tf.Output, optional ...DecodeCSVAttr) (output []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DecodeCSV", - Input: []tf.Input{ - records, tf.OutputList(record_defaults), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if output, idx, err = makeOutputList(op, idx, "output"); err != nil { - scope.UpdateErr("DecodeCSV", err) - return - } - return output -} - -// SerializeIteratorAttr is an optional argument to SerializeIterator. -type SerializeIteratorAttr func(optionalAttr) - -// SerializeIteratorExternalStatePolicy sets the optional external_state_policy attribute to value. 
-// If not specified, defaults to 0 -func SerializeIteratorExternalStatePolicy(value int64) SerializeIteratorAttr { - return func(m optionalAttr) { - m["external_state_policy"] = value - } -} - -// Converts the given `resource_handle` representing an iterator to a variant tensor. -// -// Arguments: -// resource_handle: A handle to an iterator resource. -// -// Returns A variant tensor storing the state of the iterator contained in the -// resource. -func SerializeIterator(scope *Scope, resource_handle tf.Output, optional ...SerializeIteratorAttr) (serialized tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SerializeIterator", - Input: []tf.Input{ - resource_handle, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceApplyCenteredRMSPropAttr is an optional argument to ResourceApplyCenteredRMSProp. -type ResourceApplyCenteredRMSPropAttr func(optionalAttr) - -// ResourceApplyCenteredRMSPropUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var, mg, ms, and mom tensors is -// protected by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyCenteredRMSPropUseLocking(value bool) ResourceApplyCenteredRMSPropAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' according to the centered RMSProp algorithm. -// -// The centered RMSProp algorithm uses an estimate of the centered second moment -// (i.e., the variance) for normalization, as opposed to regular RMSProp, which -// uses the (uncentered) second moment. This often helps with training, but is -// slightly more expensive in terms of computation and memory. -// -// Note that in dense implementation of this algorithm, mg, ms, and mom will -// update even if the grad is zero, but in this sparse implementation, mg, ms, -// and mom will not update in iterations during which the grad is zero. -// -// mean_square = decay * mean_square + (1-decay) * gradient ** 2 -// mean_grad = decay * mean_grad + (1-decay) * gradient -// -// Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2) -// -// mg <- rho * mg_{t-1} + (1-rho) * grad -// ms <- rho * ms_{t-1} + (1-rho) * grad * grad -// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon) -// var <- var - mom -// -// Arguments: -// var_: Should be from a Variable(). -// mg: Should be from a Variable(). -// ms: Should be from a Variable(). -// mom: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// rho: Decay rate. Must be a scalar. -// -// epsilon: Ridge term. Must be a scalar. -// grad: The gradient. -// -// Returns the created operation. -func ResourceApplyCenteredRMSProp(scope *Scope, var_ tf.Output, mg tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyCenteredRMSPropAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyCenteredRMSProp", - Input: []tf.Input{ - var_, mg, ms, mom, lr, rho, momentum, epsilon, grad, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// UnsortedSegmentJoinAttr is an optional argument to UnsortedSegmentJoin. 
-type UnsortedSegmentJoinAttr func(optionalAttr) - -// UnsortedSegmentJoinSeparator sets the optional separator attribute to value. -// -// value: The separator to use when joining. -// If not specified, defaults to "" -func UnsortedSegmentJoinSeparator(value string) UnsortedSegmentJoinAttr { - return func(m optionalAttr) { - m["separator"] = value - } -} - -// Joins the elements of `inputs` based on `segment_ids`. -// -// Computes the string join along segments of a tensor. -// Given `segment_ids` with rank `N` and `data` with rank `N+M`: -// -// `output[i, k1...kM] = strings.join([data[j1...jN, k1...kM])` -// -// where the join is over all [j1...jN] such that segment_ids[j1...jN] = i. -// Strings are joined in row-major order. -// -// For example: -// -// ```python -// inputs = [['Y', 'q', 'c'], ['Y', '6', '6'], ['p', 'G', 'a']] -// output_array = string_ops.unsorted_segment_join(inputs=inputs, -// segment_ids=[1, 0, 1], -// num_segments=2, -// separator=':')) -// # output_array ==> [['Y', '6', '6'], ['Y:p', 'q:G', 'c:a']] -// -// -// inputs = ['this', 'is', 'a', 'test'] -// output_array = string_ops.unsorted_segment_join(inputs=inputs, -// segment_ids=[0, 0, 0, 0], -// num_segments=1, -// separator=':')) -// # output_array ==> ['this:is:a:test'] -// ``` -// -// Arguments: -// inputs: The input to be joined. -// segment_ids: A tensor whose shape is a prefix of data.shape. Negative segment ids are not -// supported. -// num_segments: A scalar. -func UnsortedSegmentJoin(scope *Scope, inputs tf.Output, segment_ids tf.Output, num_segments tf.Output, optional ...UnsortedSegmentJoinAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "UnsortedSegmentJoin", - Input: []tf.Input{ - inputs, segment_ids, num_segments, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// LuAttr is an optional argument to Lu. -type LuAttr func(optionalAttr) - -// LuOutputIdxType sets the optional output_idx_type attribute to value. -// If not specified, defaults to DT_INT32 -func LuOutputIdxType(value tf.DataType) LuAttr { - return func(m optionalAttr) { - m["output_idx_type"] = value - } -} - -// Computes the LU decomposition of one or more square matrices. -// -// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions -// form square matrices. -// -// The input has to be invertible. -// -// The output consists of two tensors LU and P containing the LU decomposition -// of all input submatrices `[..., :, :]`. LU encodes the lower triangular and -// upper triangular factors. -// -// For each input submatrix of shape `[M, M]`, L is a lower triangular matrix of -// shape `[M, M]` with unit diagonal whose entries correspond to the strictly lower -// triangular part of LU. U is a upper triangular matrix of shape `[M, M]` whose -// entries correspond to the upper triangular part, including the diagonal, of LU. -// -// P represents a permutation matrix encoded as a list of indices each between `0` -// and `M-1`, inclusive. If P_mat denotes the permutation matrix corresponding to -// P, then the L, U and P satisfies P_mat * input = L * U. -// -// Arguments: -// input: A tensor of shape `[..., M, M]` whose inner-most 2 dimensions form matrices of -// size `[M, M]`. 
-// -// Returns: -// lu: A tensor of shape `[..., M, M]` whose strictly lower triangular part denotes the -// lower triangular factor `L` with unit diagonal, and whose upper triangular part -// denotes the upper triangular factor `U`. -// p: Permutation of the rows encoded as a list of indices in `0..M-1`. Shape is -// `[..., M]`. -// @compatibility(scipy) -// Similar to `scipy.linalg.lu`, except the triangular factors `L` and `U` are -// packed into a single tensor, the permutation is applied to `input` instead of -// the right hand side and the permutation `P` is returned as a list of indices -// instead of a permutation matrix. -// @end_compatibility -func Lu(scope *Scope, input tf.Output, optional ...LuAttr) (lu tf.Output, p tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Lu", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Outputs deterministic pseudorandom random numbers from a Poisson distribution. -// -// Outputs random values from a Poisson distribution. -// -// The outputs are a deterministic function of `shape`, `seed`, and `lam`. -// -// Arguments: -// shape: The shape of the output tensor. -// seed: 2 seeds (shape [2]). -// lam: The rate of the Poisson distribution. Shape must match the rightmost dimensions -// of `shape`. -// dtype: The type of the output. -// -// Returns Random values with specified shape. -func StatelessRandomPoisson(scope *Scope, shape tf.Output, seed tf.Output, lam tf.Output, dtype tf.DataType) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - opspec := tf.OpSpec{ - Type: "StatelessRandomPoisson", - Input: []tf.Input{ - shape, seed, lam, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the truth value of `NOT x` element-wise. -// -// Arguments: -// x: A `Tensor` of type `bool`. -// -// Returns A `Tensor` of type `bool` with the same shape as `x`. The logical negation of `x`. -func LogicalNot(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "LogicalNot", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ImageProjectiveTransformV2Attr is an optional argument to ImageProjectiveTransformV2. -type ImageProjectiveTransformV2Attr func(optionalAttr) - -// ImageProjectiveTransformV2FillMode sets the optional fill_mode attribute to value. -// -// value: Fill mode, "REFLECT", "WRAP", or "CONSTANT". -// If not specified, defaults to "CONSTANT" -func ImageProjectiveTransformV2FillMode(value string) ImageProjectiveTransformV2Attr { - return func(m optionalAttr) { - m["fill_mode"] = value - } -} - -// Applies the given transform to each of the images. -// -// If one row of `transforms` is `[a0, a1, a2, b0, b1, b2, c0, c1]`, then it maps -// the *output* point `(x, y)` to a transformed *input* point -// `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`, where -// `k = c0 x + c1 y + 1`. If the transformed point lays outside of the input -// image, the output pixel is set to 0. -// -// Arguments: -// images: 4-D with shape `[batch, height, width, channels]`. -// transforms: 2-D Tensor, `[batch, 8]` or `[1, 8]` matrix, where each row corresponds to a 3 x 3 -// projective transformation matrix, with the last entry assumed to be 1. 
If there -// is one row, the same transformation will be applied to all images. -// output_shape: 1-D Tensor [new_height, new_width]. -// interpolation: Interpolation method, "NEAREST" or "BILINEAR". -// -// Returns 4-D with shape -// `[batch, new_height, new_width, channels]`. -func ImageProjectiveTransformV2(scope *Scope, images tf.Output, transforms tf.Output, output_shape tf.Output, interpolation string, optional ...ImageProjectiveTransformV2Attr) (transformed_images tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"interpolation": interpolation} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ImageProjectiveTransformV2", - Input: []tf.Input{ - images, transforms, output_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes rectified linear gradients for a Relu operation. -// -// Arguments: -// gradients: The backpropagated gradients to the corresponding Relu operation. -// features: The features passed as input to the corresponding Relu operation, OR -// the outputs of that operation (both work equivalently). -// -// Returns `gradients * (features > 0)`. -func ReluGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReluGrad", - Input: []tf.Input{ - gradients, features, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceApplyMomentumAttr is an optional argument to ResourceApplyMomentum. -type ResourceApplyMomentumAttr func(optionalAttr) - -// ResourceApplyMomentumUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyMomentumUseLocking(value bool) ResourceApplyMomentumAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// ResourceApplyMomentumUseNesterov sets the optional use_nesterov attribute to value. -// -// value: If `True`, the tensor passed to compute grad will be -// var - lr * momentum * accum, so in the end, the var you get is actually -// var - lr * momentum * accum. -// If not specified, defaults to false -func ResourceApplyMomentumUseNesterov(value bool) ResourceApplyMomentumAttr { - return func(m optionalAttr) { - m["use_nesterov"] = value - } -} - -// Update '*var' according to the momentum scheme. -// -// Set use_nesterov = True if you want to use Nesterov momentum. -// -// accum = accum * momentum + grad -// var -= lr * accum -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// grad: The gradient. -// momentum: Momentum. Must be a scalar. -// -// Returns the created operation. -func ResourceApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, momentum tf.Output, optional ...ResourceApplyMomentumAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyMomentum", - Input: []tf.Input{ - var_, accum, lr, grad, momentum, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// IRFFTAttr is an optional argument to IRFFT. 
-type IRFFTAttr func(optionalAttr) - -// IRFFTTreal sets the optional Treal attribute to value. -// If not specified, defaults to DT_FLOAT -func IRFFTTreal(value tf.DataType) IRFFTAttr { - return func(m optionalAttr) { - m["Treal"] = value - } -} - -// Inverse real-valued fast Fourier transform. -// -// Computes the inverse 1-dimensional discrete Fourier transform of a real-valued -// signal over the inner-most dimension of `input`. -// -// The inner-most dimension of `input` is assumed to be the result of `RFFT`: the -// `fft_length / 2 + 1` unique components of the DFT of a real-valued signal. If -// `fft_length` is not provided, it is computed from the size of the inner-most -// dimension of `input` (`fft_length = 2 * (inner - 1)`). If the FFT length used to -// compute `input` is odd, it should be provided since it cannot be inferred -// properly. -// -// Along the axis `IRFFT` is computed on, if `fft_length / 2 + 1` is smaller -// than the corresponding dimension of `input`, the dimension is cropped. If it is -// larger, the dimension is padded with zeros. -// -// Arguments: -// input: A complex tensor. -// fft_length: An int32 tensor of shape [1]. The FFT length. -// -// Returns A float32 tensor of the same rank as `input`. The inner-most -// dimension of `input` is replaced with the `fft_length` samples of its inverse -// 1D Fourier transform. -// -// @compatibility(numpy) -// Equivalent to np.fft.irfft -// @end_compatibility -func IRFFT(scope *Scope, input tf.Output, fft_length tf.Output, optional ...IRFFTAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "IRFFT", - Input: []tf.Input{ - input, fft_length, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// EnqueueTPUEmbeddingSparseBatchAttr is an optional argument to EnqueueTPUEmbeddingSparseBatch. -type EnqueueTPUEmbeddingSparseBatchAttr func(optionalAttr) - -// EnqueueTPUEmbeddingSparseBatchDeviceOrdinal sets the optional device_ordinal attribute to value. -// -// value: The TPU device to use. Should be >= 0 and less than the number -// of TPU cores in the task on which the node is placed. -// If not specified, defaults to -1 -func EnqueueTPUEmbeddingSparseBatchDeviceOrdinal(value int64) EnqueueTPUEmbeddingSparseBatchAttr { - return func(m optionalAttr) { - m["device_ordinal"] = value - } -} - -// EnqueueTPUEmbeddingSparseBatchCombiners sets the optional combiners attribute to value. -// -// value: A list of string scalars, one for each embedding table that specify -// how to normalize the embedding activations after weighted summation. -// Supported combiners are 'mean', 'sum', or 'sqrtn'. It is invalid to have -// the sum of the weights be 0 for 'mean' or the sum of the squared weights be -// 0 for 'sqrtn'. If combiners isn't passed, the default is to use 'sum' for -// all tables. -// If not specified, defaults to <> -func EnqueueTPUEmbeddingSparseBatchCombiners(value []string) EnqueueTPUEmbeddingSparseBatchAttr { - return func(m optionalAttr) { - m["combiners"] = value - } -} - -// An op that enqueues TPUEmbedding input indices from a SparseTensor. 
-// -// This Op eases the porting of code that uses embedding_lookup_sparse(), -// although some Python preprocessing of the SparseTensor arguments to -// embedding_lookup_sparse() is required to produce the arguments to this Op, -// since only a single EnqueueTPUEmbeddingSparseBatch Op is allowed per training -// step. -// -// The tensors at corresponding positions in the three input lists -// must have the same shape, i.e. rank 1 with dim_size() equal to the total -// number of lookups into the table described by the corresponding table_id. -// -// Arguments: -// sample_indices: A list of rank 1 Tensors specifying the training example and -// feature to which the corresponding embedding_indices and aggregation_weights -// values belong. sample_indices[i] must equal b * nf + f, where nf is the -// number of features from the corresponding table, f is in [0, nf), and -// b is in [0, batch size). -// embedding_indices: A list of rank 1 Tensors, indices into the embedding tables. -// aggregation_weights: A list of rank 1 Tensors containing per sample -- i.e. per -// (training example, feature) -- aggregation weights. -// mode_override: A string input that overrides the mode specified in the -// TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference', -// 'training', 'backward_pass_only'}. When set to 'unspecified', the mode set -// in TPUEmbeddingConfiguration is used, otherwise mode_override is used. -// -// Returns the created operation. -func EnqueueTPUEmbeddingSparseBatch(scope *Scope, sample_indices []tf.Output, embedding_indices []tf.Output, aggregation_weights []tf.Output, mode_override tf.Output, optional ...EnqueueTPUEmbeddingSparseBatchAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "EnqueueTPUEmbeddingSparseBatch", - Input: []tf.Input{ - tf.OutputList(sample_indices), tf.OutputList(embedding_indices), tf.OutputList(aggregation_weights), mode_override, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// ResourceScatterNdUpdateAttr is an optional argument to ResourceScatterNdUpdate. -type ResourceScatterNdUpdateAttr func(optionalAttr) - -// ResourceScatterNdUpdateUseLocking sets the optional use_locking attribute to value. -// -// value: An optional bool. Defaults to True. If True, the assignment will -// be protected by a lock; otherwise the behavior is undefined, -// but may exhibit less contention. -// If not specified, defaults to true -func ResourceScatterNdUpdateUseLocking(value bool) ResourceScatterNdUpdateAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Applies sparse `updates` to individual values or slices within a given -// -// variable according to `indices`. -// -// `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. -// -// `indices` must be integer tensor, containing indices into `ref`. -// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`. -// -// The innermost dimension of `indices` (with length `K`) corresponds to -// indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th -// dimension of `ref`. -// -// `updates` is `Tensor` of rank `Q-1+P-K` with shape: -// -// ``` -// [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]]. -// ``` -// -// For example, say we want to update 4 scattered elements to a rank-1 tensor to -// 8 elements. 
In Python, that update would look like this: -// -// ```python -// ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8]) -// indices = tf.constant([[4], [3], [1] ,[7]]) -// updates = tf.constant([9, 10, 11, 12]) -// update = tf.scatter_nd_update(ref, indices, updates) -// with tf.Session() as sess: -// print sess.run(update) -// ``` -// -// The resulting update to ref would look like this: -// -// [1, 11, 3, 10, 9, 6, 7, 12] -// -// See `tf.scatter_nd` for more details about how to make updates to -// slices. -// -// Arguments: -// ref: A resource handle. Must be from a VarHandleOp. -// indices: A Tensor. Must be one of the following types: int32, int64. -// A tensor of indices into ref. -// updates: A Tensor. Must have the same type as ref. A tensor of updated -// values to add to ref. -// -// Returns the created operation. -func ResourceScatterNdUpdate(scope *Scope, ref tf.Output, indices tf.Output, updates tf.Output, optional ...ResourceScatterNdUpdateAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceScatterNdUpdate", - Input: []tf.Input{ - ref, indices, updates, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Computes square root of x element-wise. -// -// I.e., \\(y = \sqrt{x} = x^{1/2}\\). -func Sqrt(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Sqrt", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Gradient op for `MirrorPad` op. This op folds a mirror-padded tensor. -// -// This operation folds the padded areas of `input` by `MirrorPad` according to the -// `paddings` you specify. `paddings` must be the same as `paddings` argument -// given to the corresponding `MirrorPad` op. -// -// The folded size of each dimension D of the output is: -// -// `input.dim_size(D) - paddings(D, 0) - paddings(D, 1)` -// -// For example: -// -// ``` -// # 't' is [[1, 2, 3], [4, 5, 6], [7, 8, 9]]. -// # 'paddings' is [[0, 1]], [0, 1]]. -// # 'mode' is SYMMETRIC. -// # rank of 't' is 2. -// pad(t, paddings) ==> [[ 1, 5] -// [11, 28]] -// ``` -// -// Arguments: -// input: The input tensor to be folded. -// paddings: A two-column matrix specifying the padding sizes. The number of -// rows must be the same as the rank of `input`. -// mode: The mode used in the `MirrorPad` op. -// -// Returns The folded tensor. -func MirrorPadGrad(scope *Scope, input tf.Output, paddings tf.Output, mode string) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"mode": mode} - opspec := tf.OpSpec{ - Type: "MirrorPadGrad", - Input: []tf.Input{ - input, paddings, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Produces the max pool of the input tensor for quantized types. -// -// Arguments: -// input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over. -// min_input: The float value that the lowest quantized input value represents. -// max_input: The float value that the highest quantized input value represents. -// ksize: The size of the window for each dimension of the input tensor. -// The length must be 4 to match the number of dimensions of the input. -// strides: The stride of the sliding window for each dimension of the input -// tensor. The length must be 4 to match the number of dimensions of the input. -// padding: The type of padding algorithm to use. 
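For reference, a Go sketch of the Python `scatter_nd_update` example in the `ResourceScatterNdUpdate` comment above might look as follows, assuming the resource-variable wrappers (`VarHandleOp`, `AssignVariableOp`, `ReadVariableOp`) from the same generated package; error handling is elided.

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// A rank-1 resource variable holding 8 elements, mirroring the Python example.
	ref := op.VarHandleOp(s, tf.Int32, tf.MakeShape(8))
	init := op.AssignVariableOp(s, ref, op.Const(s, []int32{1, 2, 3, 4, 5, 6, 7, 8}))
	indices := op.Const(s, [][]int32{{4}, {3}, {1}, {7}})
	updates := op.Const(s, []int32{9, 10, 11, 12})
	update := op.ResourceScatterNdUpdate(s, ref, indices, updates)
	read := op.ReadVariableOp(s, ref, tf.Int32)

	graph, _ := s.Finalize()
	sess, _ := tf.NewSession(graph, nil)
	defer sess.Close()
	// Run the ops in order: initialize the variable, scatter the updates, read back.
	sess.Run(nil, nil, []*tf.Operation{init})
	sess.Run(nil, nil, []*tf.Operation{update})
	out, _ := sess.Run(nil, []tf.Output{read}, nil)
	fmt.Println(out[0].Value()) // [1 11 3 10 9 6 7 12]
}
```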
-// -// Returns: -// output -// min_output: The float value that the lowest quantized output value represents. -// max_output: The float value that the highest quantized output value represents. -func QuantizedMaxPool(scope *Scope, input tf.Output, min_input tf.Output, max_input tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output, min_output tf.Output, max_output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - opspec := tf.OpSpec{ - Type: "QuantizedMaxPool", - Input: []tf.Input{ - input, min_input, max_input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// ResourceApplyAdagradAttr is an optional argument to ResourceApplyAdagrad. -type ResourceApplyAdagradAttr func(optionalAttr) - -// ResourceApplyAdagradUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyAdagradUseLocking(value bool) ResourceApplyAdagradAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// ResourceApplyAdagradUpdateSlots sets the optional update_slots attribute to value. -// If not specified, defaults to true -func ResourceApplyAdagradUpdateSlots(value bool) ResourceApplyAdagradAttr { - return func(m optionalAttr) { - m["update_slots"] = value - } -} - -// Update '*var' according to the adagrad scheme. -// -// accum += grad * grad -// var -= lr * grad * (1 / sqrt(accum)) -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// grad: The gradient. -// -// Returns the created operation. -func ResourceApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, optional ...ResourceApplyAdagradAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyAdagrad", - Input: []tf.Input{ - var_, accum, lr, grad, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// The gradient of SparseFillEmptyRows. -// -// Takes vectors reverse_index_map, shaped `[N]`, and grad_values, -// shaped `[N_full]`, where `N_full >= N` and copies data into either -// `d_values` or `d_default_value`. Here `d_values` is shaped `[N]` and -// `d_default_value` is a scalar. -// -// d_values[j] = grad_values[reverse_index_map[j]] -// d_default_value = sum_{k : 0 .. N_full - 1} ( -// grad_values[k] * 1{k not in reverse_index_map}) -// -// Arguments: -// reverse_index_map: 1-D. The reverse index map from SparseFillEmptyRows. -// grad_values: 1-D. The gradients from backprop. -// -// Returns: -// d_values: 1-D. The backprop into values. -// d_default_value: 0-D. The backprop into default_value. -func SparseFillEmptyRowsGrad(scope *Scope, reverse_index_map tf.Output, grad_values tf.Output) (d_values tf.Output, d_default_value tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseFillEmptyRowsGrad", - Input: []tf.Input{ - reverse_index_map, grad_values, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// MaxPool3DGradAttr is an optional argument to MaxPool3DGrad. 
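A single Adagrad step with the `ResourceApplyAdagrad` wrapper above could be sketched like this, with hypothetical values and the same resource-variable set-up as the previous sketch (error handling elided):

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// var and accum are resource variables; lr and grad are ordinary tensors.
	v := op.VarHandleOp(s, tf.Float, tf.MakeShape(3))
	accum := op.VarHandleOp(s, tf.Float, tf.MakeShape(3))
	initV := op.AssignVariableOp(s, v, op.Const(s, []float32{1, 2, 3}))
	initA := op.AssignVariableOp(s, accum, op.Const(s, []float32{0.1, 0.1, 0.1}))
	lr := op.Const(s, float32(0.01))
	grad := op.Const(s, []float32{0.5, -0.5, 1.0})
	// accum += grad * grad; var -= lr * grad / sqrt(accum)
	step := op.ResourceApplyAdagrad(s, v, accum, lr, grad,
		op.ResourceApplyAdagradUseLocking(true))
	read := op.ReadVariableOp(s, v, tf.Float)

	graph, _ := s.Finalize()
	sess, _ := tf.NewSession(graph, nil)
	defer sess.Close()
	sess.Run(nil, nil, []*tf.Operation{initV, initA})
	sess.Run(nil, nil, []*tf.Operation{step})
	out, _ := sess.Run(nil, []tf.Output{read}, nil)
	fmt.Println(out[0].Value()) // var after one Adagrad step
}
```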
-type MaxPool3DGradAttr func(optionalAttr) - -// MaxPool3DGradDataFormat sets the optional data_format attribute to value. -// -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func MaxPool3DGradDataFormat(value string) MaxPool3DGradAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Computes gradients of 3D max pooling function. -// -// Arguments: -// orig_input: The original input tensor. -// orig_output: The original output tensor. -// grad: Output backprop of shape `[batch, depth, rows, cols, channels]`. -// ksize: 1-D tensor of length 5. The size of the window for each dimension of -// the input tensor. Must have `ksize[0] = ksize[4] = 1`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. -func MaxPool3DGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MaxPool3DGrad", - Input: []tf.Input{ - orig_input, orig_output, grad, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceApplyRMSPropAttr is an optional argument to ResourceApplyRMSProp. -type ResourceApplyRMSPropAttr func(optionalAttr) - -// ResourceApplyRMSPropUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var, ms, and mom tensors is protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyRMSPropUseLocking(value bool) ResourceApplyRMSPropAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' according to the RMSProp algorithm. -// -// Note that in dense implementation of this algorithm, ms and mom will -// update even if the grad is zero, but in this sparse implementation, ms -// and mom will not update in iterations during which the grad is zero. -// -// mean_square = decay * mean_square + (1-decay) * gradient ** 2 -// Delta = learning_rate * gradient / sqrt(mean_square + epsilon) -// -// ms <- rho * ms_{t-1} + (1-rho) * grad * grad -// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) -// var <- var - mom -// -// Arguments: -// var_: Should be from a Variable(). -// ms: Should be from a Variable(). -// mom: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// rho: Decay rate. Must be a scalar. -// -// epsilon: Ridge term. Must be a scalar. -// grad: The gradient. -// -// Returns the created operation. 
-func ResourceApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyRMSPropAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyRMSProp", - Input: []tf.Input{ - var_, ms, mom, lr, rho, momentum, epsilon, grad, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Reshapes a SparseTensor to represent values in a new dense shape. -// -// This operation has the same semantics as reshape on the represented dense -// tensor. The `input_indices` are recomputed based on the requested `new_shape`. -// -// If one component of `new_shape` is the special value -1, the size of that -// dimension is computed so that the total dense size remains constant. At -// most one component of `new_shape` can be -1. The number of dense elements -// implied by `new_shape` must be the same as the number of dense elements -// originally implied by `input_shape`. -// -// Reshaping does not affect the order of values in the SparseTensor. -// -// If the input tensor has rank `R_in` and `N` non-empty values, and `new_shape` -// has length `R_out`, then `input_indices` has shape `[N, R_in]`, -// `input_shape` has length `R_in`, `output_indices` has shape `[N, R_out]`, and -// `output_shape` has length `R_out`. -// -// Arguments: -// input_indices: 2-D. `N x R_in` matrix with the indices of non-empty values in a -// SparseTensor. -// input_shape: 1-D. `R_in` vector with the input SparseTensor's dense shape. -// new_shape: 1-D. `R_out` vector with the requested new dense shape. -// -// Returns: -// output_indices: 2-D. `N x R_out` matrix with the updated indices of non-empty -// values in the output SparseTensor. -// output_shape: 1-D. `R_out` vector with the full dense shape of the output -// SparseTensor. This is the same as `new_shape` but with any -1 dimensions -// filled in. -func SparseReshape(scope *Scope, input_indices tf.Output, input_shape tf.Output, new_shape tf.Output) (output_indices tf.Output, output_shape tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseReshape", - Input: []tf.Input{ - input_indices, input_shape, new_shape, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Elementwise computes the bitwise left-shift of `x` and `y`. -// -// If `y` is negative, or greater than or equal to the width of `x` in bits the -// result is implementation defined. 
-// -// Example: -// -// ```python -// import tensorflow as tf -// from tensorflow.python.ops import bitwise_ops -// import numpy as np -// dtype_list = [tf.int8, tf.int16, tf.int32, tf.int64] -// -// for dtype in dtype_list: -// lhs = tf.constant([-1, -5, -3, -14], dtype=dtype) -// rhs = tf.constant([5, 0, 7, 11], dtype=dtype) -// -// left_shift_result = bitwise_ops.left_shift(lhs, rhs) -// -// print(left_shift_result) -// -// # This will print: -// # tf.Tensor([ -32 -5 -128 0], shape=(4,), dtype=int8) -// # tf.Tensor([ -32 -5 -384 -28672], shape=(4,), dtype=int16) -// # tf.Tensor([ -32 -5 -384 -28672], shape=(4,), dtype=int32) -// # tf.Tensor([ -32 -5 -384 -28672], shape=(4,), dtype=int64) -// -// lhs = np.array([-2, 64, 101, 32], dtype=np.int8) -// rhs = np.array([-1, -5, -3, -14], dtype=np.int8) -// bitwise_ops.left_shift(lhs, rhs) -// # -// ``` -// -func LeftShift(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "LeftShift", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Generates a feature cross from a list of tensors, and returns it as a -// RaggedTensor. See `tf.ragged.cross` for more details. -// -// Arguments: -// ragged_values: The values tensor for each RaggedTensor input. -// ragged_row_splits: The row_splits tensor for each RaggedTensor input. -// sparse_indices: The indices tensor for each SparseTensor input. -// sparse_values: The values tensor for each SparseTensor input. -// sparse_shape: The dense_shape tensor for each SparseTensor input. -// dense_inputs: The tf.Tensor inputs. -// input_order: String specifying the tensor type for each input. The `i`th character in -// this string specifies the type of the `i`th input, and is one of: 'R' (ragged), -// 'D' (dense), or 'S' (sparse). This attr is used to ensure that the crossed -// values are combined in the order of the inputs from the call to tf.ragged.cross. -// -// -// -// -// -// -// Returns: -// output_values: The `values` for the returned `RaggedTensor`. -// output_row_splits: The `row_splits` for the returned `RaggedTensor`. -func RaggedCross(scope *Scope, ragged_values []tf.Output, ragged_row_splits []tf.Output, sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shape []tf.Output, dense_inputs []tf.Output, input_order string, hashed_output bool, num_buckets int64, hash_key int64, out_values_type tf.DataType, out_row_splits_type tf.DataType) (output_values tf.Output, output_row_splits tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"input_order": input_order, "hashed_output": hashed_output, "num_buckets": num_buckets, "hash_key": hash_key, "out_values_type": out_values_type, "out_row_splits_type": out_row_splits_type} - opspec := tf.OpSpec{ - Type: "RaggedCross", - Input: []tf.Input{ - tf.OutputList(ragged_values), tf.OutputList(ragged_row_splits), tf.OutputList(sparse_indices), tf.OutputList(sparse_values), tf.OutputList(sparse_shape), tf.OutputList(dense_inputs), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Output a fact about factorials. -func Fact(scope *Scope) (fact tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Fact", - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes softmax cross entropy cost and gradients to backpropagate. 
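The Python example in the `LeftShift` comment above translates to Go roughly as follows (error handling elided):

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Same int8 values as in the comment's Python example.
	lhs := op.Const(s, []int8{-1, -5, -3, -14})
	rhs := op.Const(s, []int8{5, 0, 7, 11})
	shifted := op.LeftShift(s, lhs, rhs)

	graph, _ := s.Finalize()
	sess, _ := tf.NewSession(graph, nil)
	defer sess.Close()
	out, _ := sess.Run(nil, []tf.Output{shifted}, nil)
	fmt.Println(out[0].Value()) // [-32 -5 -128 0] for int8
}
```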
-// -// Unlike `SoftmaxCrossEntropyWithLogits`, this operation does not accept -// a matrix of label probabilities, but rather a single label per row -// of features. This label is considered to have probability 1.0 for the -// given row. -// -// Inputs are the logits, not probabilities. -// -// Arguments: -// features: batch_size x num_classes matrix -// labels: batch_size vector with values in [0, num_classes). -// This is the label for the given minibatch entry. -// -// Returns: -// loss: Per example loss (batch_size vector). -// backprop: backpropagated gradients (batch_size x num_classes matrix). -func SparseSoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, labels tf.Output) (loss tf.Output, backprop tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSoftmaxCrossEntropyWithLogits", - Input: []tf.Input{ - features, labels, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Worker heartbeat op. -// -// Heartbeats may be sent periodically to indicate the coordinator is still active, -// to retrieve the current worker status and to expedite shutdown when necessary. -// -// Arguments: -// request: A string tensor containing a serialized WorkerHeartbeatRequest -// -// Returns A string tensor containing a serialized WorkerHeartbeatResponse -func WorkerHeartbeat(scope *Scope, request tf.Output) (response tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "WorkerHeartbeat", - Input: []tf.Input{ - request, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceApplyProximalGradientDescentAttr is an optional argument to ResourceApplyProximalGradientDescent. -type ResourceApplyProximalGradientDescentAttr func(optionalAttr) - -// ResourceApplyProximalGradientDescentUseLocking sets the optional use_locking attribute to value. -// -// value: If True, the subtraction will be protected by a lock; -// otherwise the behavior is undefined, but may exhibit less contention. -// If not specified, defaults to false -func ResourceApplyProximalGradientDescentUseLocking(value bool) ResourceApplyProximalGradientDescentAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' as FOBOS algorithm with fixed learning rate. -// -// prox_v = var - alpha * delta -// var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0} -// -// Arguments: -// var_: Should be from a Variable(). -// alpha: Scaling factor. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 regularization. Must be a scalar. -// delta: The change. -// -// Returns the created operation. -func ResourceApplyProximalGradientDescent(scope *Scope, var_ tf.Output, alpha tf.Output, l1 tf.Output, l2 tf.Output, delta tf.Output, optional ...ResourceApplyProximalGradientDescentAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyProximalGradientDescent", - Input: []tf.Input{ - var_, alpha, l1, l2, delta, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// RandomUniformAttr is an optional argument to RandomUniform. -type RandomUniformAttr func(optionalAttr) - -// RandomUniformSeed sets the optional seed attribute to value. -// -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. 
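A small sketch of the `SparseSoftmaxCrossEntropyWithLogits` wrapper above, with made-up logits for two examples over three classes (error handling elided):

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// Two examples, three classes; labels are class ids, not one-hot rows.
	features := op.Const(s, [][]float32{{2, 1, 0.1}, {0.1, 1, 3}})
	labels := op.Const(s, []int64{0, 2})
	loss, backprop := op.SparseSoftmaxCrossEntropyWithLogits(s, features, labels)

	graph, _ := s.Finalize()
	sess, _ := tf.NewSession(graph, nil)
	defer sess.Close()
	out, _ := sess.Run(nil, []tf.Output{loss, backprop}, nil)
	fmt.Println(out[0].Value()) // per-example loss, shape [2]
	fmt.Println(out[1].Value()) // backpropagated gradients, shape [2 3]
}
```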
-// If not specified, defaults to 0 -func RandomUniformSeed(value int64) RandomUniformAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// RandomUniformSeed2 sets the optional seed2 attribute to value. -// -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomUniformSeed2(value int64) RandomUniformAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Outputs random values from a uniform distribution. -// -// The generated values follow a uniform distribution in the range `[0, 1)`. The -// lower bound 0 is included in the range, while the upper bound 1 is excluded. -// -// Arguments: -// shape: The shape of the output tensor. -// dtype: The type of the output. -// -// Returns A tensor of the specified shape filled with uniform random values. -func RandomUniform(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...RandomUniformAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RandomUniform", - Input: []tf.Input{ - shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr is an optional argument to RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebug. -type RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr func(optionalAttr) - -// RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugTableId(value int64) RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugTableName(value string) RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugConfig(value string) RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Retrieve Adadelta embedding parameters with debug support. -// -// An op that retrieves optimization parameters from embedding to host -// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up -// the correct embedding table configuration. For example, this op is -// used to retrieve updated parameters before saving a checkpoint. -// -// Returns: -// parameters: Parameter parameters updated by the Adadelta optimization algorithm. -// accumulators: Parameter accumulators updated by the Adadelta optimization algorithm. -// updates: Parameter updates updated by the Adadelta optimization algorithm. -// gradient_accumulators: Parameter gradient_accumulators updated by the Adadelta optimization algorithm. 
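The `RandomUniform` wrapper above also illustrates the functional-option pattern used by every `*Attr` helper in this file; a sketch with hypothetical seeds (error handling elided):

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	shape := op.Const(s, []int32{2, 3})
	// Optional attributes are passed as trailing functional options.
	values := op.RandomUniform(s, shape, tf.Float,
		op.RandomUniformSeed(7), op.RandomUniformSeed2(11))

	graph, _ := s.Finalize()
	sess, _ := tf.NewSession(graph, nil)
	defer sess.Close()
	out, _ := sess.Run(nil, []tf.Output{values}, nil)
	fmt.Println(out[0].Value()) // a 2x3 matrix of floats in [0, 1)
}
```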
-func RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebug(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr) (parameters tf.Output, accumulators tf.Output, updates tf.Output, gradient_accumulators tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebug", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3) -} - -// Retrieves the tree ensemble resource stamp token, number of trees and growing statistics. -// -// Arguments: -// tree_ensemble_handle: Handle to the tree ensemble. -// -// Returns: -// stamp_token: Stamp token of the tree ensemble resource. -// num_trees: The number of trees in the tree ensemble resource. -// num_finalized_trees: The number of trees that were finished successfully. -// num_attempted_layers: The number of layers we attempted to build (but not necessarily succeeded). -// last_layer_nodes_range: Rank size 2 tensor that contains start and end ids of the nodes in the latest -// layer. -func BoostedTreesGetEnsembleStates(scope *Scope, tree_ensemble_handle tf.Output) (stamp_token tf.Output, num_trees tf.Output, num_finalized_trees tf.Output, num_attempted_layers tf.Output, last_layer_nodes_range tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BoostedTreesGetEnsembleStates", - Input: []tf.Input{ - tree_ensemble_handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) -} - -// ResourceScatterNdAddAttr is an optional argument to ResourceScatterNdAdd. -type ResourceScatterNdAddAttr func(optionalAttr) - -// ResourceScatterNdAddUseLocking sets the optional use_locking attribute to value. -// -// value: An optional bool. Defaults to True. If True, the assignment will -// be protected by a lock; otherwise the behavior is undefined, -// but may exhibit less contention. -// If not specified, defaults to true -func ResourceScatterNdAddUseLocking(value bool) ResourceScatterNdAddAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Applies sparse addition to individual values or slices in a Variable. -// -// `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. -// -// `indices` must be integer tensor, containing indices into `ref`. -// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`. -// -// The innermost dimension of `indices` (with length `K`) corresponds to -// indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th -// dimension of `ref`. -// -// `updates` is `Tensor` of rank `Q-1+P-K` with shape: -// -// ``` -// [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]] -// ``` -// -// For example, say we want to add 4 scattered elements to a rank-1 tensor to -// 8 elements. 
In Python, that addition would look like this: -// -// ```python -// ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8], use_resource=True) -// indices = tf.constant([[4], [3], [1], [7]]) -// updates = tf.constant([9, 10, 11, 12]) -// add = tf.scatter_nd_add(ref, indices, updates) -// with tf.Session() as sess: -// print sess.run(add) -// ``` -// -// The resulting update to ref would look like this: -// -// [1, 13, 3, 14, 14, 6, 7, 20] -// -// See `tf.scatter_nd` for more details about how to make updates to -// slices. -// -// Arguments: -// ref: A resource handle. Must be from a VarHandleOp. -// indices: A Tensor. Must be one of the following types: int32, int64. -// A tensor of indices into ref. -// updates: A Tensor. Must have the same type as ref. A tensor of -// values to add to ref. -// -// Returns the created operation. -func ResourceScatterNdAdd(scope *Scope, ref tf.Output, indices tf.Output, updates tf.Output, optional ...ResourceScatterNdAddAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceScatterNdAdd", - Input: []tf.Input{ - ref, indices, updates, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Counts the number of occurrences of each value in an integer array. -// -// Outputs a vector with length `size` and the same dtype as `weights`. If -// `weights` are empty, then index `i` stores the number of times the value `i` is -// counted in `arr`. If `weights` are non-empty, then index `i` stores the sum of -// the value in `weights` at each index where the corresponding value in `arr` is -// `i`. -// -// Values in `arr` outside of the range [0, size) are ignored. -// -// Arguments: -// arr: int32 `Tensor`. -// size: non-negative int32 scalar `Tensor`. -// weights: is an int32, int64, float32, or float64 `Tensor` with the same -// shape as `arr`, or a length-0 `Tensor`, in which case it acts as all weights -// equal to 1. -// -// Returns 1D `Tensor` with length equal to `size`. The counts or summed weights for -// each value in the range [0, size). -func Bincount(scope *Scope, arr tf.Output, size tf.Output, weights tf.Output) (bins tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Bincount", - Input: []tf.Input{ - arr, size, weights, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Gradients for batch normalization. -// -// DEPRECATED at GraphDef version 9: Use tf.nn.batch_normalization() -// -// This op is deprecated. See `tf.nn.batch_normalization`. -// -// Arguments: -// t: A 4D input Tensor. -// m: A 1D mean Tensor with size matching the last dimension of t. -// This is the first output from tf.nn.moments, -// or a saved moving average thereof. -// v: A 1D variance Tensor with size matching the last dimension of t. -// This is the second output from tf.nn.moments, -// or a saved moving average thereof. -// gamma: A 1D gamma Tensor with size matching the last dimension of t. -// If "scale_after_normalization" is true, this Tensor will be multiplied -// with the normalized Tensor. -// backprop: 4D backprop Tensor. -// variance_epsilon: A small float number to avoid dividing by 0. -// scale_after_normalization: A bool indicating whether the resulted tensor -// needs to be multiplied with gamma. -// -// Returns: -// dx: 4D backprop tensor for input. -// dm: 1D backprop tensor for mean. -// dv: 1D backprop tensor for variance. -// db: 1D backprop tensor for beta. 
-// dg: 1D backprop tensor for gamma. -func BatchNormWithGlobalNormalizationGrad(scope *Scope, t tf.Output, m tf.Output, v tf.Output, gamma tf.Output, backprop tf.Output, variance_epsilon float32, scale_after_normalization bool) (dx tf.Output, dm tf.Output, dv tf.Output, db tf.Output, dg tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization} - opspec := tf.OpSpec{ - Type: "BatchNormWithGlobalNormalizationGrad", - Input: []tf.Input{ - t, m, v, gamma, backprop, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) -} - -// CropAndResizeGradImageAttr is an optional argument to CropAndResizeGradImage. -type CropAndResizeGradImageAttr func(optionalAttr) - -// CropAndResizeGradImageMethod sets the optional method attribute to value. -// -// value: A string specifying the interpolation method. Only 'bilinear' is -// supported for now. -// If not specified, defaults to "bilinear" -func CropAndResizeGradImageMethod(value string) CropAndResizeGradImageAttr { - return func(m optionalAttr) { - m["method"] = value - } -} - -// Computes the gradient of the crop_and_resize op wrt the input image tensor. -// -// Arguments: -// grads: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. -// boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor -// specifies the coordinates of a box in the `box_ind[i]` image and is specified -// in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of -// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the -// `[0, 1]` interval of normalized image height is mapped to -// `[0, image_height - 1] in image height coordinates. We do allow y1 > y2, in -// which case the sampled crop is an up-down flipped version of the original -// image. The width dimension is treated similarly. Normalized coordinates -// outside the `[0, 1]` range are allowed, in which case we use -// `extrapolation_value` to extrapolate the input image values. -// box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`. -// The value of `box_ind[i]` specifies the image that the `i`-th box refers to. -// image_size: A 1-D tensor with value `[batch, image_height, image_width, depth]` -// containing the original image size. Both `image_height` and `image_width` need -// to be positive. -// -// -// Returns A 4-D tensor of shape `[batch, image_height, image_width, depth]`. -func CropAndResizeGradImage(scope *Scope, grads tf.Output, boxes tf.Output, box_ind tf.Output, image_size tf.Output, T tf.DataType, optional ...CropAndResizeGradImageAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"T": T} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CropAndResizeGradImage", - Input: []tf.Input{ - grads, boxes, box_ind, image_size, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// OutfeedDequeueAttr is an optional argument to OutfeedDequeue. -type OutfeedDequeueAttr func(optionalAttr) - -// OutfeedDequeueDeviceOrdinal sets the optional device_ordinal attribute to value. -// -// value: The TPU device to use. This should be -1 when the Op -// is running on a TPU device, and >= 0 when the Op is running on the CPU -// device. 
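Going back to the `Bincount` wrapper earlier in this hunk, a counting sketch with weights of all ones, so the summed weights reduce to a plain occurrence count (error handling elided):

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	arr := op.Const(s, []int32{1, 1, 2, 5})
	size := op.Const(s, int32(6))
	// Unit weights with the same shape as arr; their dtype picks the output dtype.
	weights := op.Const(s, []float32{1, 1, 1, 1})
	bins := op.Bincount(s, arr, size, weights)

	graph, _ := s.Finalize()
	sess, _ := tf.NewSession(graph, nil)
	defer sess.Close()
	out, _ := sess.Run(nil, []tf.Output{bins}, nil)
	fmt.Println(out[0].Value()) // [0 2 1 0 0 1]
}
```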
-// If not specified, defaults to -1 -func OutfeedDequeueDeviceOrdinal(value int64) OutfeedDequeueAttr { - return func(m optionalAttr) { - m["device_ordinal"] = value - } -} - -// Retrieves a single tensor from the computation outfeed. -// -// This operation will block indefinitely until data is available. -// -// Arguments: -// dtype: The type of elements in the tensor. -// shape: The shape of the tensor. -// -// Returns A tensor that will be read from the device outfeed. -func OutfeedDequeue(scope *Scope, dtype tf.DataType, shape tf.Shape, optional ...OutfeedDequeueAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype, "shape": shape} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "OutfeedDequeue", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// An Op to sum inputs across replicated TPU instances. -// -// Each instance supplies its own input. -// -// For example, suppose there are 8 TPU instances: `[A, B, C, D, E, F, G, H]`. -// Passing group_assignment=`[[0,2,4,6],[1,3,5,7]]` sets `A, C, E, G` as group 0, -// and `B, D, F, H` as group 1. Thus we get the outputs: -// `[A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H]`. -// -// Arguments: -// input: The local input to the sum. -// group_assignment: An int32 tensor with shape -// [num_groups, num_replicas_per_group]. `group_assignment[i]` represents the -// replica ids in the ith subgroup. -// -// Returns The sum of all the distributed inputs. -func CrossReplicaSum(scope *Scope, input tf.Output, group_assignment tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "CrossReplicaSum", - Input: []tf.Input{ - input, group_assignment, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// EnqueueTPUEmbeddingRaggedTensorBatchAttr is an optional argument to EnqueueTPUEmbeddingRaggedTensorBatch. -type EnqueueTPUEmbeddingRaggedTensorBatchAttr func(optionalAttr) - -// EnqueueTPUEmbeddingRaggedTensorBatchDeviceOrdinal sets the optional device_ordinal attribute to value. -// -// value: The TPU device to use. Should be >= 0 and less than the number -// of TPU cores in the task on which the node is placed. -// If not specified, defaults to -1 -func EnqueueTPUEmbeddingRaggedTensorBatchDeviceOrdinal(value int64) EnqueueTPUEmbeddingRaggedTensorBatchAttr { - return func(m optionalAttr) { - m["device_ordinal"] = value - } -} - -// EnqueueTPUEmbeddingRaggedTensorBatchCombiners sets the optional combiners attribute to value. -// -// value: A list of string scalars, one for each embedding table that specify -// how to normalize the embedding activations after weighted summation. -// Supported combiners are 'mean', 'sum', or 'sqrtn'. It is invalid to have -// the sum of the weights be 0 for 'mean' or the sum of the squared weights be -// 0 for 'sqrtn'. If combiners isn't passed, the default is to use 'sum' for -// all tables. -// If not specified, defaults to <> -func EnqueueTPUEmbeddingRaggedTensorBatchCombiners(value []string) EnqueueTPUEmbeddingRaggedTensorBatchAttr { - return func(m optionalAttr) { - m["combiners"] = value - } -} - -// EnqueueTPUEmbeddingRaggedTensorBatchMaxSequenceLengths sets the optional max_sequence_lengths attribute to value. 
-// If not specified, defaults to <> -func EnqueueTPUEmbeddingRaggedTensorBatchMaxSequenceLengths(value []int64) EnqueueTPUEmbeddingRaggedTensorBatchAttr { - return func(m optionalAttr) { - m["max_sequence_lengths"] = value - } -} - -// Eases the porting of code that uses tf.nn.embedding_lookup(). -// -// sample_splits[i], embedding_indices[i] and aggregation_weights[i] correspond -// to the ith feature. table_ids[i] indicates which embedding table to look up ith -// feature. -// -// The tensors at corresponding positions in two of the input lists, -// embedding_indices and aggregation_weights, must have the same shape, i.e. rank 1 -// with dim_size() equal to the total number of lookups into the table described by -// the corresponding feature. -// -// Arguments: -// sample_splits: A list of rank 1 Tensors specifying the break points for splitting -// embedding_indices and aggregation_weights into rows. -// It corresponds to ids.row_splits in embedding_lookup(), when ids is a -// RaggedTensor. -// embedding_indices: A list of rank 1 Tensors, indices into the embedding tables. -// It corresponds to ids.values in embedding_lookup(), when ids is a RaggedTensor. -// aggregation_weights: A list of rank 1 Tensors containing per training example -// aggregation weights. It corresponds to the values field of a RaggedTensor -// with the same row_splits as ids in embedding_lookup(), when ids is a -// RaggedTensor. -// mode_override: A string input that overrides the mode specified in the -// TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference', -// 'training', 'backward_pass_only'}. When set to 'unspecified', the mode set -// in TPUEmbeddingConfiguration is used, otherwise mode_override is used. -// table_ids: A list of integers specifying the identifier of the embedding table -// (offset of TableDescriptor in the TPUEmbeddingConfiguration) to lookup the -// corresponding input. The ith input is looked up using table_ids[i]. The size -// of the table_ids list must be equal to that of sample_indices, -// embedding_indices and aggregation_weights. -// -// Returns the created operation. -func EnqueueTPUEmbeddingRaggedTensorBatch(scope *Scope, sample_splits []tf.Output, embedding_indices []tf.Output, aggregation_weights []tf.Output, mode_override tf.Output, table_ids []int64, optional ...EnqueueTPUEmbeddingRaggedTensorBatchAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"table_ids": table_ids} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "EnqueueTPUEmbeddingRaggedTensorBatch", - Input: []tf.Input{ - tf.OutputList(sample_splits), tf.OutputList(embedding_indices), tf.OutputList(aggregation_weights), mode_override, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// FakeQuantWithMinMaxVarsAttr is an optional argument to FakeQuantWithMinMaxVars. -type FakeQuantWithMinMaxVarsAttr func(optionalAttr) - -// FakeQuantWithMinMaxVarsNumBits sets the optional num_bits attribute to value. -// If not specified, defaults to 8 -func FakeQuantWithMinMaxVarsNumBits(value int64) FakeQuantWithMinMaxVarsAttr { - return func(m optionalAttr) { - m["num_bits"] = value - } -} - -// FakeQuantWithMinMaxVarsNarrowRange sets the optional narrow_range attribute to value. 
-// If not specified, defaults to false -func FakeQuantWithMinMaxVarsNarrowRange(value bool) FakeQuantWithMinMaxVarsAttr { - return func(m optionalAttr) { - m["narrow_range"] = value - } -} - -// Fake-quantize the 'inputs' tensor of type float via global float scalars -// -// Fake-quantize the `inputs` tensor of type float via global float scalars -// `min` and `max` to `outputs` tensor of same shape as `inputs`. -// -// Attributes -// -// * `[min; max]` define the clamping range for the `inputs` data. -// * `inputs` values are quantized into the quantization range ( -// `[0; 2^num_bits - 1]` when `narrow_range` is false and `[1; 2^num_bits - 1]` -// when it is true) and then de-quantized and output as floats in `[min; max]` -// interval. -// * `num_bits` is the bitwidth of the quantization; between 2 and 16, inclusive. -// -// Before quantization, `min` and `max` values are adjusted with the following -// logic. -// It is suggested to have `min <= 0 <= max`. If `0` is not in the range of values, -// the behavior can be unexpected: -// -// * If `0 < min < max`: `min_adj = 0` and `max_adj = max - min`. -// * If `min < max < 0`: `min_adj = min - max` and `max_adj = 0`. -// * If `min <= 0 <= max`: `scale = (max - min) / (2^num_bits - 1) `, -// `min_adj = scale * round(min / scale)` and `max_adj = max + min_adj - min`. -// -// This operation has a gradient and thus allows for training `min` and `max` -// values. -func FakeQuantWithMinMaxVars(scope *Scope, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsAttr) (outputs tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FakeQuantWithMinMaxVars", - Input: []tf.Input{ - inputs, min, max, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Enqueue multiple Tensor values on the computation outfeed. -// -// Arguments: -// inputs: A list of tensors that will be inserted into the outfeed queue as an -// XLA tuple. -// -// Returns the created operation. -func OutfeedEnqueueTuple(scope *Scope, inputs []tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "OutfeedEnqueueTuple", - Input: []tf.Input{ - tf.OutputList(inputs), - }, - } - return scope.AddOperation(opspec) -} - -// Returns the number of nonzeroes of `sparse_matrix`. -// -// Arguments: -// sparse_matrix: A CSRSparseMatrix. -// -// Returns The number of nonzeroes of `sparse_matrix`. -func SparseMatrixNNZ(scope *Scope, sparse_matrix tf.Output) (nnz tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseMatrixNNZ", - Input: []tf.Input{ - sparse_matrix, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// DecodeJpegAttr is an optional argument to DecodeJpeg. -type DecodeJpegAttr func(optionalAttr) - -// DecodeJpegChannels sets the optional channels attribute to value. -// -// value: Number of color channels for the decoded image. -// If not specified, defaults to 0 -func DecodeJpegChannels(value int64) DecodeJpegAttr { - return func(m optionalAttr) { - m["channels"] = value - } -} - -// DecodeJpegRatio sets the optional ratio attribute to value. -// -// value: Downscaling ratio. -// If not specified, defaults to 1 -func DecodeJpegRatio(value int64) DecodeJpegAttr { - return func(m optionalAttr) { - m["ratio"] = value - } -} - -// DecodeJpegFancyUpscaling sets the optional fancy_upscaling attribute to value. 
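A sketch of the `FakeQuantWithMinMaxVars` wrapper above, quantizing a few floats onto an 8-bit grid over a hypothetical `[-0.5, 1.0]` range (error handling elided):

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	inputs := op.Const(s, []float32{-0.3, 0.0, 0.4, 1.2})
	minVal := op.Const(s, float32(-0.5))
	maxVal := op.Const(s, float32(1.0))
	// Quantize into 8 bits over [min, max], then dequantize back to floats.
	outputs := op.FakeQuantWithMinMaxVars(s, inputs, minVal, maxVal,
		op.FakeQuantWithMinMaxVarsNumBits(8))

	graph, _ := s.Finalize()
	sess, _ := tf.NewSession(graph, nil)
	defer sess.Close()
	out, _ := sess.Run(nil, []tf.Output{outputs}, nil)
	fmt.Println(out[0].Value()) // values snapped (and clamped) to the 8-bit grid
}
```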
-// -// value: If true use a slower but nicer upscaling of the -// chroma planes (yuv420/422 only). -// If not specified, defaults to true -func DecodeJpegFancyUpscaling(value bool) DecodeJpegAttr { - return func(m optionalAttr) { - m["fancy_upscaling"] = value - } -} - -// DecodeJpegTryRecoverTruncated sets the optional try_recover_truncated attribute to value. -// -// value: If true try to recover an image from truncated input. -// If not specified, defaults to false -func DecodeJpegTryRecoverTruncated(value bool) DecodeJpegAttr { - return func(m optionalAttr) { - m["try_recover_truncated"] = value - } -} - -// DecodeJpegAcceptableFraction sets the optional acceptable_fraction attribute to value. -// -// value: The minimum required fraction of lines before a truncated -// input is accepted. -// If not specified, defaults to 1 -func DecodeJpegAcceptableFraction(value float32) DecodeJpegAttr { - return func(m optionalAttr) { - m["acceptable_fraction"] = value - } -} - -// DecodeJpegDctMethod sets the optional dct_method attribute to value. -// -// value: string specifying a hint about the algorithm used for -// decompression. Defaults to "" which maps to a system-specific -// default. Currently valid values are ["INTEGER_FAST", -// "INTEGER_ACCURATE"]. The hint may be ignored (e.g., the internal -// jpeg library changes to a version that does not have that specific -// option.) -// If not specified, defaults to "" -func DecodeJpegDctMethod(value string) DecodeJpegAttr { - return func(m optionalAttr) { - m["dct_method"] = value - } -} - -// Decode a JPEG-encoded image to a uint8 tensor. -// -// The attr `channels` indicates the desired number of color channels for the -// decoded image. -// -// Accepted values are: -// -// * 0: Use the number of channels in the JPEG-encoded image. -// * 1: output a grayscale image. -// * 3: output an RGB image. -// -// If needed, the JPEG-encoded image is transformed to match the requested number -// of color channels. -// -// The attr `ratio` allows downscaling the image by an integer factor during -// decoding. Allowed values are: 1, 2, 4, and 8. This is much faster than -// downscaling the image later. -// -// -// This op also supports decoding PNGs and non-animated GIFs since the interface is -// the same, though it is cleaner to use `tf.io.decode_image`. -// -// Arguments: -// contents: 0-D. The JPEG-encoded image. -// -// Returns 3-D with shape `[height, width, channels]`.. -func DecodeJpeg(scope *Scope, contents tf.Output, optional ...DecodeJpegAttr) (image tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DecodeJpeg", - Input: []tf.Input{ - contents, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// LoadTPUEmbeddingADAMParametersAttr is an optional argument to LoadTPUEmbeddingADAMParameters. -type LoadTPUEmbeddingADAMParametersAttr func(optionalAttr) - -// LoadTPUEmbeddingADAMParametersTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func LoadTPUEmbeddingADAMParametersTableId(value int64) LoadTPUEmbeddingADAMParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// LoadTPUEmbeddingADAMParametersTableName sets the optional table_name attribute to value. 
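The `DecodeJpeg` wrapper above is typically fed raw bytes at run time; a sketch using a `Placeholder` and a hypothetical `input.jpg` path (error handling elided):

```go
package main

import (
	"fmt"
	"io/ioutil"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	// The JPEG bytes are fed in as a scalar string tensor.
	contents := op.Placeholder(s, tf.String)
	image := op.DecodeJpeg(s, contents,
		op.DecodeJpegChannels(3), op.DecodeJpegRatio(2))

	graph, _ := s.Finalize()
	sess, _ := tf.NewSession(graph, nil)
	defer sess.Close()

	raw, _ := ioutil.ReadFile("input.jpg") // hypothetical path
	t, _ := tf.NewTensor(string(raw))
	out, _ := sess.Run(map[tf.Output]*tf.Tensor{contents: t}, []tf.Output{image}, nil)
	fmt.Println(out[0].Shape()) // roughly [height/2, width/2, 3] with ratio=2
}
```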
-// If not specified, defaults to "" -func LoadTPUEmbeddingADAMParametersTableName(value string) LoadTPUEmbeddingADAMParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// LoadTPUEmbeddingADAMParametersConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingADAMParametersConfig(value string) LoadTPUEmbeddingADAMParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Load ADAM embedding parameters. -// -// An op that loads optimization parameters into HBM for embedding. Must be -// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -// embedding table configuration. For example, this op is used to install -// parameters that are loaded from a checkpoint before a training loop is -// executed. -// -// Arguments: -// parameters: Value of parameters used in the ADAM optimization algorithm. -// momenta: Value of momenta used in the ADAM optimization algorithm. -// velocities: Value of velocities used in the ADAM optimization algorithm. -// -// -// -// Returns the created operation. -func LoadTPUEmbeddingADAMParameters(scope *Scope, parameters tf.Output, momenta tf.Output, velocities tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingADAMParametersAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LoadTPUEmbeddingADAMParameters", - Input: []tf.Input{ - parameters, momenta, velocities, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Records the latency of producing `input_dataset` elements in a StatsAggregator. -func LatencyStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "LatencyStatsDataset", - Input: []tf.Input{ - input_dataset, tag, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the power of one value to another. -// -// Given a tensor `x` and a tensor `y`, this operation computes \\(x^y\\) for -// corresponding elements in `x` and `y`. For example: -// -// ``` -// # tensor 'x' is [[2, 2]], [3, 3]] -// # tensor 'y' is [[8, 16], [2, 3]] -// tf.pow(x, y) ==> [[256, 65536], [9, 27]] -// ``` -func Pow(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Pow", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Element-wise multiplication of a sparse matrix with a dense tensor. -// -// Returns a sparse matrix. -// -// The dense tensor `b` may be either a scalar; otherwise `a` must be a rank-3 -// `SparseMatrix`; in this case `b` must be shaped `[batch_size, 1, 1]` and the -// multiply operation broadcasts. -// -// **NOTE** even if `b` is zero, the sparsity structure of the output does not -// change. -// -// Arguments: -// a: A CSRSparseMatrix. -// b: A dense tensor. -// -// Returns A dense output tensor. 
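The `tf.pow(x, y) ==> [[256, 65536], [9, 27]]` example in the `Pow` comment above can be reproduced from Go like this (error handling elided):

```go
package main

import (
	"fmt"

	tf "github.com/tensorflow/tensorflow/tensorflow/go"
	"github.com/tensorflow/tensorflow/tensorflow/go/op"
)

func main() {
	s := op.NewScope()
	x := op.Const(s, [][]int32{{2, 2}, {3, 3}})
	y := op.Const(s, [][]int32{{8, 16}, {2, 3}})
	z := op.Pow(s, x, y)

	graph, _ := s.Finalize()
	sess, _ := tf.NewSession(graph, nil)
	defer sess.Close()
	out, _ := sess.Run(nil, []tf.Output{z}, nil)
	fmt.Println(out[0].Value()) // [[256 65536] [9 27]]
}
```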
-func SparseMatrixMul(scope *Scope, a tf.Output, b tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseMatrixMul", - Input: []tf.Input{ - a, b, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the element-wise sum of a list of tensors. -// -// `tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not -// wait for all of its inputs to be ready before beginning to sum. This can -// save memory if inputs are ready at different times, since minimum temporary -// storage is proportional to the output size rather than the inputs size. -// -// Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable. -// -// Returns a `Tensor` of same shape and type as the elements of `inputs`. -// -// Arguments: -// inputs: A list of `Tensor` objects, each with same shape and type. -// shape: Shape of elements of `inputs`. -func AccumulateNV2(scope *Scope, inputs []tf.Output, shape tf.Shape) (sum tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"shape": shape} - opspec := tf.OpSpec{ - Type: "AccumulateNV2", - Input: []tf.Input{ - tf.OutputList(inputs), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// An op enabling differentiation of TPU Embeddings. -// -// This op simply returns its first input, which is assumed to have been sliced -// from the Tensors returned by TPUEmbeddingDequeueActivations. The presence of -// this op, and its first argument being a trainable Variable, enables automatic -// differentiation of graphs containing embeddings via the TPU Embedding Python -// libraries. -// -// Arguments: -// embedding_variable: A trainable variable, enabling optimizers to find this op. -// sliced_activations: The embedding activations Tensor to return. -// table_id: The id of the table in the embedding layer configuration from which -// these activations were computed. -// lookup_id: Identifier of the set of embedding indices which produced these -// activations. -func TPUEmbeddingActivations(scope *Scope, embedding_variable tf.Output, sliced_activations tf.Output, table_id int64, lookup_id int64) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"table_id": table_id, "lookup_id": lookup_id} - opspec := tf.OpSpec{ - Type: "TPUEmbeddingActivations", - Input: []tf.Input{ - embedding_variable, sliced_activations, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// QuantizeAndDequantizeV3Attr is an optional argument to QuantizeAndDequantizeV3. -type QuantizeAndDequantizeV3Attr func(optionalAttr) - -// QuantizeAndDequantizeV3SignedInput sets the optional signed_input attribute to value. -// If not specified, defaults to true -func QuantizeAndDequantizeV3SignedInput(value bool) QuantizeAndDequantizeV3Attr { - return func(m optionalAttr) { - m["signed_input"] = value - } -} - -// QuantizeAndDequantizeV3RangeGiven sets the optional range_given attribute to value. -// If not specified, defaults to true -func QuantizeAndDequantizeV3RangeGiven(value bool) QuantizeAndDequantizeV3Attr { - return func(m optionalAttr) { - m["range_given"] = value - } -} - -// QuantizeAndDequantizeV3NarrowRange sets the optional narrow_range attribute to value. 
-// If not specified, defaults to false -func QuantizeAndDequantizeV3NarrowRange(value bool) QuantizeAndDequantizeV3Attr { - return func(m optionalAttr) { - m["narrow_range"] = value - } -} - -// QuantizeAndDequantizeV3Axis sets the optional axis attribute to value. -// If not specified, defaults to -1 -func QuantizeAndDequantizeV3Axis(value int64) QuantizeAndDequantizeV3Attr { - return func(m optionalAttr) { - m["axis"] = value - } -} - -// Quantizes then dequantizes a tensor. -// -// This is almost identical to QuantizeAndDequantizeV2, except that num_bits is a -// tensor, so its value can change during training. -func QuantizeAndDequantizeV3(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, num_bits tf.Output, optional ...QuantizeAndDequantizeV3Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizeAndDequantizeV3", - Input: []tf.Input{ - input, input_min, input_max, num_bits, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns x * y element-wise. -// -// *NOTE*: `Multiply` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Mul(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Mul", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes softplus gradients for a softplus operation. -// -// Arguments: -// gradients: The backpropagated gradients to the corresponding softplus operation. -// features: The features passed as input to the corresponding softplus operation. -// -// Returns The gradients: `gradients / (1 + exp(-features))`. -func SoftplusGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SoftplusGrad", - Input: []tf.Input{ - gradients, features, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the item in the list with the given index. -// -// input_handle: the list -// index: the position in the list from which an element will be retrieved -// item: the element at that position -// -// -func TensorListGetItem(scope *Scope, input_handle tf.Output, index tf.Output, element_shape tf.Output, element_dtype tf.DataType) (item tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"element_dtype": element_dtype} - opspec := tf.OpSpec{ - Type: "TensorListGetItem", - Input: []tf.Input{ - input_handle, index, element_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr is an optional argument to RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebug. -type RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr func(optionalAttr) - -// RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugTableId sets the optional table_id attribute to value. 
-// If not specified, defaults to -1 -func RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugTableId(value int64) RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugTableName(value string) RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugConfig(value string) RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Retrieve proximal Adagrad embedding parameters with debug support. -// -// An op that retrieves optimization parameters from embedding to host -// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up -// the correct embedding table configuration. For example, this op is -// used to retrieve updated parameters before saving a checkpoint. -// -// Returns: -// parameters: Parameter parameters updated by the proximal Adagrad optimization algorithm. -// accumulators: Parameter accumulators updated by the proximal Adagrad optimization algorithm. -// gradient_accumulators: Parameter gradient_accumulators updated by the proximal Adagrad optimization algorithm. -func RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebug(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr) (parameters tf.Output, accumulators tf.Output, gradient_accumulators tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebug", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Returns the value stored in an Optional variant or raises an error if none exists. -func OptionalGetValue(scope *Scope, optional tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "OptionalGetValue", - Input: []tf.Input{ - optional, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if components, idx, err = makeOutputList(op, idx, "components"); err != nil { - scope.UpdateErr("OptionalGetValue", err) - return - } - return components -} - -// Determine the script codes of a given tensor of Unicode integer code points. -// -// This operation converts Unicode code points to script codes corresponding to -// each code point. Script codes correspond to International Components for -// Unicode (ICU) UScriptCode values. See http://icu-project.org/apiref/icu4c/uscript_8h.html. -// Returns -1 (USCRIPT_INVALID_CODE) for invalid codepoints. Output shape will -// match input shape. 
-// -// Examples: -// -// >>> tf.strings.unicode_script([1, 31, 38]) -// -// -// Arguments: -// input: A Tensor of int32 Unicode code points. -// -// Returns A Tensor of int32 script codes corresponding to each input code point. -func UnicodeScript(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "UnicodeScript", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// CropAndResizeAttr is an optional argument to CropAndResize. -type CropAndResizeAttr func(optionalAttr) - -// CropAndResizeMethod sets the optional method attribute to value. -// -// value: A string specifying the sampling method for resizing. It can be either -// `"bilinear"` or `"nearest"` and default to `"bilinear"`. Currently two sampling -// methods are supported: Bilinear and Nearest Neighbor. -// If not specified, defaults to "bilinear" -func CropAndResizeMethod(value string) CropAndResizeAttr { - return func(m optionalAttr) { - m["method"] = value - } -} - -// CropAndResizeExtrapolationValue sets the optional extrapolation_value attribute to value. -// -// value: Value used for extrapolation, when applicable. -// If not specified, defaults to 0 -func CropAndResizeExtrapolationValue(value float32) CropAndResizeAttr { - return func(m optionalAttr) { - m["extrapolation_value"] = value - } -} - -// Extracts crops from the input image tensor and resizes them. -// -// Extracts crops from the input image tensor and resizes them using bilinear -// sampling or nearest neighbor sampling (possibly with aspect ratio change) to a -// common output size specified by `crop_size`. This is more general than the -// `crop_to_bounding_box` op which extracts a fixed size slice from the input image -// and does not allow resizing or aspect ratio change. -// -// Returns a tensor with `crops` from the input `image` at positions defined at the -// bounding box locations in `boxes`. The cropped boxes are all resized (with -// bilinear or nearest neighbor interpolation) to a fixed -// `size = [crop_height, crop_width]`. The result is a 4-D tensor -// `[num_boxes, crop_height, crop_width, depth]`. The resizing is corner aligned. -// In particular, if `boxes = [[0, 0, 1, 1]]`, the method will give identical -// results to using `tf.image.resize_bilinear()` or -// `tf.image.resize_nearest_neighbor()`(depends on the `method` argument) with -// `align_corners=True`. -// -// Arguments: -// image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`. -// Both `image_height` and `image_width` need to be positive. -// boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor -// specifies the coordinates of a box in the `box_ind[i]` image and is specified -// in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of -// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the -// `[0, 1]` interval of normalized image height is mapped to -// `[0, image_height - 1]` in image height coordinates. We do allow `y1` > `y2`, in -// which case the sampled crop is an up-down flipped version of the original -// image. The width dimension is treated similarly. Normalized coordinates -// outside the `[0, 1]` range are allowed, in which case we use -// `extrapolation_value` to extrapolate the input image values. -// box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`. -// The value of `box_ind[i]` specifies the image that the `i`-th box refers to. 
-// crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`. All -// cropped image patches are resized to this size. The aspect ratio of the image -// content is not preserved. Both `crop_height` and `crop_width` need to be -// positive. -// -// Returns A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. -func CropAndResize(scope *Scope, image tf.Output, boxes tf.Output, box_ind tf.Output, crop_size tf.Output, optional ...CropAndResizeAttr) (crops tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CropAndResize", - Input: []tf.Input{ - image, boxes, box_ind, crop_size, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// DepthwiseConv2dNativeBackpropFilterAttr is an optional argument to DepthwiseConv2dNativeBackpropFilter. -type DepthwiseConv2dNativeBackpropFilterAttr func(optionalAttr) - -// DepthwiseConv2dNativeBackpropFilterExplicitPaddings sets the optional explicit_paddings attribute to value. -// If not specified, defaults to <> -func DepthwiseConv2dNativeBackpropFilterExplicitPaddings(value []int64) DepthwiseConv2dNativeBackpropFilterAttr { - return func(m optionalAttr) { - m["explicit_paddings"] = value - } -} - -// DepthwiseConv2dNativeBackpropFilterDataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, height, width, channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, channels, height, width]. -// If not specified, defaults to "NHWC" -func DepthwiseConv2dNativeBackpropFilterDataFormat(value string) DepthwiseConv2dNativeBackpropFilterAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// DepthwiseConv2dNativeBackpropFilterDilations sets the optional dilations attribute to value. -// -// value: 1-D tensor of length 4. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each filter -// element on that dimension. The dimension order is determined by the value of -// `data_format`, see above for details. Dilations in the batch and depth -// dimensions must be 1. -// If not specified, defaults to -func DepthwiseConv2dNativeBackpropFilterDilations(value []int64) DepthwiseConv2dNativeBackpropFilterAttr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes the gradients of depthwise convolution with respect to the filter. -// -// Arguments: -// input: 4-D with shape based on `data_format`. For example, if -// `data_format` is 'NHWC' then `input` is a 4-D `[batch, in_height, -// in_width, in_channels]` tensor. -// filter_sizes: An integer vector representing the tensor shape of `filter`, -// where `filter` is a 4-D -// `[filter_height, filter_width, in_channels, depthwise_multiplier]` tensor. -// out_backprop: 4-D with shape based on `data_format`. -// For example, if `data_format` is 'NHWC' then -// out_backprop shape is `[batch, out_height, out_width, out_channels]`. -// Gradients w.r.t. the output of the convolution. -// strides: The stride of the sliding window for each dimension of the input -// of the convolution. -// padding: The type of padding algorithm to use. -// -// Returns 4-D with shape -// `[filter_height, filter_width, in_channels, out_channels]`. Gradient w.r.t. 
-// the `filter` input of the convolution. -func DepthwiseConv2dNativeBackpropFilter(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropFilterAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DepthwiseConv2dNativeBackpropFilter", - Input: []tf.Input{ - input, filter_sizes, out_backprop, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that zips together `input_datasets`. -// -// The elements of the resulting dataset are created by zipping corresponding -// elements from each of the input datasets. -// -// The size of the resulting dataset will match the size of the smallest input -// dataset, and no error will be raised if input datasets have different sizes. -// -// Arguments: -// input_datasets: List of `N` variant Tensors representing datasets to be zipped together. -// -// -func ZipDataset(scope *Scope, input_datasets []tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "ZipDataset", - Input: []tf.Input{ - tf.OutputList(input_datasets), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Rounds the values of a tensor to the nearest integer, element-wise. -// -// Rounds half to even. Also known as bankers rounding. If you want to round -// according to the current system rounding mode use std::cint. -func Round(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Round", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a tree ensemble model and returns a handle to it. -// -// Arguments: -// tree_ensemble_handle: Handle to the tree ensemble resource to be created. -// stamp_token: Token to use as the initial value of the resource stamp. -// tree_ensemble_serialized: Serialized proto of the tree ensemble. -// -// Returns the created operation. -func BoostedTreesCreateEnsemble(scope *Scope, tree_ensemble_handle tf.Output, stamp_token tf.Output, tree_ensemble_serialized tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BoostedTreesCreateEnsemble", - Input: []tf.Input{ - tree_ensemble_handle, stamp_token, tree_ensemble_serialized, - }, - } - return scope.AddOperation(opspec) -} - -// Calculates the softmax of a CSRSparseMatrix. -// -// Calculate the softmax of the innermost dimensions of a SparseMatrix. -// -// Missing values are treated as `-inf` (i.e., logits of zero probability); and -// the output has the same sparsity structure as the input (though missing values -// in the output may now be treated as having probability zero). -// -// Arguments: -// logits: A CSRSparseMatrix. -// -// -// Returns A CSRSparseMatrix. 
-func SparseMatrixSoftmax(scope *Scope, logits tf.Output, type_ tf.DataType) (softmax tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"type": type_} - opspec := tf.OpSpec{ - Type: "SparseMatrixSoftmax", - Input: []tf.Input{ - logits, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RestoreAttr is an optional argument to Restore. -type RestoreAttr func(optionalAttr) - -// RestorePreferredShard sets the optional preferred_shard attribute to value. -// -// value: Index of file to open first if multiple files match -// `file_pattern`. -// If not specified, defaults to -1 -func RestorePreferredShard(value int64) RestoreAttr { - return func(m optionalAttr) { - m["preferred_shard"] = value - } -} - -// Restores a tensor from checkpoint files. -// -// Reads a tensor stored in one or several files. If there are several files (for -// instance because a tensor was saved as slices), `file_pattern` may contain -// wildcard symbols (`*` and `?`) in the filename portion only, not in the -// directory portion. -// -// If a `file_pattern` matches several files, `preferred_shard` can be used to hint -// in which file the requested tensor is likely to be found. This op will first -// open the file at index `preferred_shard` in the list of matching files and try -// to restore tensors from that file. Only if some tensors or tensor slices are -// not found in that first file, then the Op opens all the files. Setting -// `preferred_shard` to match the value passed as the `shard` input -// of a matching `Save` Op may speed up Restore. This attribute only affects -// performance, not correctness. The default value -1 means files are processed in -// order. -// -// See also `RestoreSlice`. -// -// Arguments: -// file_pattern: Must have a single element. The pattern of the files from -// which we read the tensor. -// tensor_name: Must have a single element. The name of the tensor to be -// restored. -// dt: The type of the tensor to be restored. -// -// Returns The restored tensor. -func Restore(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, dt tf.DataType, optional ...RestoreAttr) (tensor tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dt": dt} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Restore", - Input: []tf.Input{ - file_pattern, tensor_name, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the next record (key, value pair) produced by a Reader. -// -// Will dequeue from the input queue if necessary (e.g. when the -// Reader needs to start reading from a new file since it has finished -// with the previous file). -// -// Arguments: -// reader_handle: Handle to a Reader. -// queue_handle: Handle to a Queue, with string work items. -// -// Returns: -// key: A scalar. -// value: A scalar. -func ReaderReadV2(scope *Scope, reader_handle tf.Output, queue_handle tf.Output) (key tf.Output, value tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReaderReadV2", - Input: []tf.Input{ - reader_handle, queue_handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// CumprodAttr is an optional argument to Cumprod. -type CumprodAttr func(optionalAttr) - -// CumprodExclusive sets the optional exclusive attribute to value. -// -// value: If `True`, perform exclusive cumprod. 
-// If not specified, defaults to false -func CumprodExclusive(value bool) CumprodAttr { - return func(m optionalAttr) { - m["exclusive"] = value - } -} - -// CumprodReverse sets the optional reverse attribute to value. -// -// value: A `bool` (default: False). -// If not specified, defaults to false -func CumprodReverse(value bool) CumprodAttr { - return func(m optionalAttr) { - m["reverse"] = value - } -} - -// Compute the cumulative product of the tensor `x` along `axis`. -// -// By default, this op performs an inclusive cumprod, which means that the first -// element of the input is identical to the first element of the output: -// -// ```python -// tf.cumprod([a, b, c]) # => [a, a * b, a * b * c] -// ``` -// -// By setting the `exclusive` kwarg to `True`, an exclusive cumprod is -// performed instead: -// -// ```python -// tf.cumprod([a, b, c], exclusive=True) # => [1, a, a * b] -// ``` -// -// By setting the `reverse` kwarg to `True`, the cumprod is performed in the -// opposite direction: -// -// ```python -// tf.cumprod([a, b, c], reverse=True) # => [a * b * c, b * c, c] -// ``` -// -// This is more efficient than using separate `tf.reverse` ops. -// -// The `reverse` and `exclusive` kwargs can also be combined: -// -// ```python -// tf.cumprod([a, b, c], exclusive=True, reverse=True) # => [b * c, c, 1] -// ``` -// -// Arguments: -// x: A `Tensor`. Must be one of the following types: `float32`, `float64`, -// `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, -// `complex128`, `qint8`, `quint8`, `qint32`, `half`. -// axis: A `Tensor` of type `int32` (default: 0). Must be in the range -// `[-rank(x), rank(x))`. -func Cumprod(scope *Scope, x tf.Output, axis tf.Output, optional ...CumprodAttr) (out tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Cumprod", - Input: []tf.Input{ - x, axis, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebug. -type LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugAttr func(optionalAttr) - -// LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugConfig(value string) LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Load SGD embedding parameters. 
-// -// An op that loads optimization parameters into HBM for embedding. Must be -// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -// embedding table configuration. For example, this op is used to install -// parameters that are loaded from a checkpoint before a training loop is -// executed. -// -// Arguments: -// parameters: Value of parameters used in the stochastic gradient descent optimization algorithm. -// gradient_accumulators: Value of gradient_accumulators used in the Adadelta optimization algorithm. -// -// -// -// Returns the created operation. -func LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebug(scope *Scope, parameters tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LoadTPUEmbeddingStochasticGradientDescentParametersGradAccumDebug", - Input: []tf.Input{ - parameters, gradient_accumulators, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Creates a dataset that batches input elements into a SparseTensor. -// -// Arguments: -// input_dataset: A handle to an input dataset. Must have a single component. -// batch_size: A scalar representing the number of elements to accumulate in a -// batch. -// row_shape: A vector representing the dense shape of each row in the produced -// SparseTensor. The shape may be partially specified, using `-1` to indicate -// that a particular dimension should use the maximum size of all batch elements. -// -// -func DenseToSparseBatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, row_shape tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "DenseToSparseBatchDataset", - Input: []tf.Input{ - input_dataset, batch_size, row_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// LoadTPUEmbeddingAdadeltaParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingAdadeltaParametersGradAccumDebug. -type LoadTPUEmbeddingAdadeltaParametersGradAccumDebugAttr func(optionalAttr) - -// LoadTPUEmbeddingAdadeltaParametersGradAccumDebugTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func LoadTPUEmbeddingAdadeltaParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingAdadeltaParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// LoadTPUEmbeddingAdadeltaParametersGradAccumDebugTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingAdadeltaParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingAdadeltaParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// LoadTPUEmbeddingAdadeltaParametersGradAccumDebugConfig sets the optional config attribute to value. 
-// If not specified, defaults to "" -func LoadTPUEmbeddingAdadeltaParametersGradAccumDebugConfig(value string) LoadTPUEmbeddingAdadeltaParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Load Adadelta parameters with debug support. -// -// An op that loads optimization parameters into HBM for embedding. Must be -// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -// embedding table configuration. For example, this op is used to install -// parameters that are loaded from a checkpoint before a training loop is -// executed. -// -// Arguments: -// parameters: Value of parameters used in the Adadelta optimization algorithm. -// accumulators: Value of accumulators used in the Adadelta optimization algorithm. -// updates: Value of updates used in the Adadelta optimization algorithm. -// gradient_accumulators: Value of gradient_accumulators used in the Adadelta optimization algorithm. -// -// -// -// Returns the created operation. -func LoadTPUEmbeddingAdadeltaParametersGradAccumDebug(scope *Scope, parameters tf.Output, accumulators tf.Output, updates tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingAdadeltaParametersGradAccumDebugAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LoadTPUEmbeddingAdadeltaParametersGradAccumDebug", - Input: []tf.Input{ - parameters, accumulators, updates, gradient_accumulators, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Returns x / y element-wise. -// -// *NOTE*: `Div` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Div(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Div", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Enqueue a Tensor on the computation outfeed. -// -// Arguments: -// input: A tensor that will be inserted into the outfeed queue. -// -// Returns the created operation. -func OutfeedEnqueue(scope *Scope, input tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "OutfeedEnqueue", - Input: []tf.Input{ - input, - }, - } - return scope.AddOperation(opspec) -} - -// LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingProximalAdagradParametersGradAccumDebug. -type LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr func(optionalAttr) - -// LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugTableName sets the optional table_name attribute to value. 
-// If not specified, defaults to "" -func LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugConfig(value string) LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Load proximal Adagrad embedding parameters with debug support. -// -// An op that loads optimization parameters into HBM for embedding. Must be -// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -// embedding table configuration. For example, this op is used to install -// parameters that are loaded from a checkpoint before a training loop is -// executed. -// -// Arguments: -// parameters: Value of parameters used in the proximal Adagrad optimization algorithm. -// accumulators: Value of accumulators used in the proximal Adagrad optimization algorithm. -// gradient_accumulators: Value of gradient_accumulators used in the proximal Adagrad optimization algorithm. -// -// -// -// Returns the created operation. -func LoadTPUEmbeddingProximalAdagradParametersGradAccumDebug(scope *Scope, parameters tf.Output, accumulators tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LoadTPUEmbeddingProximalAdagradParametersGradAccumDebug", - Input: []tf.Input{ - parameters, accumulators, gradient_accumulators, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Serializes the tree ensemble to a proto. -// -// Arguments: -// tree_ensemble_handle: Handle to the tree ensemble. -// -// Returns: -// stamp_token: Stamp token of the tree ensemble resource. -// tree_ensemble_serialized: Serialized proto of the ensemble. -func BoostedTreesSerializeEnsemble(scope *Scope, tree_ensemble_handle tf.Output) (stamp_token tf.Output, tree_ensemble_serialized tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BoostedTreesSerializeEnsemble", - Input: []tf.Input{ - tree_ensemble_handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Computes inverse hyperbolic cosine of x element-wise. -// -// Given an input tensor, the function computes inverse hyperbolic cosine of every element. -// Input range is `[1, inf]`. It returns `nan` if the input lies outside the range. -// -// ```python -// x = tf.constant([-2, -0.5, 1, 1.2, 200, 10000, float("inf")]) -// tf.math.acosh(x) ==> [nan nan 0. 0.62236255 5.9914584 9.903487 inf] -// ``` -func Acosh(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Acosh", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Outputs deterministic pseudorandom random numbers from a gamma distribution. -// -// Outputs random values from a gamma distribution. -// -// The outputs are a deterministic function of `shape`, `seed`, and `alpha`. 
-// -// Arguments: -// shape: The shape of the output tensor. -// seed: 2 seeds (shape [2]). -// alpha: The concentration of the gamma distribution. Shape must match the rightmost -// dimensions of `shape`. -// -// Returns Random values with specified shape. -func StatelessRandomGammaV2(scope *Scope, shape tf.Output, seed tf.Output, alpha tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "StatelessRandomGammaV2", - Input: []tf.Input{ - shape, seed, alpha, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that executes a SQL query and emits rows of the result set. -// -// Arguments: -// driver_name: The database type. Currently, the only supported type is 'sqlite'. -// data_source_name: A connection string to connect to the database. -// query: A SQL query to execute. -// -// -func SqlDataset(scope *Scope, driver_name tf.Output, data_source_name tf.Output, query tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "SqlDataset", - Input: []tf.Input{ - driver_name, data_source_name, query, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Outputs deterministic pseudorandom random integers from a uniform distribution. -// -// The generated values follow a uniform distribution in the range `[minval, maxval)`. -// -// The outputs are a deterministic function of `shape`, `seed`, `minval`, and `maxval`. -// -// Arguments: -// shape: The shape of the output tensor. -// seed: 2 seeds (shape [2]). -// minval: Minimum value (inclusive, scalar). -// maxval: Maximum value (exclusive, scalar). -// -// Returns Random values with specified shape. -func StatelessRandomUniformInt(scope *Scope, shape tf.Output, seed tf.Output, minval tf.Output, maxval tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "StatelessRandomUniformInt", - Input: []tf.Input{ - shape, seed, minval, maxval, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns a batched diagonal tensor with a given batched diagonal values. -// -// Given a `diagonal`, this operation returns a tensor with the `diagonal` and -// everything else padded with zeros. The diagonal is computed as follows: -// -// Assume `diagonal` has `k` dimensions `[I, J, K, ..., N]`, then the output is a -// tensor of rank `k+1` with dimensions [I, J, K, ..., N, N]` where: -// -// `output[i, j, k, ..., m, n] = 1{m=n} * diagonal[i, j, k, ..., n]`. -// -// For example: -// -// ``` -// # 'diagonal' is [[1, 2, 3, 4], [5, 6, 7, 8]] -// -// and diagonal.shape = (2, 4) -// -// tf.matrix_diag(diagonal) ==> [[[1, 0, 0, 0] -// [0, 2, 0, 0] -// [0, 0, 3, 0] -// [0, 0, 0, 4]], -// [[5, 0, 0, 0] -// [0, 6, 0, 0] -// [0, 0, 7, 0] -// [0, 0, 0, 8]]] -// -// which has shape (2, 4, 4) -// ``` -// -// Arguments: -// diagonal: Rank `k`, where `k >= 1`. -// -// Returns Rank `k+1`, with `output.shape = diagonal.shape + [diagonal.shape[-1]]`. -func MatrixDiag(scope *Scope, diagonal tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MatrixDiag", - Input: []tf.Input{ - diagonal, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// StatelessTruncatedNormalAttr is an optional argument to StatelessTruncatedNormal. 
-type StatelessTruncatedNormalAttr func(optionalAttr) - -// StatelessTruncatedNormalDtype sets the optional dtype attribute to value. -// -// value: The type of the output. -// If not specified, defaults to DT_FLOAT -func StatelessTruncatedNormalDtype(value tf.DataType) StatelessTruncatedNormalAttr { - return func(m optionalAttr) { - m["dtype"] = value - } -} - -// Outputs deterministic pseudorandom values from a truncated normal distribution. -// -// The generated values follow a normal distribution with mean 0 and standard -// deviation 1, except that values whose magnitude is more than 2 standard -// deviations from the mean are dropped and re-picked. -// -// The outputs are a deterministic function of `shape` and `seed`. -// -// Arguments: -// shape: The shape of the output tensor. -// seed: 2 seeds (shape [2]). -// -// Returns Random values with specified shape. -func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessTruncatedNormalAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StatelessTruncatedNormal", - Input: []tf.Input{ - shape, seed, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RetrieveTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugAttr is an optional argument to RetrieveTPUEmbeddingStochasticGradientDescentParametersGradAccumDebug. -type RetrieveTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugAttr func(optionalAttr) - -// RetrieveTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func RetrieveTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugTableId(value int64) RetrieveTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// RetrieveTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugTableName(value string) RetrieveTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// RetrieveTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugConfig(value string) RetrieveTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Retrieve SGD embedding parameters with debug support. -// -// An op that retrieves optimization parameters from embedding to host -// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up -// the correct embedding table configuration. For example, this op is -// used to retrieve updated parameters before saving a checkpoint. -// -// Returns: -// parameters: Parameter parameters updated by the stochastic gradient descent optimization algorithm. -// gradient_accumulators: Parameter gradient_accumulators updated by the Adadelta optimization algorithm. 
-func RetrieveTPUEmbeddingStochasticGradientDescentParametersGradAccumDebug(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingStochasticGradientDescentParametersGradAccumDebugAttr) (parameters tf.Output, gradient_accumulators tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RetrieveTPUEmbeddingStochasticGradientDescentParametersGradAccumDebug", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// StatelessRandomUniformAttr is an optional argument to StatelessRandomUniform. -type StatelessRandomUniformAttr func(optionalAttr) - -// StatelessRandomUniformDtype sets the optional dtype attribute to value. -// -// value: The type of the output. -// If not specified, defaults to DT_FLOAT -func StatelessRandomUniformDtype(value tf.DataType) StatelessRandomUniformAttr { - return func(m optionalAttr) { - m["dtype"] = value - } -} - -// Outputs deterministic pseudorandom random values from a uniform distribution. -// -// The generated values follow a uniform distribution in the range `[0, 1)`. The -// lower bound 0 is included in the range, while the upper bound 1 is excluded. -// -// The outputs are a deterministic function of `shape` and `seed`. -// -// Arguments: -// shape: The shape of the output tensor. -// seed: 2 seeds (shape [2]). -// -// Returns Random values with specified shape. -func StatelessRandomUniform(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomUniformAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StatelessRandomUniform", - Input: []tf.Input{ - shape, seed, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// StatelessRandomUniformFullIntAttr is an optional argument to StatelessRandomUniformFullInt. -type StatelessRandomUniformFullIntAttr func(optionalAttr) - -// StatelessRandomUniformFullIntDtype sets the optional dtype attribute to value. -// -// value: The type of the output. -// If not specified, defaults to DT_UINT64 -func StatelessRandomUniformFullIntDtype(value tf.DataType) StatelessRandomUniformFullIntAttr { - return func(m optionalAttr) { - m["dtype"] = value - } -} - -// Outputs deterministic pseudorandom random integers from a uniform distribution. -// -// The generated values are uniform integers covering the whole range of `dtype`. -// -// The outputs are a deterministic function of `shape` and `seed`. -// -// Arguments: -// shape: The shape of the output tensor. -// seed: 2 seeds (shape [2]). -// -// Returns Random values with specified shape. -func StatelessRandomUniformFullInt(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomUniformFullIntAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StatelessRandomUniformFullInt", - Input: []tf.Input{ - shape, seed, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RetrieveTPUEmbeddingMomentumParametersGradAccumDebugAttr is an optional argument to RetrieveTPUEmbeddingMomentumParametersGradAccumDebug. 
-type RetrieveTPUEmbeddingMomentumParametersGradAccumDebugAttr func(optionalAttr) - -// RetrieveTPUEmbeddingMomentumParametersGradAccumDebugTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func RetrieveTPUEmbeddingMomentumParametersGradAccumDebugTableId(value int64) RetrieveTPUEmbeddingMomentumParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// RetrieveTPUEmbeddingMomentumParametersGradAccumDebugTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingMomentumParametersGradAccumDebugTableName(value string) RetrieveTPUEmbeddingMomentumParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// RetrieveTPUEmbeddingMomentumParametersGradAccumDebugConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingMomentumParametersGradAccumDebugConfig(value string) RetrieveTPUEmbeddingMomentumParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Retrieve Momentum embedding parameters with debug support. -// -// An op that retrieves optimization parameters from embedding to host -// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up -// the correct embedding table configuration. For example, this op is -// used to retrieve updated parameters before saving a checkpoint. -// -// Returns: -// parameters: Parameter parameters updated by the Momentum optimization algorithm. -// momenta: Parameter momenta updated by the Momentum optimization algorithm. -// gradient_accumulators: Parameter gradient_accumulators updated by the Momentum optimization algorithm. -func RetrieveTPUEmbeddingMomentumParametersGradAccumDebug(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingMomentumParametersGradAccumDebugAttr) (parameters tf.Output, momenta tf.Output, gradient_accumulators tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RetrieveTPUEmbeddingMomentumParametersGradAccumDebug", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// EqualAttr is an optional argument to Equal. -type EqualAttr func(optionalAttr) - -// EqualIncompatibleShapeError sets the optional incompatible_shape_error attribute to value. -// If not specified, defaults to true -func EqualIncompatibleShapeError(value bool) EqualAttr { - return func(m optionalAttr) { - m["incompatible_shape_error"] = value - } -} - -// Returns the truth value of (x == y) element-wise. -// -// *NOTE*: `Equal` supports broadcasting. 
More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -// -// ```python -// x = tf.constant([2, 4]) -// y = tf.constant(2) -// tf.math.equal(x, y) ==> array([True, False]) -// -// x = tf.constant([2, 4]) -// y = tf.constant([2, 4]) -// tf.math.equal(x, y) ==> array([True, True]) -// ``` -func Equal(scope *Scope, x tf.Output, y tf.Output, optional ...EqualAttr) (z tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Equal", - Input: []tf.Input{ - x, y, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SparseToSparseSetOperationAttr is an optional argument to SparseToSparseSetOperation. -type SparseToSparseSetOperationAttr func(optionalAttr) - -// SparseToSparseSetOperationValidateIndices sets the optional validate_indices attribute to value. -// If not specified, defaults to true -func SparseToSparseSetOperationValidateIndices(value bool) SparseToSparseSetOperationAttr { - return func(m optionalAttr) { - m["validate_indices"] = value - } -} - -// Applies set operation along last dimension of 2 `SparseTensor` inputs. -// -// See SetOperationOp::SetOperationFromContext for values of `set_operation`. -// -// If `validate_indices` is `True`, `SparseToSparseSetOperation` validates the -// order and range of `set1` and `set2` indices. -// -// Input `set1` is a `SparseTensor` represented by `set1_indices`, `set1_values`, -// and `set1_shape`. For `set1` ranked `n`, 1st `n-1` dimensions must be the same -// as `set2`. Dimension `n` contains values in a set, duplicates are allowed but -// ignored. -// -// Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`, -// and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same -// as `set1`. Dimension `n` contains values in a set, duplicates are allowed but -// ignored. -// -// If `validate_indices` is `True`, this op validates the order and range of `set1` -// and `set2` indices. -// -// Output `result` is a `SparseTensor` represented by `result_indices`, -// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this -// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth` -// dimension contains the result of `set_operation` applied to the corresponding -// `[0...n-1]` dimension of `set`. -// -// Arguments: -// set1_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major -// order. -// set1_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major -// order. -// set1_shape: 1D `Tensor`, shape of a `SparseTensor`. `set1_shape[0...n-1]` must -// be the same as `set2_shape[0...n-1]`, `set1_shape[n]` is the -// max set size across `0...n-1` dimensions. -// set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major -// order. -// set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major -// order. -// set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must -// be the same as `set1_shape[0...n-1]`, `set2_shape[n]` is the -// max set size across `0...n-1` dimensions. -// -// -// Returns: -// result_indices: 2D indices of a `SparseTensor`. -// result_values: 1D values of a `SparseTensor`. -// result_shape: 1D `Tensor` shape of a `SparseTensor`. 
`result_shape[0...n-1]` is -// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]` -// is the max result set size across all `0...n-1` dimensions. -func SparseToSparseSetOperation(scope *Scope, set1_indices tf.Output, set1_values tf.Output, set1_shape tf.Output, set2_indices tf.Output, set2_values tf.Output, set2_shape tf.Output, set_operation string, optional ...SparseToSparseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"set_operation": set_operation} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SparseToSparseSetOperation", - Input: []tf.Input{ - set1_indices, set1_values, set1_shape, set2_indices, set2_values, set2_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// InfeedEnqueueTupleAttr is an optional argument to InfeedEnqueueTuple. -type InfeedEnqueueTupleAttr func(optionalAttr) - -// InfeedEnqueueTupleLayouts sets the optional layouts attribute to value. -// -// value: A vector holding the requested layout in minor-to-major sequence for -// all the tuple shapes, in the order the shapes appear in the "shapes" input. -// The layout elements for a sub-shape can be set to -1, in which case the -// corresponding layout will be computed by the infeed operation. -// If not specified, defaults to <> -func InfeedEnqueueTupleLayouts(value []int64) InfeedEnqueueTupleAttr { - return func(m optionalAttr) { - m["layouts"] = value - } -} - -// InfeedEnqueueTupleDeviceOrdinal sets the optional device_ordinal attribute to value. -// -// value: The TPU device to use. This should be -1 when the Op -// is running on a TPU device, and >= 0 when the Op is running on the CPU -// device. -// If not specified, defaults to -1 -func InfeedEnqueueTupleDeviceOrdinal(value int64) InfeedEnqueueTupleAttr { - return func(m optionalAttr) { - m["device_ordinal"] = value - } -} - -// Feeds multiple Tensor values into the computation as an XLA tuple. -// -// Arguments: -// inputs: A list of tensors that will be provided using the infeed mechanism. -// shapes: The shapes of each tensor in `inputs`. -// -// Returns the created operation. -func InfeedEnqueueTuple(scope *Scope, inputs []tf.Output, shapes []tf.Shape, optional ...InfeedEnqueueTupleAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"shapes": shapes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "InfeedEnqueueTuple", - Input: []tf.Input{ - tf.OutputList(inputs), - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// LoadTPUEmbeddingRMSPropParametersAttr is an optional argument to LoadTPUEmbeddingRMSPropParameters. -type LoadTPUEmbeddingRMSPropParametersAttr func(optionalAttr) - -// LoadTPUEmbeddingRMSPropParametersTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func LoadTPUEmbeddingRMSPropParametersTableId(value int64) LoadTPUEmbeddingRMSPropParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// LoadTPUEmbeddingRMSPropParametersTableName sets the optional table_name attribute to value. 
-// If not specified, defaults to "" -func LoadTPUEmbeddingRMSPropParametersTableName(value string) LoadTPUEmbeddingRMSPropParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// LoadTPUEmbeddingRMSPropParametersConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingRMSPropParametersConfig(value string) LoadTPUEmbeddingRMSPropParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Load RMSProp embedding parameters. -// -// An op that loads optimization parameters into HBM for embedding. Must be -// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -// embedding table configuration. For example, this op is used to install -// parameters that are loaded from a checkpoint before a training loop is -// executed. -// -// Arguments: -// parameters: Value of parameters used in the RMSProp optimization algorithm. -// ms: Value of ms used in the RMSProp optimization algorithm. -// mom: Value of mom used in the RMSProp optimization algorithm. -// -// -// -// Returns the created operation. -func LoadTPUEmbeddingRMSPropParameters(scope *Scope, parameters tf.Output, ms tf.Output, mom tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingRMSPropParametersAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LoadTPUEmbeddingRMSPropParameters", - Input: []tf.Input{ - parameters, ms, mom, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Creates a Dataset that returns pseudorandom numbers. -// -// Creates a Dataset that returns a stream of uniformly distributed -// pseudorandom 64-bit signed integers. -// -// In the TensorFlow Python API, you can instantiate this dataset via the -// class `tf.data.experimental.RandomDataset`. -// -// Instances of this dataset are also created as a result of the -// `hoist_random_uniform` static optimization. Whether this optimization is -// performed is determined by the `experimental_optimization.hoist_random_uniform` -// option of `tf.data.Options`. -// -// Arguments: -// seed: A scalar seed for the random number generator. If either seed or -// seed2 is set to be non-zero, the random number generator is seeded -// by the given seed. Otherwise, a random seed is used. -// seed2: A second scalar seed to avoid seed collision. -// -// -func RandomDataset(scope *Scope, seed tf.Output, seed2 tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "RandomDataset", - Input: []tf.Input{ - seed, seed2, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// FractionalAvgPoolAttr is an optional argument to FractionalAvgPool. -type FractionalAvgPoolAttr func(optionalAttr) - -// FractionalAvgPoolPseudoRandom sets the optional pseudo_random attribute to value. -// -// value: When set to True, generates the pooling sequence in a -// pseudorandom fashion, otherwise, in a random fashion. Check paper [Benjamin -// Graham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071) for -// difference between pseudorandom and random. 
-// If not specified, defaults to false -func FractionalAvgPoolPseudoRandom(value bool) FractionalAvgPoolAttr { - return func(m optionalAttr) { - m["pseudo_random"] = value - } -} - -// FractionalAvgPoolOverlapping sets the optional overlapping attribute to value. -// -// value: When set to True, it means when pooling, the values at the boundary -// of adjacent pooling cells are used by both cells. For example: -// -// `index 0 1 2 3 4` -// -// `value 20 5 16 3 7` -// -// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice. -// The result would be [41/3, 26/3] for fractional avg pooling. -// If not specified, defaults to false -func FractionalAvgPoolOverlapping(value bool) FractionalAvgPoolAttr { - return func(m optionalAttr) { - m["overlapping"] = value - } -} - -// FractionalAvgPoolDeterministic sets the optional deterministic attribute to value. -// -// value: When set to True, a fixed pooling region will be used when -// iterating over a FractionalAvgPool node in the computation graph. Mainly used -// in unit test to make FractionalAvgPool deterministic. -// If not specified, defaults to false -func FractionalAvgPoolDeterministic(value bool) FractionalAvgPoolAttr { - return func(m optionalAttr) { - m["deterministic"] = value - } -} - -// FractionalAvgPoolSeed sets the optional seed attribute to value. -// -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func FractionalAvgPoolSeed(value int64) FractionalAvgPoolAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// FractionalAvgPoolSeed2 sets the optional seed2 attribute to value. -// -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func FractionalAvgPoolSeed2(value int64) FractionalAvgPoolAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Performs fractional average pooling on the input. -// -// Fractional average pooling is similar to Fractional max pooling in the pooling -// region generation step. The only difference is that after pooling regions are -// generated, a mean operation is performed instead of a max operation in each -// pooling region. -// -// Arguments: -// value: 4-D with shape `[batch, height, width, channels]`. -// pooling_ratio: Pooling ratio for each dimension of `value`, currently only -// supports row and col dimension and should be >= 1.0. For example, a valid -// pooling ratio looks like [1.0, 1.44, 1.73, 1.0]. The first and last elements -// must be 1.0 because we don't allow pooling on batch and channels -// dimensions. 1.44 and 1.73 are pooling ratio on height and width dimensions -// respectively. -// -// Returns: -// output: output tensor after fractional avg pooling. -// row_pooling_sequence: row pooling sequence, needed to calculate gradient. -// col_pooling_sequence: column pooling sequence, needed to calculate gradient. 
-func FractionalAvgPool(scope *Scope, value tf.Output, pooling_ratio []float32, optional ...FractionalAvgPoolAttr) (output tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"pooling_ratio": pooling_ratio} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FractionalAvgPool", - Input: []tf.Input{ - value, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// StatefulUniformFullIntAttr is an optional argument to StatefulUniformFullInt. -type StatefulUniformFullIntAttr func(optionalAttr) - -// StatefulUniformFullIntDtype sets the optional dtype attribute to value. -// -// value: The type of the output. -// If not specified, defaults to DT_UINT64 -func StatefulUniformFullIntDtype(value tf.DataType) StatefulUniformFullIntAttr { - return func(m optionalAttr) { - m["dtype"] = value - } -} - -// Outputs random integers from a uniform distribution. -// -// The generated values are uniform integers covering the whole range of `dtype`. -// -// Arguments: -// resource: The handle of the resource variable that stores the state of the RNG. -// algorithm: The RNG algorithm. -// shape: The shape of the output tensor. -// -// Returns Random values with specified shape. -func StatefulUniformFullInt(scope *Scope, resource tf.Output, algorithm tf.Output, shape tf.Output, optional ...StatefulUniformFullIntAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StatefulUniformFullInt", - Input: []tf.Input{ - resource, algorithm, shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// LoadTPUEmbeddingStochasticGradientDescentParametersAttr is an optional argument to LoadTPUEmbeddingStochasticGradientDescentParameters. -type LoadTPUEmbeddingStochasticGradientDescentParametersAttr func(optionalAttr) - -// LoadTPUEmbeddingStochasticGradientDescentParametersTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func LoadTPUEmbeddingStochasticGradientDescentParametersTableId(value int64) LoadTPUEmbeddingStochasticGradientDescentParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// LoadTPUEmbeddingStochasticGradientDescentParametersTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingStochasticGradientDescentParametersTableName(value string) LoadTPUEmbeddingStochasticGradientDescentParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// LoadTPUEmbeddingStochasticGradientDescentParametersConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingStochasticGradientDescentParametersConfig(value string) LoadTPUEmbeddingStochasticGradientDescentParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Load SGD embedding parameters. -// -// An op that loads optimization parameters into HBM for embedding. Must be -// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -// embedding table configuration. For example, this op is used to install -// parameters that are loaded from a checkpoint before a training loop is -// executed. 
-// -// Arguments: -// parameters: Value of parameters used in the stochastic gradient descent optimization algorithm. -// -// -// -// Returns the created operation. -func LoadTPUEmbeddingStochasticGradientDescentParameters(scope *Scope, parameters tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingStochasticGradientDescentParametersAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LoadTPUEmbeddingStochasticGradientDescentParameters", - Input: []tf.Input{ - parameters, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// RequantizePerChannelAttr is an optional argument to RequantizePerChannel. -type RequantizePerChannelAttr func(optionalAttr) - -// RequantizePerChannelOutType sets the optional out_type attribute to value. -// -// value: The quantized type of output tensor that needs to be converted. -// If not specified, defaults to DT_QUINT8 -func RequantizePerChannelOutType(value tf.DataType) RequantizePerChannelAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// Requantizes input with min and max values known per channel. -// -// Arguments: -// input: The original input tensor. -// input_min: The minimum value of the input tensor -// input_max: The maximum value of the input tensor. -// requested_output_min: The minimum value of the output tensor requested. -// requested_output_max: The maximum value of the output tensor requested. -// -// Returns: -// output: Output tensor. -// output_min: The minimum value of the final output tensor -// output_max: The maximum value of the final output tensor. -func RequantizePerChannel(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, requested_output_min tf.Output, requested_output_max tf.Output, optional ...RequantizePerChannelAttr) (output tf.Output, output_min tf.Output, output_max tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RequantizePerChannel", - Input: []tf.Input{ - input, input_min, input_max, requested_output_min, requested_output_max, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// LeakyReluAttr is an optional argument to LeakyRelu. -type LeakyReluAttr func(optionalAttr) - -// LeakyReluAlpha sets the optional alpha attribute to value. -// If not specified, defaults to 0.2 -func LeakyReluAlpha(value float32) LeakyReluAttr { - return func(m optionalAttr) { - m["alpha"] = value - } -} - -// Computes rectified linear: `max(features, features * alpha)`. -func LeakyRelu(scope *Scope, features tf.Output, optional ...LeakyReluAttr) (activations tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LeakyRelu", - Input: []tf.Input{ - features, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Component-wise divides a SparseTensor by a dense Tensor. -// -// *Limitation*: this Op only broadcasts the dense side to the sparse side, but not -// the other direction. -// -// Arguments: -// sp_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// sp_values: 1-D. 
`N` non-empty values corresponding to `sp_indices`. -// sp_shape: 1-D. Shape of the input SparseTensor. -// dense: `R`-D. The dense Tensor operand. -// -// Returns 1-D. The `N` values that are operated on. -func SparseDenseCwiseDiv(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseDenseCwiseDiv", - Input: []tf.Input{ - sp_indices, sp_values, sp_shape, dense, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// EnqueueTPUEmbeddingIntegerBatchAttr is an optional argument to EnqueueTPUEmbeddingIntegerBatch. -type EnqueueTPUEmbeddingIntegerBatchAttr func(optionalAttr) - -// EnqueueTPUEmbeddingIntegerBatchDeviceOrdinal sets the optional device_ordinal attribute to value. -// -// value: The TPU device to use. Should be >= 0 and less than the number -// of TPU cores in the task on which the node is placed. -// If not specified, defaults to -1 -func EnqueueTPUEmbeddingIntegerBatchDeviceOrdinal(value int64) EnqueueTPUEmbeddingIntegerBatchAttr { - return func(m optionalAttr) { - m["device_ordinal"] = value - } -} - -// An op that enqueues a list of input batch tensors to TPUEmbedding. -// -// Arguments: -// batch: A list of 1D tensors, one for each embedding table, containing the -// indices into the tables. -// mode_override: A string input that overrides the mode specified in the -// TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference', -// 'training', 'backward_pass_only'}. When set to 'unspecified', the mode set -// in TPUEmbeddingConfiguration is used, otherwise mode_override is used. -// -// Returns the created operation. -func EnqueueTPUEmbeddingIntegerBatch(scope *Scope, batch []tf.Output, mode_override tf.Output, optional ...EnqueueTPUEmbeddingIntegerBatchAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "EnqueueTPUEmbeddingIntegerBatch", - Input: []tf.Input{ - tf.OutputList(batch), mode_override, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// MapClearAttr is an optional argument to MapClear. -type MapClearAttr func(optionalAttr) - -// MapClearCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapClearCapacity(value int64) MapClearAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// MapClearMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapClearMemoryLimit(value int64) MapClearAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// MapClearContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func MapClearContainer(value string) MapClearAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// MapClearSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func MapClearSharedName(value string) MapClearAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op removes all elements in the underlying container. -// -// Returns the created operation. 
-func MapClear(scope *Scope, dtypes []tf.DataType, optional ...MapClearAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MapClear", - - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Deserialize `SparseTensor` objects. -// -// The input `serialized_sparse` must have the shape `[?, ?, ..., ?, 3]` where -// the last dimension stores serialized `SparseTensor` objects and the other N -// dimensions (N >= 0) correspond to a batch. The ranks of the original -// `SparseTensor` objects must all match. When the final `SparseTensor` is -// created, its rank is the rank of the incoming `SparseTensor` objects plus N; -// the sparse tensors have been concatenated along new dimensions, one for each -// batch. -// -// The output `SparseTensor` object's shape values for the original dimensions -// are the max across the input `SparseTensor` objects' shape values for the -// corresponding dimensions. The new dimensions match the size of the batch. -// -// The input `SparseTensor` objects' indices are assumed ordered in -// standard lexicographic order. If this is not the case, after this -// step run `SparseReorder` to restore index ordering. -// -// For example, if the serialized input is a `[2 x 3]` matrix representing two -// original `SparseTensor` objects: -// -// index = [ 0] -// [10] -// [20] -// values = [1, 2, 3] -// shape = [50] -// -// and -// -// index = [ 2] -// [10] -// values = [4, 5] -// shape = [30] -// -// then the final deserialized `SparseTensor` will be: -// -// index = [0 0] -// [0 10] -// [0 20] -// [1 2] -// [1 10] -// values = [1, 2, 3, 4, 5] -// shape = [2 50] -// -// Arguments: -// serialized_sparse: The serialized `SparseTensor` objects. The last dimension -// must have 3 columns. -// dtype: The `dtype` of the serialized `SparseTensor` objects. -func DeserializeSparse(scope *Scope, serialized_sparse tf.Output, dtype tf.DataType) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - opspec := tf.OpSpec{ - Type: "DeserializeSparse", - Input: []tf.Input{ - serialized_sparse, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Decode web-safe base64-encoded strings. -// -// Input may or may not have padding at the end. See EncodeBase64 for padding. -// Web-safe means that input must use - and _ instead of + and /. -// -// Arguments: -// input: Base64 strings to decode. -// -// Returns Decoded strings. -func DecodeBase64(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DecodeBase64", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// LoadTPUEmbeddingAdagradParametersAttr is an optional argument to LoadTPUEmbeddingAdagradParameters. -type LoadTPUEmbeddingAdagradParametersAttr func(optionalAttr) - -// LoadTPUEmbeddingAdagradParametersTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func LoadTPUEmbeddingAdagradParametersTableId(value int64) LoadTPUEmbeddingAdagradParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// LoadTPUEmbeddingAdagradParametersTableName sets the optional table_name attribute to value. 
-// If not specified, defaults to "" -func LoadTPUEmbeddingAdagradParametersTableName(value string) LoadTPUEmbeddingAdagradParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// LoadTPUEmbeddingAdagradParametersConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingAdagradParametersConfig(value string) LoadTPUEmbeddingAdagradParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Load Adagrad embedding parameters. -// -// An op that loads optimization parameters into HBM for embedding. Must be -// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -// embedding table configuration. For example, this op is used to install -// parameters that are loaded from a checkpoint before a training loop is -// executed. -// -// Arguments: -// parameters: Value of parameters used in the Adagrad optimization algorithm. -// accumulators: Value of accumulators used in the Adagrad optimization algorithm. -// -// -// -// Returns the created operation. -func LoadTPUEmbeddingAdagradParameters(scope *Scope, parameters tf.Output, accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingAdagradParametersAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LoadTPUEmbeddingAdagradParameters", - Input: []tf.Input{ - parameters, accumulators, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Returns the gradient of `Tile`. -// -// DEPRECATED at GraphDef version 3: TileGrad has been replaced with reduce_sum -// -// Since `Tile` takes an input and repeats the input `multiples` times -// along each dimension, `TileGrad` takes in `multiples` and aggregates -// each repeated tile of `input` into `output`. -func TileGrad(scope *Scope, input tf.Output, multiples tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TileGrad", - Input: []tf.Input{ - input, multiples, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// AudioSummaryAttr is an optional argument to AudioSummary. -type AudioSummaryAttr func(optionalAttr) - -// AudioSummaryMaxOutputs sets the optional max_outputs attribute to value. -// -// value: Max number of batch elements to generate audio for. -// If not specified, defaults to 3 -// -// REQUIRES: value >= 1 -func AudioSummaryMaxOutputs(value int64) AudioSummaryAttr { - return func(m optionalAttr) { - m["max_outputs"] = value - } -} - -// Outputs a `Summary` protocol buffer with audio. -// -// DEPRECATED at GraphDef version 15: Use AudioSummaryV2. -// -// The summary has up to `max_outputs` summary values containing audio. The -// audio is built from `tensor` which must be 3-D with shape `[batch_size, -// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are -// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. -// -// The `tag` argument is a scalar `Tensor` of type `string`. It is used to -// build the `tag` of the summary values: -// -// * If `max_outputs` is 1, the summary value tag is '*tag*/audio'. -// * If `max_outputs` is greater than 1, the summary value tags are -// generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. -// -// Arguments: -// tag: Scalar. Used to build the `tag` attribute of the summary values. 
-// tensor: 2-D of shape `[batch_size, frames]`. -// sample_rate: The sample rate of the signal in hertz. -// -// Returns Scalar. Serialized `Summary` protocol buffer. -func AudioSummary(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate float32, optional ...AudioSummaryAttr) (summary tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"sample_rate": sample_rate} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "AudioSummary", - Input: []tf.Input{ - tag, tensor, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// LoadTPUEmbeddingFTRLParametersAttr is an optional argument to LoadTPUEmbeddingFTRLParameters. -type LoadTPUEmbeddingFTRLParametersAttr func(optionalAttr) - -// LoadTPUEmbeddingFTRLParametersTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func LoadTPUEmbeddingFTRLParametersTableId(value int64) LoadTPUEmbeddingFTRLParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// LoadTPUEmbeddingFTRLParametersTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingFTRLParametersTableName(value string) LoadTPUEmbeddingFTRLParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// LoadTPUEmbeddingFTRLParametersConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingFTRLParametersConfig(value string) LoadTPUEmbeddingFTRLParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Load FTRL embedding parameters. -// -// An op that loads optimization parameters into HBM for embedding. Must be -// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -// embedding table configuration. For example, this op is used to install -// parameters that are loaded from a checkpoint before a training loop is -// executed. -// -// Arguments: -// parameters: Value of parameters used in the FTRL optimization algorithm. -// accumulators: Value of accumulators used in the FTRL optimization algorithm. -// linears: Value of linears used in the FTRL optimization algorithm. -// -// -// -// Returns the created operation. -func LoadTPUEmbeddingFTRLParameters(scope *Scope, parameters tf.Output, accumulators tf.Output, linears tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingFTRLParametersAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LoadTPUEmbeddingFTRLParameters", - Input: []tf.Input{ - parameters, accumulators, linears, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Conv3DBackpropInputAttr is an optional argument to Conv3DBackpropInput. -type Conv3DBackpropInputAttr func(optionalAttr) - -// Conv3DBackpropInputDilations sets the optional dilations attribute to value. -// If not specified, defaults to -func Conv3DBackpropInputDilations(value []int64) Conv3DBackpropInputAttr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes the gradients of 3-D convolution with respect to the input. -// -// DEPRECATED at GraphDef version 10: Use Conv3DBackpropInputV2 -// -// Arguments: -// input: Shape `[batch, depth, rows, cols, in_channels]`. -// filter: Shape `[depth, rows, cols, in_channels, out_channels]`. 
-// `in_channels` must match between `input` and `filter`. -// out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols, -// out_channels]`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. -func Conv3DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropInputAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Conv3DBackpropInput", - Input: []tf.Input{ - input, filter, out_backprop, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// DepthwiseConv2dNativeAttr is an optional argument to DepthwiseConv2dNative. -type DepthwiseConv2dNativeAttr func(optionalAttr) - -// DepthwiseConv2dNativeExplicitPaddings sets the optional explicit_paddings attribute to value. -// If not specified, defaults to <> -func DepthwiseConv2dNativeExplicitPaddings(value []int64) DepthwiseConv2dNativeAttr { - return func(m optionalAttr) { - m["explicit_paddings"] = value - } -} - -// DepthwiseConv2dNativeDataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, height, width, channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, channels, height, width]. -// If not specified, defaults to "NHWC" -func DepthwiseConv2dNativeDataFormat(value string) DepthwiseConv2dNativeAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// DepthwiseConv2dNativeDilations sets the optional dilations attribute to value. -// -// value: 1-D tensor of length 4. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each filter -// element on that dimension. The dimension order is determined by the value of -// `data_format`, see above for details. Dilations in the batch and depth -// dimensions must be 1. -// If not specified, defaults to -func DepthwiseConv2dNativeDilations(value []int64) DepthwiseConv2dNativeAttr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes a 2-D depthwise convolution given 4-D `input` and `filter` tensors. -// -// Given an input tensor of shape `[batch, in_height, in_width, in_channels]` -// and a filter / kernel tensor of shape -// `[filter_height, filter_width, in_channels, channel_multiplier]`, containing -// `in_channels` convolutional filters of depth 1, `depthwise_conv2d` applies -// a different filter to each input channel (expanding from 1 channel to -// `channel_multiplier` channels for each), then concatenates the results -// together. Thus, the output has `in_channels * channel_multiplier` channels. -// -// ``` -// for k in 0..in_channels-1 -// for q in 0..channel_multiplier-1 -// output[b, i, j, k * channel_multiplier + q] = -// sum_{di, dj} input[b, strides[1] * i + di, strides[2] * j + dj, k] * -// filter[di, dj, k, q] -// ``` -// -// Must have `strides[0] = strides[3] = 1`. For the most common case of the same -// horizontal and vertices strides, `strides = [1, stride, stride, 1]`. -// -// Arguments: -// -// -// strides: 1-D of length 4. 
The stride of the sliding window for each dimension -// of `input`. -// padding: The type of padding algorithm to use. -func DepthwiseConv2dNative(scope *Scope, input tf.Output, filter tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DepthwiseConv2dNative", - Input: []tf.Input{ - input, filter, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates an all-zeros CSRSparseMatrix with shape `dense_shape`. -// -// Arguments: -// dense_shape: The desired matrix shape. -// -// -// Returns An empty CSR matrix with shape `dense_shape`. -func SparseMatrixZeros(scope *Scope, dense_shape tf.Output, type_ tf.DataType) (sparse_matrix tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"type": type_} - opspec := tf.OpSpec{ - Type: "SparseMatrixZeros", - Input: []tf.Input{ - dense_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// PaddingFIFOQueueV2Attr is an optional argument to PaddingFIFOQueueV2. -type PaddingFIFOQueueV2Attr func(optionalAttr) - -// PaddingFIFOQueueV2Shapes sets the optional shapes attribute to value. -// -// value: The shape of each component in a value. The length of this attr must -// be either 0 or the same as the length of component_types. -// Shapes of fixed rank but variable size are allowed by setting -// any shape dimension to -1. In this case, the inputs' shape may vary along -// the given dimension, and DequeueMany will pad the given dimension with -// zeros up to the maximum shape of all elements in the given batch. -// If the length of this attr is 0, different queue elements may have -// different ranks and shapes, but only one element may be dequeued at a time. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func PaddingFIFOQueueV2Shapes(value []tf.Shape) PaddingFIFOQueueV2Attr { - return func(m optionalAttr) { - m["shapes"] = value - } -} - -// PaddingFIFOQueueV2Capacity sets the optional capacity attribute to value. -// -// value: The upper bound on the number of elements in this queue. -// Negative numbers mean no limit. -// If not specified, defaults to -1 -func PaddingFIFOQueueV2Capacity(value int64) PaddingFIFOQueueV2Attr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// PaddingFIFOQueueV2Container sets the optional container attribute to value. -// -// value: If non-empty, this queue is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func PaddingFIFOQueueV2Container(value string) PaddingFIFOQueueV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// PaddingFIFOQueueV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this queue will be shared under the given name -// across multiple sessions. -// If not specified, defaults to "" -func PaddingFIFOQueueV2SharedName(value string) PaddingFIFOQueueV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// A queue that produces elements in first-in first-out order. -// -// Variable-size shapes are allowed by setting the corresponding shape dimensions -// to 0 in the shape attr. 
In this case DequeueMany will pad up to the maximum -// size of any given element in the minibatch. See below for details. -// -// Arguments: -// component_types: The type of each component in a value. -// -// Returns The handle to the queue. -func PaddingFIFOQueueV2(scope *Scope, component_types []tf.DataType, optional ...PaddingFIFOQueueV2Attr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"component_types": component_types} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "PaddingFIFOQueueV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingMomentumParametersGradAccumDebug. -type LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr func(optionalAttr) - -// LoadTPUEmbeddingMomentumParametersGradAccumDebugTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func LoadTPUEmbeddingMomentumParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// LoadTPUEmbeddingMomentumParametersGradAccumDebugTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingMomentumParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// LoadTPUEmbeddingMomentumParametersGradAccumDebugConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func LoadTPUEmbeddingMomentumParametersGradAccumDebugConfig(value string) LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Load Momentum embedding parameters with debug support. -// -// An op that loads optimization parameters into HBM for embedding. Must be -// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -// embedding table configuration. For example, this op is used to install -// parameters that are loaded from a checkpoint before a training loop is -// executed. -// -// Arguments: -// parameters: Value of parameters used in the Momentum optimization algorithm. -// momenta: Value of momenta used in the Momentum optimization algorithm. -// gradient_accumulators: Value of gradient_accumulators used in the Momentum optimization algorithm. -// -// -// -// Returns the created operation. -func LoadTPUEmbeddingMomentumParametersGradAccumDebug(scope *Scope, parameters tf.Output, momenta tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LoadTPUEmbeddingMomentumParametersGradAccumDebug", - Input: []tf.Input{ - parameters, momenta, gradient_accumulators, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Constructs a tensor by tiling a given tensor. -// -// This operation creates a new tensor by replicating `input` `multiples` times. 
-// The output tensor's i'th dimension has `input.dims(i) * multiples[i]` elements, -// and the values of `input` are replicated `multiples[i]` times along the 'i'th -// dimension. For example, tiling `[a b c d]` by `[2]` produces -// `[a b c d a b c d]`. -// -// >>> a = tf.constant([[1,2,3],[4,5,6]], tf.int32) -// >>> b = tf.constant([1,2], tf.int32) -// >>> tf.tile(a, b) -// -// >>> c = tf.constant([2,1], tf.int32) -// >>> tf.tile(a, c) -// -// >>> d = tf.constant([2,2], tf.int32) -// >>> tf.tile(a, d) -// -// -// Arguments: -// input: 1-D or higher. -// multiples: 1-D. Length must be the same as the number of dimensions in `input` -func Tile(scope *Scope, input tf.Output, multiples tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Tile", - Input: []tf.Input{ - input, multiples, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SerializeSparseAttr is an optional argument to SerializeSparse. -type SerializeSparseAttr func(optionalAttr) - -// SerializeSparseOutType sets the optional out_type attribute to value. -// -// value: The `dtype` to use for serialization; the supported types are `string` -// (default) and `variant`. -// If not specified, defaults to DT_STRING -func SerializeSparseOutType(value tf.DataType) SerializeSparseAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// Serialize a `SparseTensor` into a `[3]` `Tensor` object. -// -// Arguments: -// sparse_indices: 2-D. The `indices` of the `SparseTensor`. -// sparse_values: 1-D. The `values` of the `SparseTensor`. -// sparse_shape: 1-D. The `shape` of the `SparseTensor`. -func SerializeSparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...SerializeSparseAttr) (serialized_sparse tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SerializeSparse", - Input: []tf.Input{ - sparse_indices, sparse_values, sparse_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Extracts the average gradient in the given ConditionalAccumulator. -// -// The op blocks until sufficient (i.e., more than num_required) -// gradients have been accumulated. If the accumulator has already -// aggregated more than num_required gradients, it returns the average of -// the accumulated gradients. Also automatically increments the recorded -// global_step in the accumulator by 1, and resets the aggregate to 0. -// -// Arguments: -// handle: The handle to an accumulator. -// num_required: Number of gradients required before we return an aggregate. -// dtype: The data type of accumulated gradients. Needs to correspond to the type -// of the accumulator. -// -// Returns The average of the accumulated gradients. -func ResourceAccumulatorTakeGradient(scope *Scope, handle tf.Output, num_required tf.Output, dtype tf.DataType) (average tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - opspec := tf.OpSpec{ - Type: "ResourceAccumulatorTakeGradient", - Input: []tf.Input{ - handle, num_required, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// InfeedEnqueueAttr is an optional argument to InfeedEnqueue. -type InfeedEnqueueAttr func(optionalAttr) - -// InfeedEnqueueShape sets the optional shape attribute to value. -// -// value: The shape of the tensor. 
-// If not specified, defaults to <> -func InfeedEnqueueShape(value tf.Shape) InfeedEnqueueAttr { - return func(m optionalAttr) { - m["shape"] = value - } -} - -// InfeedEnqueueLayout sets the optional layout attribute to value. -// -// value: A vector holding the requested layout in minor-to-major sequence. -// If a layout attribute is passed, but its values are all -1, the layout will -// be computed by the infeed operation. -// If not specified, defaults to <> -func InfeedEnqueueLayout(value []int64) InfeedEnqueueAttr { - return func(m optionalAttr) { - m["layout"] = value - } -} - -// InfeedEnqueueDeviceOrdinal sets the optional device_ordinal attribute to value. -// -// value: The TPU device to use. This should be -1 when the Op -// is running on a TPU device, and >= 0 when the Op is running on the CPU -// device. -// If not specified, defaults to -1 -func InfeedEnqueueDeviceOrdinal(value int64) InfeedEnqueueAttr { - return func(m optionalAttr) { - m["device_ordinal"] = value - } -} - -// An op which feeds a single Tensor value into the computation. -// -// Arguments: -// input: A tensor that will be provided using the infeed mechanism. -// -// Returns the created operation. -func InfeedEnqueue(scope *Scope, input tf.Output, optional ...InfeedEnqueueAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "InfeedEnqueue", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Computes the mean along segments of a tensor. -// -// Read -// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) -// for an explanation of segments. -// -// Computes a tensor such that -// \\(output_i = \frac{\sum_j data_j}{N}\\) where `mean` is -// over `j` such that `segment_ids[j] == i` and `N` is the total number of -// values summed. -// -// If the mean is empty for a given segment ID `i`, `output[i] = 0`. -// -//
-
-//
-// -// For example: -// -// ``` -// c = tf.constant([[1.0,2,3,4], [4, 3, 2, 1], [5,6,7,8]]) -// tf.segment_mean(c, tf.constant([0, 0, 1])) -// # ==> [[2.5, 2.5, 2.5, 2.5], -// # [5, 6, 7, 8]] -// ``` -// -// -// Arguments: -// -// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s -// first dimension. Values should be sorted and can be repeated. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SegmentMean(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SegmentMean", - Input: []tf.Input{ - data, segment_ids, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// CTCLossV2Attr is an optional argument to CTCLossV2. -type CTCLossV2Attr func(optionalAttr) - -// CTCLossV2PreprocessCollapseRepeated sets the optional preprocess_collapse_repeated attribute to value. -// -// value: Scalar, if true then repeated labels are -// collapsed prior to the CTC calculation. -// If not specified, defaults to false -func CTCLossV2PreprocessCollapseRepeated(value bool) CTCLossV2Attr { - return func(m optionalAttr) { - m["preprocess_collapse_repeated"] = value - } -} - -// CTCLossV2CtcMergeRepeated sets the optional ctc_merge_repeated attribute to value. -// -// value: Scalar. If set to false, *during* CTC calculation -// repeated non-blank labels will not be merged and are interpreted as -// individual labels. This is a simplified version of CTC. -// If not specified, defaults to true -func CTCLossV2CtcMergeRepeated(value bool) CTCLossV2Attr { - return func(m optionalAttr) { - m["ctc_merge_repeated"] = value - } -} - -// CTCLossV2IgnoreLongerOutputsThanInputs sets the optional ignore_longer_outputs_than_inputs attribute to value. -// -// value: Scalar. If set to true, during CTC -// calculation, items that have longer output sequences than input sequences -// are skipped: they don't contribute to the loss term and have zero-gradient. -// If not specified, defaults to false -func CTCLossV2IgnoreLongerOutputsThanInputs(value bool) CTCLossV2Attr { - return func(m optionalAttr) { - m["ignore_longer_outputs_than_inputs"] = value - } -} - -// Calculates the CTC Loss (log probability) for each batch entry. Also calculates -// -// the gradient. This class performs the softmax operation for you, so inputs -// should be e.g. linear projections of outputs by an LSTM. -// -// Arguments: -// inputs: 3-D, shape: `(max_time x batch_size x num_classes)`, the logits. Default blank -// label is 0 rather num_classes - 1. -// labels_indices: The indices of a `SparseTensor`. -// `labels_indices(i, :) == [b, t]` means `labels_values(i)` stores the id for -// `(batch b, time t)`. -// labels_values: The values (labels) associated with the given batch and time. -// sequence_length: A vector containing sequence lengths (batch). -// -// Returns: -// loss: A vector (batch) containing log-probabilities. -// gradient: The gradient of `loss`. 3-D, shape: -// `(max_time x batch_size x num_classes)`. 
-func CTCLossV2(scope *Scope, inputs tf.Output, labels_indices tf.Output, labels_values tf.Output, sequence_length tf.Output, optional ...CTCLossV2Attr) (loss tf.Output, gradient tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CTCLossV2", - Input: []tf.Input{ - inputs, labels_indices, labels_values, sequence_length, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// ResourceSparseApplyKerasMomentumAttr is an optional argument to ResourceSparseApplyKerasMomentum. -type ResourceSparseApplyKerasMomentumAttr func(optionalAttr) - -// ResourceSparseApplyKerasMomentumUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceSparseApplyKerasMomentumUseLocking(value bool) ResourceSparseApplyKerasMomentumAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// ResourceSparseApplyKerasMomentumUseNesterov sets the optional use_nesterov attribute to value. -// -// value: If `True`, the tensor passed to compute grad will be -// var + momentum * accum, so in the end, the var you get is actually -// var + momentum * accum. -// If not specified, defaults to false -func ResourceSparseApplyKerasMomentumUseNesterov(value bool) ResourceSparseApplyKerasMomentumAttr { - return func(m optionalAttr) { - m["use_nesterov"] = value - } -} - -// Update relevant entries in '*var' and '*accum' according to the momentum scheme. -// -// Set use_nesterov = True if you want to use Nesterov momentum. -// -// That is for rows we have grad for, we update var and accum as follows: -// -// accum = accum * momentum - lr * grad -// var += accum -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Learning rate. Must be a scalar. -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. -// momentum: Momentum. Must be a scalar. -// -// Returns the created operation. -func ResourceSparseApplyKerasMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, momentum tf.Output, optional ...ResourceSparseApplyKerasMomentumAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceSparseApplyKerasMomentum", - Input: []tf.Input{ - var_, accum, lr, grad, indices, momentum, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// MaxPoolGradGradV2Attr is an optional argument to MaxPoolGradGradV2. -type MaxPoolGradGradV2Attr func(optionalAttr) - -// MaxPoolGradGradV2DataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. 
-// If not specified, defaults to "NHWC" -func MaxPoolGradGradV2DataFormat(value string) MaxPoolGradGradV2Attr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Computes second-order gradients of the maxpooling function. -// -// Arguments: -// orig_input: The original input tensor. -// orig_output: The original output tensor. -// grad: 4-D. Gradients of gradients w.r.t. the input of `max_pool`. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. -// -// Returns Gradients of gradients w.r.t. the input to `max_pool`. -func MaxPoolGradGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolGradGradV2Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MaxPoolGradGradV2", - Input: []tf.Input{ - orig_input, orig_output, grad, ksize, strides, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RetrieveTPUEmbeddingMomentumParametersAttr is an optional argument to RetrieveTPUEmbeddingMomentumParameters. -type RetrieveTPUEmbeddingMomentumParametersAttr func(optionalAttr) - -// RetrieveTPUEmbeddingMomentumParametersTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func RetrieveTPUEmbeddingMomentumParametersTableId(value int64) RetrieveTPUEmbeddingMomentumParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// RetrieveTPUEmbeddingMomentumParametersTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingMomentumParametersTableName(value string) RetrieveTPUEmbeddingMomentumParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// RetrieveTPUEmbeddingMomentumParametersConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingMomentumParametersConfig(value string) RetrieveTPUEmbeddingMomentumParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Retrieve Momentum embedding parameters. -// -// An op that retrieves optimization parameters from embedding to host -// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up -// the correct embedding table configuration. For example, this op is -// used to retrieve updated parameters before saving a checkpoint. -// -// Returns: -// parameters: Parameter parameters updated by the Momentum optimization algorithm. -// momenta: Parameter momenta updated by the Momentum optimization algorithm. -func RetrieveTPUEmbeddingMomentumParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingMomentumParametersAttr) (parameters tf.Output, momenta tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RetrieveTPUEmbeddingMomentumParameters", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// ConfigureDistributedTPUAttr is an optional argument to ConfigureDistributedTPU. 
-type ConfigureDistributedTPUAttr func(optionalAttr) - -// ConfigureDistributedTPUEmbeddingConfig sets the optional embedding_config attribute to value. -// -// value: Reserved. Do not use. -// If not specified, defaults to "" -func ConfigureDistributedTPUEmbeddingConfig(value string) ConfigureDistributedTPUAttr { - return func(m optionalAttr) { - m["embedding_config"] = value - } -} - -// ConfigureDistributedTPUTpuEmbeddingConfig sets the optional tpu_embedding_config attribute to value. -// -// value: Serialized tensorflow.tpu.TPUEmbeddingConfiguration that -// describes the embedding lookups of the program. -// If not specified, defaults to "" -func ConfigureDistributedTPUTpuEmbeddingConfig(value string) ConfigureDistributedTPUAttr { - return func(m optionalAttr) { - m["tpu_embedding_config"] = value - } -} - -// ConfigureDistributedTPUIsGlobalInit sets the optional is_global_init attribute to value. -// -// value: Reserved. Do not use. -// If not specified, defaults to false -func ConfigureDistributedTPUIsGlobalInit(value bool) ConfigureDistributedTPUAttr { - return func(m optionalAttr) { - m["is_global_init"] = value - } -} - -// ConfigureDistributedTPUEnableWholeMeshCompilations sets the optional enable_whole_mesh_compilations attribute to value. -// If not specified, defaults to false -func ConfigureDistributedTPUEnableWholeMeshCompilations(value bool) ConfigureDistributedTPUAttr { - return func(m optionalAttr) { - m["enable_whole_mesh_compilations"] = value - } -} - -// ConfigureDistributedTPUCompilationFailureClosesChips sets the optional compilation_failure_closes_chips attribute to value. -// If not specified, defaults to true -func ConfigureDistributedTPUCompilationFailureClosesChips(value bool) ConfigureDistributedTPUAttr { - return func(m optionalAttr) { - m["compilation_failure_closes_chips"] = value - } -} - -// Sets up the centralized structures for a distributed TPU system. -// -// Returns A serialized tensorflow.tpu.TopologyProto that describes the TPU -// topology. -func ConfigureDistributedTPU(scope *Scope, optional ...ConfigureDistributedTPUAttr) (topology tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ConfigureDistributedTPU", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Combines (nests of) input elements into a dataset of (nests of) windows. -// -// A "window" is a finite dataset of flat elements of size `size` (or possibly -// fewer if there are not enough input elements to fill the window and -// `drop_remainder` evaluates to false). -// -// The `shift` argument determines the number of input elements by which -// the window moves on each iteration. The first element in the `k`th window -// will be element -// -// ``` -// 1 + (k-1) * shift -// ``` -// -// of the input dataset. In particular, the first element of the first window -// will always be the first element of the input dataset. -// -// If the `stride` parameter is greater than 1, then each window will skip -// `(stride - 1)` input elements between each element that appears in the -// window. Output windows will still contain `size` elements regardless of -// the value of `stride`. -// -// The `stride` argument determines the stride of the input elements, and the -// `shift` argument determines the shift of the window. 
-// -// For example, letting `{...}` to represent a Dataset: -// -// - `tf.data.Dataset.range(7).window(2)` produces -// `{{0, 1}, {2, 3}, {4, 5}, {6}}` -// - `tf.data.Dataset.range(7).window(3, 2, 1, True)` produces -// `{{0, 1, 2}, {2, 3, 4}, {4, 5, 6}}` -// - `tf.data.Dataset.range(7).window(3, 1, 2, True)` produces -// `{{0, 2, 4}, {1, 3, 5}, {2, 4, 6}}` -// -// Note that when the `window` transformation is applied to a dataset of -// nested elements, it produces a dataset of nested windows. -// -// For example: -// -// - `tf.data.Dataset.from_tensor_slices((range(4), range(4))).window(2)` -// produces `{({0, 1}, {0, 1}), ({2, 3}, {2, 3})}` -// - `tf.data.Dataset.from_tensor_slices({"a": range(4)}).window(2)` -// produces `{{"a": {0, 1}}, {"a": {2, 3}}}` -// -// Arguments: -// -// size: An integer scalar, representing the number of elements -// of the input dataset to combine into a window. Must be positive. -// shift: An integer scalar, representing the number of input elements -// by which the window moves in each iteration. Defaults to `size`. -// Must be positive. -// stride: An integer scalar, representing the stride of the input elements -// in the sliding window. Must be positive. The default value of 1 means -// "retain every input element". -// drop_remainder: A Boolean scalar, representing whether the last window should be -// dropped if its size is smaller than `window_size`. -// -// -func WindowDataset(scope *Scope, input_dataset tf.Output, size tf.Output, shift tf.Output, stride tf.Output, drop_remainder tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "WindowDataset", - Input: []tf.Input{ - input_dataset, size, shift, stride, drop_remainder, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SetSizeAttr is an optional argument to SetSize. -type SetSizeAttr func(optionalAttr) - -// SetSizeValidateIndices sets the optional validate_indices attribute to value. -// If not specified, defaults to true -func SetSizeValidateIndices(value bool) SetSizeAttr { - return func(m optionalAttr) { - m["validate_indices"] = value - } -} - -// Number of unique elements along last dimension of input `set`. -// -// Input `set` is a `SparseTensor` represented by `set_indices`, `set_values`, -// and `set_shape`. The last dimension contains values in a set, duplicates are -// allowed but ignored. -// -// If `validate_indices` is `True`, this op validates the order and range of `set` -// indices. -// -// Arguments: -// set_indices: 2D `Tensor`, indices of a `SparseTensor`. -// set_values: 1D `Tensor`, values of a `SparseTensor`. -// set_shape: 1D `Tensor`, shape of a `SparseTensor`. -// -// Returns For `set` ranked `n`, this is a `Tensor` with rank `n-1`, and the same 1st -// `n-1` dimensions as `set`. Each value is the number of unique elements in -// the corresponding `[0...n-1]` dimension of `set`. 
-func SetSize(scope *Scope, set_indices tf.Output, set_values tf.Output, set_shape tf.Output, optional ...SetSizeAttr) (size tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SetSize", - Input: []tf.Input{ - set_indices, set_values, set_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// AutoShardDatasetAttr is an optional argument to AutoShardDataset. -type AutoShardDatasetAttr func(optionalAttr) - -// AutoShardDatasetAutoShardPolicy sets the optional auto_shard_policy attribute to value. -// If not specified, defaults to 0 -func AutoShardDatasetAutoShardPolicy(value int64) AutoShardDatasetAttr { - return func(m optionalAttr) { - m["auto_shard_policy"] = value - } -} - -// Creates a dataset that shards the input dataset. -// -// Creates a dataset that shards the input dataset by num_workers, returning a -// sharded dataset for the index-th worker. This attempts to automatically shard -// a dataset by examining the Dataset graph and inserting a shard op before the -// inputs to a reader Dataset (e.g. CSVDataset, TFRecordDataset). -// -// This dataset will throw a NotFound error if we cannot shard the dataset -// automatically. -// -// Arguments: -// input_dataset: A variant tensor representing the input dataset. -// num_workers: A scalar representing the number of workers to distribute this dataset across. -// index: A scalar representing the index of the current worker out of num_workers. -// -// -func AutoShardDataset(scope *Scope, input_dataset tf.Output, num_workers tf.Output, index tf.Output, output_types []tf.DataType, output_shapes []tf.Shape, optional ...AutoShardDatasetAttr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "AutoShardDataset", - Input: []tf.Input{ - input_dataset, num_workers, index, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// InfeedEnqueuePrelinearizedBufferAttr is an optional argument to InfeedEnqueuePrelinearizedBuffer. -type InfeedEnqueuePrelinearizedBufferAttr func(optionalAttr) - -// InfeedEnqueuePrelinearizedBufferDeviceOrdinal sets the optional device_ordinal attribute to value. -// -// value: The TPU device to use. This should be -1 when the Op is running on a TPU device -// and = 0 when the Op is running on the CPU device. -// If not specified, defaults to -1 -func InfeedEnqueuePrelinearizedBufferDeviceOrdinal(value int64) InfeedEnqueuePrelinearizedBufferAttr { - return func(m optionalAttr) { - m["device_ordinal"] = value - } -} - -// An op which enqueues prelinearized buffer into TPU infeed. -// -// Arguments: -// input: A variant tensor representing linearized output. -// -// Returns the created operation. -func InfeedEnqueuePrelinearizedBuffer(scope *Scope, input tf.Output, optional ...InfeedEnqueuePrelinearizedBufferAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "InfeedEnqueuePrelinearizedBuffer", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// RetrieveTPUEmbeddingFTRLParametersAttr is an optional argument to RetrieveTPUEmbeddingFTRLParameters. 
-type RetrieveTPUEmbeddingFTRLParametersAttr func(optionalAttr) - -// RetrieveTPUEmbeddingFTRLParametersTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func RetrieveTPUEmbeddingFTRLParametersTableId(value int64) RetrieveTPUEmbeddingFTRLParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// RetrieveTPUEmbeddingFTRLParametersTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingFTRLParametersTableName(value string) RetrieveTPUEmbeddingFTRLParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// RetrieveTPUEmbeddingFTRLParametersConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingFTRLParametersConfig(value string) RetrieveTPUEmbeddingFTRLParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Retrieve FTRL embedding parameters. -// -// An op that retrieves optimization parameters from embedding to host -// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up -// the correct embedding table configuration. For example, this op is -// used to retrieve updated parameters before saving a checkpoint. -// -// Returns: -// parameters: Parameter parameters updated by the FTRL optimization algorithm. -// accumulators: Parameter accumulators updated by the FTRL optimization algorithm. -// linears: Parameter linears updated by the FTRL optimization algorithm. -func RetrieveTPUEmbeddingFTRLParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingFTRLParametersAttr) (parameters tf.Output, accumulators tf.Output, linears tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RetrieveTPUEmbeddingFTRLParameters", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Returns the result of a TPU compilation. -// -// This operation returns the result of a TPU compilation as a serialized -// CompilationResultProto, which holds a status and an error message if an error -// occurred during compilation. -func TPUCompilationResult(scope *Scope) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TPUCompilationResult", - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceApplyAdagradV2Attr is an optional argument to ResourceApplyAdagradV2. -type ResourceApplyAdagradV2Attr func(optionalAttr) - -// ResourceApplyAdagradV2UseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyAdagradV2UseLocking(value bool) ResourceApplyAdagradV2Attr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// ResourceApplyAdagradV2UpdateSlots sets the optional update_slots attribute to value. -// If not specified, defaults to true -func ResourceApplyAdagradV2UpdateSlots(value bool) ResourceApplyAdagradV2Attr { - return func(m optionalAttr) { - m["update_slots"] = value - } -} - -// Update '*var' according to the adagrad scheme. 
-// -// accum += grad * grad -// var -= lr * grad * (1 / (sqrt(accum) + epsilon)) -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// epsilon: Constant factor. Must be a scalar. -// grad: The gradient. -// -// Returns the created operation. -func ResourceApplyAdagradV2(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdagradV2Attr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyAdagradV2", - Input: []tf.Input{ - var_, accum, lr, epsilon, grad, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Shuts down a running distributed TPU system. -// -// The op returns an error if no system is running. -// -// Returns the created operation. -func ShutdownDistributedTPU(scope *Scope) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ShutdownDistributedTPU", - } - return scope.AddOperation(opspec) -} - -// SerializeManySparseAttr is an optional argument to SerializeManySparse. -type SerializeManySparseAttr func(optionalAttr) - -// SerializeManySparseOutType sets the optional out_type attribute to value. -// -// value: The `dtype` to use for serialization; the supported types are `string` -// (default) and `variant`. -// If not specified, defaults to DT_STRING -func SerializeManySparseOutType(value tf.DataType) SerializeManySparseAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` `Tensor` object. -// -// The `SparseTensor` must have rank `R` greater than 1, and the first dimension -// is treated as the minibatch dimension. Elements of the `SparseTensor` -// must be sorted in increasing order of this first dimension. The serialized -// `SparseTensor` objects going into each row of `serialized_sparse` will have -// rank `R-1`. -// -// The minibatch size `N` is extracted from `sparse_shape[0]`. -// -// Arguments: -// sparse_indices: 2-D. The `indices` of the minibatch `SparseTensor`. -// sparse_values: 1-D. The `values` of the minibatch `SparseTensor`. -// sparse_shape: 1-D. The `shape` of the minibatch `SparseTensor`. -func SerializeManySparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...SerializeManySparseAttr) (serialized_sparse tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SerializeManySparse", - Input: []tf.Input{ - sparse_indices, sparse_values, sparse_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Says whether the targets are in the top `K` predictions. -// -// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the -// prediction for the target class is among the top `k` predictions among -// all predictions for example `i`. Note that the behavior of `InTopK` differs -// from the `TopK` op in its handling of ties; if multiple classes have the -// same prediction value and straddle the top-`k` boundary, all of those -// classes are considered to be in the top `k`. 
-// -// More formally, let -// -// \\(predictions_i\\) be the predictions for all classes for example `i`, -// \\(targets_i\\) be the target class for example `i`, -// \\(out_i\\) be the output for example `i`, -// -// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$ -// -// Arguments: -// predictions: A `batch_size` x `classes` tensor. -// targets: A `batch_size` vector of class ids. -// k: Number of top elements to look at for computing precision. -// -// Returns Computed precision at `k` as a `bool Tensor`. -func InTopKV2(scope *Scope, predictions tf.Output, targets tf.Output, k tf.Output) (precision tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "InTopKV2", - Input: []tf.Input{ - predictions, targets, k, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates an Optional variant with no value. -func OptionalNone(scope *Scope) (optional tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "OptionalNone", - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr is an optional argument to RetrieveTPUEmbeddingStochasticGradientDescentParameters. -type RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr func(optionalAttr) - -// RetrieveTPUEmbeddingStochasticGradientDescentParametersTableId sets the optional table_id attribute to value. -// If not specified, defaults to -1 -func RetrieveTPUEmbeddingStochasticGradientDescentParametersTableId(value int64) RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr { - return func(m optionalAttr) { - m["table_id"] = value - } -} - -// RetrieveTPUEmbeddingStochasticGradientDescentParametersTableName sets the optional table_name attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingStochasticGradientDescentParametersTableName(value string) RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr { - return func(m optionalAttr) { - m["table_name"] = value - } -} - -// RetrieveTPUEmbeddingStochasticGradientDescentParametersConfig sets the optional config attribute to value. -// If not specified, defaults to "" -func RetrieveTPUEmbeddingStochasticGradientDescentParametersConfig(value string) RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr { - return func(m optionalAttr) { - m["config"] = value - } -} - -// Retrieve SGD embedding parameters. -// -// An op that retrieves optimization parameters from embedding to host -// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up -// the correct embedding table configuration. For example, this op is -// used to retrieve updated parameters before saving a checkpoint. -// -// Returns Parameter parameters updated by the stochastic gradient descent optimization algorithm. -func RetrieveTPUEmbeddingStochasticGradientDescentParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr) (parameters tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RetrieveTPUEmbeddingStochasticGradientDescentParameters", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// CudnnRNNAttr is an optional argument to CudnnRNN. 
-type CudnnRNNAttr func(optionalAttr) - -// CudnnRNNRnnMode sets the optional rnn_mode attribute to value. -// If not specified, defaults to "lstm" -func CudnnRNNRnnMode(value string) CudnnRNNAttr { - return func(m optionalAttr) { - m["rnn_mode"] = value - } -} - -// CudnnRNNInputMode sets the optional input_mode attribute to value. -// If not specified, defaults to "linear_input" -func CudnnRNNInputMode(value string) CudnnRNNAttr { - return func(m optionalAttr) { - m["input_mode"] = value - } -} - -// CudnnRNNDirection sets the optional direction attribute to value. -// If not specified, defaults to "unidirectional" -func CudnnRNNDirection(value string) CudnnRNNAttr { - return func(m optionalAttr) { - m["direction"] = value - } -} - -// CudnnRNNDropout sets the optional dropout attribute to value. -// If not specified, defaults to 0 -func CudnnRNNDropout(value float32) CudnnRNNAttr { - return func(m optionalAttr) { - m["dropout"] = value - } -} - -// CudnnRNNSeed sets the optional seed attribute to value. -// If not specified, defaults to 0 -func CudnnRNNSeed(value int64) CudnnRNNAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// CudnnRNNSeed2 sets the optional seed2 attribute to value. -// If not specified, defaults to 0 -func CudnnRNNSeed2(value int64) CudnnRNNAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// CudnnRNNIsTraining sets the optional is_training attribute to value. -// If not specified, defaults to true -func CudnnRNNIsTraining(value bool) CudnnRNNAttr { - return func(m optionalAttr) { - m["is_training"] = value - } -} - -// A RNN backed by cuDNN. -// -// Computes the RNN from the input and initial states, with respect to the params -// buffer. -// -// rnn_mode: Indicates the type of the RNN model. -// input_mode: Indicate whether there is a linear projection between the input and -// the actual computation before the first layer. 'skip_input' is only allowed -// when input_size == num_units; 'auto_select' implies 'skip_input' when -// input_size == num_units; otherwise, it implies 'linear_input'. -// direction: Indicates whether a bidirectional model will be used. Should be -// "unidirectional" or "bidirectional". -// dropout: Dropout probability. When set to 0., dropout is disabled. -// seed: The 1st part of a seed to initialize dropout. -// seed2: The 2nd part of a seed to initialize dropout. -// input: A 3-D tensor with the shape of [seq_length, batch_size, input_size]. -// input_h: A 3-D tensor with the shape of [num_layer * dir, batch_size, -// num_units]. -// input_c: For LSTM, a 3-D tensor with the shape of -// [num_layer * dir, batch, num_units]. For other models, it is ignored. -// params: A 1-D tensor that contains the weights and biases in an opaque layout. -// The size must be created through CudnnRNNParamsSize, and initialized -// separately. Note that they might not be compatible across different -// generations. So it is a good idea to save and restore -// output: A 3-D tensor with the shape of [seq_length, batch_size, -// dir * num_units]. -// output_h: The same shape has input_h. -// output_c: The same shape as input_c for LSTM. An empty tensor for other models. -// is_training: Indicates whether this operation is used for inference or -// training. -// reserve_space: An opaque tensor that can be used in backprop calculation. It -// is only produced if is_training is false. 
-func CudnnRNN(scope *Scope, input tf.Output, input_h tf.Output, input_c tf.Output, params tf.Output, optional ...CudnnRNNAttr) (output tf.Output, output_h tf.Output, output_c tf.Output, reserve_space tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "CudnnRNN", - Input: []tf.Input{ - input, input_h, input_c, params, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3) -} - -// Creates a dataset that batches `batch_size` elements from `input_dataset`. -// -// Arguments: -// -// batch_size: A scalar representing the number of elements to accumulate in a -// batch. -// -// -func BatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "BatchDataset", - Input: []tf.Input{ - input_dataset, batch_size, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// EnqueueTPUEmbeddingSparseTensorBatchAttr is an optional argument to EnqueueTPUEmbeddingSparseTensorBatch. -type EnqueueTPUEmbeddingSparseTensorBatchAttr func(optionalAttr) - -// EnqueueTPUEmbeddingSparseTensorBatchDeviceOrdinal sets the optional device_ordinal attribute to value. -// -// value: The TPU device to use. Should be >= 0 and less than the number -// of TPU cores in the task on which the node is placed. -// If not specified, defaults to -1 -func EnqueueTPUEmbeddingSparseTensorBatchDeviceOrdinal(value int64) EnqueueTPUEmbeddingSparseTensorBatchAttr { - return func(m optionalAttr) { - m["device_ordinal"] = value - } -} - -// EnqueueTPUEmbeddingSparseTensorBatchCombiners sets the optional combiners attribute to value. -// -// value: A list of string scalars, one for each embedding table that specify -// how to normalize the embedding activations after weighted summation. -// Supported combiners are 'mean', 'sum', or 'sqrtn'. It is invalid to have -// the sum of the weights be 0 for 'mean' or the sum of the squared weights be -// 0 for 'sqrtn'. If combiners isn't passed, the default is to use 'sum' for -// all tables. -// If not specified, defaults to <> -func EnqueueTPUEmbeddingSparseTensorBatchCombiners(value []string) EnqueueTPUEmbeddingSparseTensorBatchAttr { - return func(m optionalAttr) { - m["combiners"] = value - } -} - -// EnqueueTPUEmbeddingSparseTensorBatchMaxSequenceLengths sets the optional max_sequence_lengths attribute to value. -// If not specified, defaults to <> -func EnqueueTPUEmbeddingSparseTensorBatchMaxSequenceLengths(value []int64) EnqueueTPUEmbeddingSparseTensorBatchAttr { - return func(m optionalAttr) { - m["max_sequence_lengths"] = value - } -} - -// Eases the porting of code that uses tf.nn.embedding_lookup_sparse(). -// -// sample_indices[i], embedding_indices[i] and aggregation_weights[i] correspond -// to the ith feature. table_ids[i] indicates which embedding table to look up ith -// feature. -// -// The tensors at corresponding positions in the three input lists (sample_indices, -// embedding_indices and aggregation_weights) must have the same shape, i.e. rank 1 -// with dim_size() equal to the total number of lookups into the table described by -// the corresponding feature. 
-// -// Arguments: -// sample_indices: A list of rank 1 Tensors specifying the training example to -// which the corresponding embedding_indices and aggregation_weights values -// belong. It corresponds to sp_ids.indices[:,0] in embedding_lookup_sparse(). -// embedding_indices: A list of rank 1 Tensors, indices into the embedding tables. -// It corresponds to sp_ids.values in embedding_lookup_sparse(). -// aggregation_weights: A list of rank 1 Tensors containing per training example -// aggregation weights. It corresponds to sp_weights.values in -// embedding_lookup_sparse(). -// mode_override: A string input that overrides the mode specified in the -// TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference', -// 'training', 'backward_pass_only'}. When set to 'unspecified', the mode set -// in TPUEmbeddingConfiguration is used, otherwise mode_override is used. -// table_ids: A list of integers specifying the identifier of the embedding table -// (offset of TableDescriptor in the TPUEmbeddingConfiguration) to lookup the -// corresponding input. The ith input is looked up using table_ids[i]. The size -// of the table_ids list must be equal to that of sample_indices, -// embedding_indices and aggregation_weights. -// -// Returns the created operation. -func EnqueueTPUEmbeddingSparseTensorBatch(scope *Scope, sample_indices []tf.Output, embedding_indices []tf.Output, aggregation_weights []tf.Output, mode_override tf.Output, table_ids []int64, optional ...EnqueueTPUEmbeddingSparseTensorBatchAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"table_ids": table_ids} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "EnqueueTPUEmbeddingSparseTensorBatch", - Input: []tf.Input{ - tf.OutputList(sample_indices), tf.OutputList(embedding_indices), tf.OutputList(aggregation_weights), mode_override, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// ReverseSequenceAttr is an optional argument to ReverseSequence. -type ReverseSequenceAttr func(optionalAttr) - -// ReverseSequenceBatchDim sets the optional batch_dim attribute to value. -// -// value: The dimension along which reversal is performed. -// If not specified, defaults to 0 -func ReverseSequenceBatchDim(value int64) ReverseSequenceAttr { - return func(m optionalAttr) { - m["batch_dim"] = value - } -} - -// Reverses variable length slices. -// -// This op first slices `input` along the dimension `batch_dim`, and for each -// slice `i`, reverses the first `seq_lengths[i]` elements along -// the dimension `seq_dim`. -// -// The elements of `seq_lengths` must obey `seq_lengths[i] <= input.dims[seq_dim]`, -// and `seq_lengths` must be a vector of length `input.dims[batch_dim]`. -// -// The output slice `i` along dimension `batch_dim` is then given by input -// slice `i`, with the first `seq_lengths[i]` slices along dimension -// `seq_dim` reversed. -// -// For example: -// -// ``` -// # Given this: -// batch_dim = 0 -// seq_dim = 1 -// input.dims = (4, 8, ...) -// seq_lengths = [7, 2, 3, 5] -// -// # then slices of input are reversed on seq_dim, but only up to seq_lengths: -// output[0, 0:7, :, ...] = input[0, 7:0:-1, :, ...] -// output[1, 0:2, :, ...] = input[1, 2:0:-1, :, ...] -// output[2, 0:3, :, ...] = input[2, 3:0:-1, :, ...] -// output[3, 0:5, :, ...] = input[3, 5:0:-1, :, ...] -// -// # while entries past seq_lens are copied through: -// output[0, 7:, :, ...] = input[0, 7:, :, ...] -// output[1, 2:, :, ...] = input[1, 2:, :, ...] 
-// output[2, 3:, :, ...] = input[2, 3:, :, ...] -// output[3, 2:, :, ...] = input[3, 2:, :, ...] -// ``` -// -// In contrast, if: -// -// ``` -// # Given this: -// batch_dim = 2 -// seq_dim = 0 -// input.dims = (8, ?, 4, ...) -// seq_lengths = [7, 2, 3, 5] -// -// # then slices of input are reversed on seq_dim, but only up to seq_lengths: -// output[0:7, :, 0, :, ...] = input[7:0:-1, :, 0, :, ...] -// output[0:2, :, 1, :, ...] = input[2:0:-1, :, 1, :, ...] -// output[0:3, :, 2, :, ...] = input[3:0:-1, :, 2, :, ...] -// output[0:5, :, 3, :, ...] = input[5:0:-1, :, 3, :, ...] -// -// # while entries past seq_lens are copied through: -// output[7:, :, 0, :, ...] = input[7:, :, 0, :, ...] -// output[2:, :, 1, :, ...] = input[2:, :, 1, :, ...] -// output[3:, :, 2, :, ...] = input[3:, :, 2, :, ...] -// output[2:, :, 3, :, ...] = input[2:, :, 3, :, ...] -// ``` -// -// Arguments: -// input: The input to reverse. -// seq_lengths: 1-D with length `input.dims(batch_dim)` and -// `max(seq_lengths) <= input.dims(seq_dim)` -// seq_dim: The dimension which is partially reversed. -// -// Returns The partially reversed input. It has the same shape as `input`. -func ReverseSequence(scope *Scope, input tf.Output, seq_lengths tf.Output, seq_dim int64, optional ...ReverseSequenceAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"seq_dim": seq_dim} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ReverseSequence", - Input: []tf.Input{ - input, seq_lengths, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Fetches multiple values from infeed as an XLA tuple. -// -// Arguments: -// dtypes: The element types of each element in `outputs`. -// shapes: The shapes of each tensor in `outputs`. -// -// Returns A list of tensors that will be provided using the infeed mechanism. -func InfeedDequeueTuple(scope *Scope, dtypes []tf.DataType, shapes []tf.Shape) (outputs []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes, "shapes": shapes} - opspec := tf.OpSpec{ - Type: "InfeedDequeueTuple", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { - scope.UpdateErr("InfeedDequeueTuple", err) - return - } - return outputs -} - -// Creates and returns an empty tensor list. -// -// All list elements must be tensors of dtype element_dtype and shape compatible -// with element_shape. -// -// handle: an empty tensor list. -// element_dtype: the type of elements in the list. -// element_shape: a shape compatible with that of elements in the list. -func EmptyTensorList(scope *Scope, element_shape tf.Output, max_num_elements tf.Output, element_dtype tf.DataType) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"element_dtype": element_dtype} - opspec := tf.OpSpec{ - Type: "EmptyTensorList", - Input: []tf.Input{ - element_shape, max_num_elements, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Sets up TPUEmbedding in a distributed TPU system. -// -// Arguments: -// config: Serialized tensorflow.tpu.TPUEmbeddingConfiguration that -// describes the embedding lookups of the program. -// -// Returns the created operation. 
-func ConfigureTPUEmbedding(scope *Scope, config string) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"config": config} - opspec := tf.OpSpec{ - Type: "ConfigureTPUEmbedding", - - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Returns the number of gradients aggregated in the given accumulators. -// -// Arguments: -// handle: The handle to an accumulator. -// -// Returns The number of gradients aggregated in the given accumulator. -func ResourceAccumulatorNumAccumulated(scope *Scope, handle tf.Output) (num_accumulated tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ResourceAccumulatorNumAccumulated", - Input: []tf.Input{ - handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Connects N outputs from an N-way replicated TPU computation. -// -// This operation holds a replicated output from a `tpu.replicate()` computation subgraph. -// Each replicated output has the same shape and type alongside the input. -// -// For example: -// ``` -// %computation = "tf.Computation"() -// %replicated_output:2 = "tf.TPUReplicatedOutput"(%computation) -// ``` -// The above computation has a replicated output of two replicas. -func TPUReplicatedOutput(scope *Scope, input tf.Output, num_replicas int64) (outputs []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_replicas": num_replicas} - opspec := tf.OpSpec{ - Type: "TPUReplicatedOutput", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { - scope.UpdateErr("TPUReplicatedOutput", err) - return - } - return outputs -} From 74fb47ccd26da99e57a14fccf7561e7ba7bcb000 Mon Sep 17 00:00:00 2001 From: Jaesung Chung Date: Mon, 8 Jun 2020 08:04:26 -0700 Subject: [PATCH 036/178] Add a new pass for promoting VarHandle ops to TF saved model arguments PiperOrigin-RevId: 315275908 Change-Id: Icbc5c032bd9474d279fecf48267665025a53c1bf --- .../tests/promote_var_handles_to_args.mlir | 20 +++++------ .../mlir/tensorflow/transforms/passes.h | 8 ++--- .../transforms/promote_resources_to_args.cc | 33 ++++++++----------- 3 files changed, 24 insertions(+), 37 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/promote_var_handles_to_args.mlir b/tensorflow/compiler/mlir/tensorflow/tests/promote_var_handles_to_args.mlir index 925062ea4ff..8b8a070cfab 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/promote_var_handles_to_args.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/promote_var_handles_to_args.mlir @@ -1,5 +1,4 @@ -// Run a pass for promoting tf.VarHandleOps to function arguments in a format of TensorFlowSavedModelDialect. -// RUN: tf-opt %s -split-input-file -verify-diagnostics -tf-saved-model-promote-var-handles-to-args | FileCheck %s -dump-input-on-failure +// RUN: tf-opt %s -split-input-file -verify-diagnostics -tf-promote-var-handles-to-args | FileCheck %s -dump-input-on-failure // Tests main function with multiple blocks. 
@@ -12,27 +11,24 @@ func @main() { // ----- -"tf_saved_model.global_tensor"() {sym_name = "x", type = tensor, value = dense<1.67482901> : tensor} : () -> () -"tf_saved_model.global_tensor"() {sym_name = "y", type = tensor, value = dense<0> : tensor} : () -> () - // CHECK-LABEL: func @no_args -// CHECK-SAME: (%arg0: tensor>> {tf_saved_model.bound_input = @x}) +// CHECK-SAME: (%arg0: tensor {tf.resource_name = "x"}) // CHECK-NOT: "tf.VarHandleOp" func @no_args() { - %0 = "tf.VarHandleOp"() {container = "", shape = "tfshape$", shared_name = "x"} : () -> tensor>> + %0 = "tf.VarHandleOp"() {container = "", shape = "tfshape$", shared_name = "x"} : () -> tensor return } // CHECK-LABEL: func @some_args -// CHECK-SAME: (%arg0: tensor, %arg1: tensor>> {tf_saved_model.bound_input = @x}) +// CHECK-SAME: (%arg0: tensor, %arg1: tensor {tf.resource_name = "x"}) // CHECK-NOT: "tf.VarHandleOp" func @some_args(%arg0: tensor) { - %0 = "tf.VarHandleOp"() {container = "", shape = "tfshape$", shared_name = "x"} : () -> tensor>> + %0 = "tf.VarHandleOp"() {container = "", shape = "tfshape$", shared_name = "x"} : () -> tensor return } // CHECK-LABEL: func @unique_vars -// CHECK-SAME: (%arg0: tensor>> {tf_saved_model.bound_input = @x}, %arg1: tensor>> {tf_saved_model.bound_input = @y}) +// CHECK-SAME: (%arg0: tensor>> {tf.resource_name = "x"}, %arg1: tensor>> {tf.resource_name = "y"}) // CHECK-NOT: "tf.VarHandleOp" func @unique_vars() { %0 = "tf.VarHandleOp"() {container = "", shape = "tfshape$", shared_name = "x"} : () -> tensor>> @@ -41,7 +37,7 @@ func @unique_vars() { } // CHECK-LABEL: func @duplicate_vars -// CHECK-SAME: (%arg0: tensor>> {tf_saved_model.bound_input = @x}) +// CHECK-SAME: (%arg0: tensor>> {tf.resource_name = "x"}) // CHECK-NOT: "tf.VarHandleOp" func @duplicate_vars() { %0 = "tf.VarHandleOp"() {container = "", shape = "tfshape$", shared_name = "x"} : () -> tensor>> @@ -50,7 +46,7 @@ func @duplicate_vars() { } // CHECK-LABEL: func @duplicate_vars_with_users -// CHECK-SAME: (%arg0: tensor, %arg1: tensor>> {tf_saved_model.bound_input = @x}) +// CHECK-SAME: (%arg0: tensor, %arg1: tensor>> {tf.resource_name = "x"}) // CHECK: "tf.ReadVariableOp"(%arg1) // CHECK: "tf.AssignAddVariableOp"(%arg1, %arg0) // CHECK-NOT: "tf.VarHandleOp" diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h index 3973eb60707..08c95bd8b0e 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h +++ b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h @@ -95,11 +95,9 @@ std::unique_ptr> CreateResourceDeviceInferencePass(); // of their aliasing output arguments. std::unique_ptr> CreatePromoteResourcesToArgsPass(); -// Creates a pass that promotes tf.VarHandleOp to to resource arguments of where -// resource names are `tf_saved_model.bound_input` symbol argument attributes -// for all functions. -std::unique_ptr> -CreatePromoteVarHandlesToSavedModelArgsPass(); +// Creates a pass that promotes tf.VarHandleOp to resource arguments for all +// functions. +std::unique_ptr> CreatePromoteVarHandlesToArgsPass(); // Creates a pass that converts readonly reference variables to the // corresponding resource variables. 
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/promote_resources_to_args.cc b/tensorflow/compiler/mlir/tensorflow/transforms/promote_resources_to_args.cc index 0d331686c46..cece23b4750 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/promote_resources_to_args.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/promote_resources_to_args.cc @@ -389,18 +389,15 @@ void PromoteResourcesToArgsPass::runOnOperation() { return signalPassFailure(); } -// This pass is for promoting Varhandle ops to tf_saved_model.bound_input -// attributes, which are required for TensorFlowSavedModelDialect. -class PromoteVarHandlesToSavedModelArgsPass - : public PassWrapper> { +class PromoteVarHandlesToArgsPass + : public PassWrapper> { public: void runOnOperation() override; }; -void PromoteVarHandlesToSavedModelArgsPass::runOnOperation() { +void PromoteVarHandlesToArgsPass::runOnOperation() { ModuleOp module = getOperation(); - + MLIRContext* context = module.getContext(); for (auto function : module.getOps()) { if (failed(CheckSingleBlockFunction(function))) return signalPassFailure(); @@ -409,15 +406,13 @@ void PromoteVarHandlesToSavedModelArgsPass::runOnOperation() { &var_handle_shared_names); // Add resource names for each `tf.VarHandleOp` that were promoted to - // saved model arguments. + // resource arguments. const int var_handle_args_offset = function.getNumArguments() - var_handle_shared_names.size(); - for (auto var_name_and_index : llvm::enumerate(var_handle_shared_names)) { - auto symbol_ref = - SymbolRefAttr::get(var_name_and_index.value(), &getContext()); + for (auto var_name_and_index : llvm::enumerate(var_handle_shared_names)) function.setArgAttr(var_name_and_index.index() + var_handle_args_offset, - "tf_saved_model.bound_input", symbol_ref); - } + kResourceNameArgAttr, + StringAttr::get(var_name_and_index.value(), context)); } } @@ -427,19 +422,17 @@ std::unique_ptr> CreatePromoteResourcesToArgsPass() { return std::make_unique(); } -std::unique_ptr> -CreatePromoteVarHandlesToSavedModelArgsPass() { - return std::make_unique(); +std::unique_ptr> CreatePromoteVarHandlesToArgsPass() { + return std::make_unique(); } static PassRegistration pass( "tf-promote-resources-to-args", "Promote resources reads/writes to function inputs/outputs."); -static PassRegistration saved_model_pass( - "tf-saved-model-promote-var-handles-to-args", - "Promote tf.VarHandleOps to function arguments in a format of " - "TensorFlowSavedModelDialect."); +static PassRegistration var_handle_pass( + "tf-promote-var-handles-to-args", + "Promote tf.VarHandleOps to function arguments."); } // namespace TF } // namespace mlir From 319df5224cf9db2135b05f6a3d3a803b3938d5a3 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Mon, 8 Jun 2020 08:08:07 -0700 Subject: [PATCH 037/178] [tfdbg2] Fix bug in which enable_check_numerics() errors due to empty file path The helper function `guess_is_tensorflow_py_library`() throws errors for file paths that do not have expected extensions for Python source files. This is why it errors out when the file path is an empty string, which has been observed to happen by users. This CL makes the helper function not throw errors and return False for such malformed file paths instead. 
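
As a minimal sketch of the intended behavior after this change (the example
paths below are the ones used in the updated unit tests and are shown purely
for illustration):

  from tensorflow.python.debug.lib import source_utils

  # Previously these raised ValueError; with this change they return False.
  source_utils.guess_is_tensorflow_py_library("")        # empty path -> False
  source_utils.guess_is_tensorflow_py_library("foo.cc")  # non-Python file -> False

  # Paths that look like uncompiled/compiled Python sources still go through
  # the existing heuristic for deciding whether they belong to the TensorFlow
  # (or absl) library tree.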
PiperOrigin-RevId: 315276482 Change-Id: Ib19750d502cd55fcfbef6f95a2064adc2e0816e9 --- tensorflow/python/debug/lib/source_utils.py | 11 +++-------- tensorflow/python/debug/lib/source_utils_test.py | 14 ++++++++++---- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/tensorflow/python/debug/lib/source_utils.py b/tensorflow/python/debug/lib/source_utils.py index 033e9c4361e..1e9f7ee82a2 100644 --- a/tensorflow/python/debug/lib/source_utils.py +++ b/tensorflow/python/debug/lib/source_utils.py @@ -69,18 +69,13 @@ def guess_is_tensorflow_py_library(py_file_path): py_file_path: full path of the Python source file in question. Returns: - (`bool`) Whether the file is a part of the tensorflow library. - - Raises: - ValueError: if the extension name of py_file_path does not indicate a Python - source file (compiled or uncompiled). + (`bool`) Whether the file is inferred to be a part of the tensorflow + library. """ if (not is_extension_uncompiled_python_source(py_file_path) and not is_extension_compiled_python_source(py_file_path)): - raise ValueError( - "Input file path (%s) is not a Python source file." % py_file_path) + return False py_file_path = _norm_abs_path(py_file_path) - return ((py_file_path.startswith(_TENSORFLOW_BASEDIR) or py_file_path.startswith(_ABSL_BASEDIR)) and not py_file_path.endswith("_test.py") and diff --git a/tensorflow/python/debug/lib/source_utils_test.py b/tensorflow/python/debug/lib/source_utils_test.py index 89964a21ba7..c9934c4aac8 100644 --- a/tensorflow/python/debug/lib/source_utils_test.py +++ b/tensorflow/python/debug/lib/source_utils_test.py @@ -126,10 +126,16 @@ class GuessIsTensorFlowLibraryTest(test_util.TensorFlowTestCase): source_utils.guess_is_tensorflow_py_library(os.path.normpath( "site-packages/tensorflow/python/debug/examples/v3/example_v3.py"))) - def testNonPythonFileRaisesException(self): - with self.assertRaisesRegexp(ValueError, r"is not a Python source file"): - source_utils.guess_is_tensorflow_py_library( - os.path.join(os.path.dirname(self.curr_file_path), "foo.cc")) + def testReturnsFalseForNonPythonFile(self): + self.assertFalse( + source_utils.guess_is_tensorflow_py_library( + os.path.join(os.path.dirname(self.curr_file_path), "foo.cc"))) + + def testReturnsFalseForStdin(self): + self.assertFalse(source_utils.guess_is_tensorflow_py_library("")) + + def testReturnsFalseForEmptyFileName(self): + self.assertFalse(source_utils.guess_is_tensorflow_py_library("")) class SourceHelperTest(test_util.TensorFlowTestCase): From f25f8ea21369f8fae06ef39cb0915fe2b945ab68 Mon Sep 17 00:00:00 2001 From: nammbash Date: Mon, 8 Jun 2020 08:37:54 -0700 Subject: [PATCH 038/178] Review Comments --- tensorflow/core/graph/mkl_graph_util.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/graph/mkl_graph_util.h b/tensorflow/core/graph/mkl_graph_util.h index 3ea23250fc5..3044b33273e 100644 --- a/tensorflow/core/graph/mkl_graph_util.h +++ b/tensorflow/core/graph/mkl_graph_util.h @@ -126,10 +126,10 @@ inline string GetMklEagerOpName(const string& name) { } #ifdef ENABLE_INTEL_MKL_BFLOAT16 -static inline bool CheckBfloat16Support(DataType T) { +static inline bool HasBfloat16Support(DataType T) { static absl::once_flag cpu_bfloat16_warn_once_flag; // Restrict bfloat16 ops to platforms with at least AVX512 support, fall back - // to Eigen implementation. + // to Eigen implementation otherwise. 
if (!(port::TestCPUFeature(port::CPUFeature::AVX512F)) && T == DT_BFLOAT16) { absl::call_once(cpu_bfloat16_warn_once_flag, [] { LOG(ERROR) @@ -159,7 +159,7 @@ static inline bool IsMklLayoutDependentOp(const string& op_name, DataType T) { #ifdef ENABLE_INTEL_MKL_BFLOAT16 // Restrict regular ops to FLOAT and BFLOAT16 if (kernel.find(kMklLayoutDependentOpLabelPattern) != string::npos) { - return (T == DT_FLOAT || (T == DT_BFLOAT16 && CheckBfloat16Support(T))); + return (T == DT_FLOAT || (T == DT_BFLOAT16 && HasBfloat16Support(T))); } #else // Restrict regular ops to FLOAT @@ -217,7 +217,7 @@ static inline bool IsMklNameChangeOp(const string& op_name, DataType T) { T == DT_DOUBLE || T == DT_FLOAT); #ifdef ENABLE_INTEL_MKL_BFLOAT16 isTypeAllowed = - isTypeAllowed || (T == DT_BFLOAT16 && CheckBfloat16Support(T)); + isTypeAllowed || (T == DT_BFLOAT16 && HasBfloat16Support(T)); #endif return isTypeAllowed; } From 3b2109f7de7689a33c6f94251fe2bd74a1055046 Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Mon, 8 Jun 2020 08:46:51 -0700 Subject: [PATCH 039/178] Update the base class for _LinearModel in Feature column v1. PiperOrigin-RevId: 315282469 Change-Id: I86a793c89b8098723750c631d858bc360746942d --- tensorflow/python/feature_column/BUILD | 1 - tensorflow/python/feature_column/feature_column.py | 11 ++++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/feature_column/BUILD b/tensorflow/python/feature_column/BUILD index 8f62fc2d1be..52f1186c5d9 100644 --- a/tensorflow/python/feature_column/BUILD +++ b/tensorflow/python/feature_column/BUILD @@ -48,7 +48,6 @@ py_library( "//tensorflow/python:variable_scope", "//tensorflow/python:variables", "//tensorflow/python/eager:context", - "//tensorflow/python/keras/engine", # TODO(scottzhu): Remove metrics after we cleanup the keras internal cyclar dependency. # //third_party/tensorflow/python/feature_column:feature_column # //third_party/tensorflow/python/keras/engine:engine diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py index 07df4e914c9..3207fd550b4 100644 --- a/tensorflow/python/feature_column/feature_column.py +++ b/tensorflow/python/feature_column/feature_column.py @@ -144,7 +144,6 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib from tensorflow.python.framework import tensor_shape -from tensorflow.python.keras.engine import training from tensorflow.python.layers import base from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops @@ -165,8 +164,8 @@ from tensorflow.python.platform import gfile from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import checkpoint_utils from tensorflow.python.util import nest -from tensorflow.python.util.tf_export import tf_export from tensorflow.python.util.compat import collections_abc +from tensorflow.python.util.tf_export import tf_export def _internal_input_layer(features, @@ -616,7 +615,7 @@ def _strip_leading_slashes(name): return name.rsplit('/', 1)[-1] -class _LinearModel(training.Model): +class _LinearModel(base.Layer): """Creates a linear model using feature columns. See `linear_model` for details. 
@@ -631,6 +630,12 @@ class _LinearModel(training.Model): name=None, **kwargs): super(_LinearModel, self).__init__(name=name, **kwargs) + # We force the keras_style to be True here, as a workaround to not being + # able to inherit keras.layers.Layer as base class. Setting this will let + # us skip all the legacy behavior for base.Layer. + # Also note that we use Layer as base class, instead of Model, since there + # isn't any Model specific behavior gets used, eg compile/fit. + self._keras_style = True self._feature_columns = _normalize_feature_columns( feature_columns) self._weight_collections = list(weight_collections or []) From ad6ccc651c6a0ab9abbb2da8a620906f11690df6 Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Mon, 8 Jun 2020 08:47:38 -0700 Subject: [PATCH 040/178] Update sequence_feature_column_test to not rely on Keras. PiperOrigin-RevId: 315282586 Change-Id: I0457ae4072aa672ae6be1bfa176b3b9f3b8fea0d --- .../python/feature_column/sequence_feature_column_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/feature_column/sequence_feature_column_test.py b/tensorflow/python/feature_column/sequence_feature_column_test.py index d0cf5ee7670..e0cd73d17e4 100644 --- a/tensorflow/python/feature_column/sequence_feature_column_test.py +++ b/tensorflow/python/feature_column/sequence_feature_column_test.py @@ -24,6 +24,7 @@ from absl.testing import parameterized import numpy as np from tensorflow.python.client import session +from tensorflow.python.feature_column import feature_column_lib as fc_lib from tensorflow.python.feature_column import feature_column_v2 as fc from tensorflow.python.feature_column import sequence_feature_column as sfc from tensorflow.python.feature_column import serialization @@ -31,7 +32,6 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util -from tensorflow.python.keras.engine.base_layer import Layer from tensorflow.python.ops import array_ops from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import math_ops @@ -132,7 +132,8 @@ def _get_sequence_dense_tensor(column, features): def _get_sequence_dense_tensor_state(column, features): - state_manager = fc._StateManagerImpl(Layer(), trainable=True) + state_manager = fc._StateManagerImpl( + fc_lib.DenseFeatures(column), trainable=True) column.create_state(state_manager) dense_tensor, lengths = column.get_sequence_dense_tensor( fc.FeatureTransformationCache(features), state_manager) From b8bd7b3483f63c8bda1cd2489e2e01db53d2f8b6 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 8 Jun 2020 09:30:37 -0700 Subject: [PATCH 041/178] Integrate LLVM at https://github.com/llvm/llvm-project/commit/92cb0ce8f814 PiperOrigin-RevId: 315290349 Change-Id: I2405c3505b6a860dd32f32d754d1a6da3f3acd29 --- .../mlir/tensorflow/utils/convert_tensor.cc | 22 +++++++------------ tensorflow/compiler/mlir/xla/hlo_utils.cc | 3 +-- .../compiler/mlir/xla/mlir_hlo_to_hlo.cc | 21 +++++++++++------- .../compiler/mlir/xla/tests/convert.mlir | 2 +- .../compiler/xla/service/llvm_ir/llvm_util.cc | 1 + third_party/mlir/BUILD | 20 ----------------- 6 files changed, 24 insertions(+), 45 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.cc b/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.cc index b28f26b6c3c..359314a64b0 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.cc @@ -89,12 +89,11 @@ StatusOr ConvertFlatTensor(const Tensor& input_tensor, ElementsAttr ConvertBf16Tensor(const Tensor& input_tensor, RankedTensorType type) { - auto flat = input_tensor.flat(); - llvm::SmallVector floats; - floats.reserve(flat.size()); - for (bfloat16 v : llvm::makeArrayRef(flat.data(), flat.size())) - floats.push_back(llvm::APFloat(static_cast(v))); - return mlir::DenseElementsAttr::get(type, llvm::makeArrayRef(floats)); + auto buffer = llvm::makeArrayRef(static_cast(input_tensor.data()), + input_tensor.TotalBytes()); + return mlir::DenseElementsAttr::getFromRawBuffer( + type, buffer, + /*isSplatBuffer=*/type.getNumElements() == 1); } ElementsAttr ConvertHalfTensor(const Tensor& tensor, RankedTensorType type) { @@ -280,16 +279,11 @@ void ConvertIntElementsAttr(const mlir::DenseIntElementsAttr attr, void ConvertBfloat16ElementsAttr(const mlir::DenseFPElementsAttr attr, protobuf::RepeatedField* output) { - // Bfloat16 is internally represented as `double` in MLIR. if (attr.isSplat()) { - double v = attr.getSplatValue(); - bfloat16 bf16_val = static_cast(v); - output->Add(absl::bit_cast(bf16_val)); + output->Add((*attr.begin()).bitcastToAPInt().getSExtValue()); } else { - for (auto v : attr.getValues()) { - bfloat16 bf16_val = static_cast(v); - output->Add(absl::bit_cast(bf16_val)); - } + for (const llvm::APFloat value : attr.getFloatValues()) + output->Add(value.bitcastToAPInt().getSExtValue()); } } diff --git a/tensorflow/compiler/mlir/xla/hlo_utils.cc b/tensorflow/compiler/mlir/xla/hlo_utils.cc index dc801f64ede..e1b5feeb117 100644 --- a/tensorflow/compiler/mlir/xla/hlo_utils.cc +++ b/tensorflow/compiler/mlir/xla/hlo_utils.cc @@ -44,8 +44,7 @@ template } mlir::APFloat ConvertToAPFloat(bfloat16 val) { - // bfloat16 values are stored as double in MLIR. 
- return llvm::APFloat(static_cast(val)); + return llvm::APFloat(llvm::APFloat::BFloat(), llvm::APInt(16, val.value)); } mlir::APFloat ConvertToAPFloat(half val) { diff --git a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc index 1c25625802f..bd61b10f827 100644 --- a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc +++ b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc @@ -979,10 +979,10 @@ StatusOr CreateLiteralFromAttr(ElementsAttr attr) { values.reserve(attr.getNumElements()); for (APFloat val : attr.getValues()) { bool loses_info = false; - CHECK_EQ(val.convert(llvm::APFloat::IEEEsingle(), - llvm::APFloat::rmTowardZero, &loses_info), - llvm::APFloat::opOK); - CHECK(!loses_info); + TF_RET_CHECK(val.convert(llvm::APFloat::IEEEsingle(), + llvm::APFloat::rmTowardZero, + &loses_info) == llvm::APFloat::opOK); + TF_RET_CHECK(!loses_info); values.push_back(xla::half(val.convertToFloat())); } xla::Array source_data(shape.dimensions()); @@ -992,10 +992,15 @@ StatusOr CreateLiteralFromAttr(ElementsAttr attr) { case xla::PrimitiveType::BF16: { xla::Array source_data(shape.dimensions()); auto attr_values = attr.getValues(); - std::vector values_double(source_data.num_elements()); - for (auto index_and_value : llvm::enumerate(attr_values)) { - values_double[index_and_value.index()] = - index_and_value.value().convertToDouble(); + std::vector values_double; + values_double.reserve(source_data.num_elements()); + for (APFloat val : attr_values) { + bool loses_info = false; + TF_RET_CHECK(val.convert(llvm::APFloat::IEEEdouble(), + llvm::APFloat::rmTowardZero, + &loses_info) == llvm::APFloat::opOK); + TF_RET_CHECK(!loses_info); + values_double.push_back(val.convertToDouble()); } source_data.SetValues(values_double); return xla::LiteralUtil::ConvertF64ToBF16( diff --git a/tensorflow/compiler/mlir/xla/tests/convert.mlir b/tensorflow/compiler/mlir/xla/tests/convert.mlir index 63ce724adb7..26d91132d32 100644 --- a/tensorflow/compiler/mlir/xla/tests/convert.mlir +++ b/tensorflow/compiler/mlir/xla/tests/convert.mlir @@ -191,7 +191,7 @@ func @const_f32_bf16() -> tensor { // CHECK-LABEL: func @const_bf16_f64 func @const_bf16_f64() -> tensor { - // CHECK-NEXT: [[CST:%.+]] = xla_hlo.constant dense<4.2{{0*}}e+00> : tensor + // CHECK-NEXT: [[CST:%.+]] = xla_hlo.constant dense<4.187500e+00> : tensor %cst = xla_hlo.constant dense<4.2> : tensor %0 = "xla_hlo.convert"(%cst) : (tensor) -> tensor // CHECK-NEXT: return [[CST]] diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc index 6375bf7341f..e4ca08f972b 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include "absl/base/casts.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" +#include "llvm/ADT/Triple.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" diff --git a/third_party/mlir/BUILD b/third_party/mlir/BUILD index 27159203cf9..dda04d560c0 100644 --- a/third_party/mlir/BUILD +++ b/third_party/mlir/BUILD @@ -686,25 +686,6 @@ gentbl( ], ) -gentbl( - name = "MLIRShapeCanonicalizationIncGen", - strip_include_prefix = "include/mlir/Dialect/Shape", - tbl_outs = [ - ( - "-gen-rewriters", - "include/mlir/Dialect/Shape/IR/ShapeCanonicalization.inc", - ), - ], - tblgen = ":mlir-tblgen", - td_file = "lib/Dialect/Shape/IR/ShapeCanonicalization.td", - td_srcs = [ - ":StdOpsTdFiles", - "include/mlir/Dialect/Shape/IR/ShapeBase.td", - "include/mlir/Dialect/Shape/IR/ShapeOps.td", - "include/mlir/Interfaces/InferTypeOpInterface.td", - ], -) - cc_library( name = "Shape", srcs = glob( @@ -723,7 +704,6 @@ cc_library( ":Dialect", ":IR", ":InferTypeOpInterface", - ":MLIRShapeCanonicalizationIncGen", ":ShapeOpsIncGen", ":SideEffects", ":Support", From bf1b3d7e70e39672ab35ae7e04a15fa082d4b8e1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 8 Jun 2020 09:45:23 -0700 Subject: [PATCH 042/178] [TF/MLIR] Adds legalization rule for xla_hlo.dot_general. An xla_hlo.dot_general op will be converted to tf.BatchMatMulV2 op. However, we also need to insert some transpose ops to order batch/contracting/out dimensions properly and then flatten the contracting/out dimensions because BatchMatMul does not support multiple contracting dimensions. PiperOrigin-RevId: 315293215 Change-Id: Iceb3738025e5c8a730340807b1d6d17c10d7ecc2 --- .../mlir/tensorflow/tests/legalize_hlo.mlir | 16 ++ .../tensorflow/transforms/legalize_hlo.cc | 206 ++++++++++++++++++ .../transforms/legalize_hlo_patterns.td | 7 + 3 files changed, 229 insertions(+) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/legalize_hlo.mlir b/tensorflow/compiler/mlir/tensorflow/tests/legalize_hlo.mlir index 00e35460f20..2b4f88a3524 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/legalize_hlo.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/legalize_hlo.mlir @@ -723,6 +723,11 @@ func @broadcast_in_dim_general_case(%arg0: tensor<3x1x16xf32>) -> tensor<3x8x8x1 return %0 : tensor<3x8x8x16xf32> } +func @convert_dot_general(%arg0: tensor<3x2x6x5x1xf32>, %arg1: tensor<3x2x4x6xf32>) -> tensor<3x5x1x4xf32> { + %0 = "xla_hlo.dot_general"(%arg0, %arg1) {dot_dimension_numbers = {lhs_batching_dimensions = dense<0> : tensor<1xi64>, lhs_contracting_dimensions = dense<[1, 2]> : tensor<2xi64>, rhs_batching_dimensions = dense<0> : tensor<1xi64>, rhs_contracting_dimensions = dense<[1, 3]> : tensor<2xi64>}, precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<3x2x6x5x1xf32>, tensor<3x2x4x6xf32>) -> tensor<3x5x1x4xf32> + return %0 : tensor<3x5x1x4xf32> +} + // NOTE: Assertions have been autogenerated by utils/generate-test-checks.py // CHECK-LABEL: func @biasAdd_NHWC( @@ -1596,3 +1601,14 @@ func @broadcast_in_dim_general_case(%arg0: tensor<3x1x16xf32>) -> tensor<3x8x8x1 // CHECK: [[VAL_402:%.*]] = "tf.BroadcastTo"([[VAL_400]], [[VAL_401]]) : (tensor<3x1x1x16xf32>, tensor<4xi64>) -> tensor<3x8x8x16xf32> // CHECK: return [[VAL_402]] : tensor<3x8x8x16xf32> // CHECK: } + +// CHECK-LABEL: func @convert_dot_general( +// CHECK-SAME: [[VAL_396:%.*]]: tensor<3x2x6x5x1xf32>, [[VAL_397:%.*]]: tensor<3x2x4x6xf32>) -> tensor<3x5x1x4xf32> { +// CHECK: [[VAL_398:%.*]] = "tf.Transpose"([[VAL_396]], 
{{.*}}) : (tensor<3x2x6x5x1xf32>, tensor<5xi64>) -> tensor<3x5x1x2x6xf32> +// CHECK: [[VAL_399:%.*]] = "tf.Transpose"([[VAL_397]], {{.*}}) : (tensor<3x2x4x6xf32>, tensor<4xi64>) -> tensor<3x2x6x4xf32> +// CHECK: [[VAL_400:%.*]] = "tf.Reshape"([[VAL_398]], {{.*}}) : (tensor<3x5x1x2x6xf32>, tensor<3xi64>) -> tensor<3x5x12xf32> +// CHECK: [[VAL_401:%.*]] = "tf.Reshape"([[VAL_399]], {{.*}}) : (tensor<3x2x6x4xf32>, tensor<3xi64>) -> tensor<3x12x4xf32> +// CHECK: [[VAL_402:%.*]] = "tf.BatchMatMulV2"([[VAL_400]], [[VAL_401]]) {adj_x = false, adj_y = false} : (tensor<3x5x12xf32>, tensor<3x12x4xf32>) -> tensor<3x5x4xf32> +// CHECK: [[VAL_403:%.*]] = "tf.Reshape"([[VAL_402]], {{.*}}) : (tensor<3x5x4xf32>, tensor<4xi64>) -> tensor<3x5x1x4xf32> +// CHECK: return [[VAL_403]] : tensor<3x5x1x4xf32> +// CHECK: } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/legalize_hlo.cc b/tensorflow/compiler/mlir/tensorflow/transforms/legalize_hlo.cc index f6c00e8cb82..267819f6c9a 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/legalize_hlo.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/legalize_hlo.cc @@ -15,10 +15,15 @@ limitations under the License. // This file implements logic for legalizing HLO to TensorFlow. +#include +#include #include +#include #include +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/raw_ostream.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project @@ -40,6 +45,8 @@ namespace mlir { namespace TF { namespace { +using xla_hlo::DotDimensionNumbers; + class ConvertSliceOp : public OpConversionPattern { public: using OpConversionPattern::OpConversionPattern; @@ -75,6 +82,205 @@ class ConvertSliceOp : public OpConversionPattern { }; }; +// Appends all elements in `range` to `values`. +template +void Append(llvm::SmallVectorImpl &values, Range &&range) { + values.insert(values.end(), range.begin(), range.end()); +} + +// Appends all elements in `range` to `values`. +template +void Append(llvm::SmallVectorImpl &values, Range &&range, + RangeTs &&... ranges) { + values.insert(values.end(), range.begin(), range.end()); + Append(values, ranges...); +} + +// Returns the number of elements in `range`. +template +size_t Size(Range &&range) { + return range.size(); +} + +// Returns the total number of elements in a variadic number of `ranges`. +template +size_t Size(Range &&range, RangeTs &&... ranges) { + return range.size() + Size(std::forward(ranges)...); +} + +// Concats all elements in `ranges` and returns a small vector as a result. +template +llvm::SmallVector Concat(RangeTs &&... ranges) { + llvm::SmallVector results; + results.reserve(Size(std::forward(ranges)...)); + Append(results, std::forward(ranges)...); + return results; +} + +// A struct to hold axes and sizes for a set of dimensions. +struct DimensionSetVector { + llvm::ArrayRef AxesArray() const { return axes.getArrayRef(); } + llvm::ArrayRef SizesArray() const { return sizes.getArrayRef(); } + + llvm::SmallSetVector axes; + llvm::SmallSetVector sizes; +}; + +// A struct to hold information about dimensions of dot_general operands. 
+class DotDimensionsInfo { + public: + DotDimensionsInfo(ShapedType type, DenseIntElementsAttr batch_dimensions, + DenseIntElementsAttr contracting_dimensions) { + const int rank = type.getRank(); + for (const int dim : batch_dimensions.getValues()) { + batch_dimensions_.axes.insert(dim); + batch_dimensions_.sizes.insert(type.getDimSize(dim)); + } + + for (const int dim : contracting_dimensions.getValues()) { + contracting_dimensions_.axes.insert(dim); + contracting_dimensions_.sizes.insert(type.getDimSize(dim)); + } + + for (int dim = 0; dim < rank; ++dim) { + if (contracting_dimensions_.axes.count(dim) > 0 || + batch_dimensions_.axes.count(dim) > 0) { + continue; + } + out_dimensions_.axes.insert(dim); + out_dimensions_.sizes.insert(type.getDimSize(dim)); + } + } + + const DimensionSetVector &batch_dimensions() const { + return batch_dimensions_; + } + const DimensionSetVector &contracting_dimensions() const { + return contracting_dimensions_; + } + // Out dimensions are any dimensions that are neither batch nor contracting + // dimensions, hence will be propagated to output shape. + const DimensionSetVector &out_dimensions() const { return out_dimensions_; } + + // Returns the total dimension size after flattening all contracting + // dimensions. + int FlattenedContractingDimensionSize() const { + return std::accumulate(contracting_dimensions_.sizes.begin(), + contracting_dimensions_.sizes.end(), 1, + std::multiplies()); + } + + // Returns the total dimension size after flattening all out dimensions. + int FlattenedOutDimensionSize() const { + return std::accumulate(out_dimensions_.sizes.begin(), + out_dimensions_.sizes.end(), 1, + std::multiplies()); + } + + private: + DimensionSetVector batch_dimensions_; + DimensionSetVector contracting_dimensions_; + // Out dimensions are any dimensions that are neither batch nor contracting + // dimensions, hence will be propagated to output shape. + DimensionSetVector out_dimensions_; +}; + +// Converts xla_hlo.dot to tf.BatchMatMul. Reshape or Transpose ops will also be +// inserted to convert to well-formed matrix multiply. +Value ConvertDotGeneralOp(PatternRewriter &rewriter, Operation *old_op) { + auto dot_general_op = cast(old_op); + auto lhs_type = dot_general_op.lhs().getType().cast(); + auto rhs_type = dot_general_op.rhs().getType().cast(); + auto result_type = dot_general_op.getResult().getType().cast(); + DotDimensionNumbers dot_dimension_numbers = + dot_general_op.dot_dimension_numbers(); + mlir::Location loc = dot_general_op.getLoc(); + const int lhs_rank = lhs_type.getRank(); + const int rhs_rank = rhs_type.getRank(); + + // Collects lhs and rhs dimensions information. + DotDimensionsInfo lhs_dot_dimensions_info( + lhs_type, dot_dimension_numbers.lhs_batching_dimensions(), + dot_dimension_numbers.lhs_contracting_dimensions()); + DotDimensionsInfo rhs_dot_dimensions_info( + rhs_type, dot_dimension_numbers.rhs_batching_dimensions(), + dot_dimension_numbers.rhs_contracting_dimensions()); + + // Transposes lhs shape to be in the order of {batch_dimensions, + // out_dimensions, contracting dimensions}. 
+ llvm::SmallVector lhs_permutation = Concat( + lhs_dot_dimensions_info.batch_dimensions().AxesArray(), + lhs_dot_dimensions_info.out_dimensions().AxesArray(), + lhs_dot_dimensions_info.contracting_dimensions().AxesArray()); + llvm::SmallVector lhs_transposed_shape = Concat( + lhs_dot_dimensions_info.batch_dimensions().SizesArray(), + lhs_dot_dimensions_info.out_dimensions().SizesArray(), + lhs_dot_dimensions_info.contracting_dimensions().SizesArray()); + auto lhs_transposed = rewriter.create( + loc, + RankedTensorType::get(lhs_transposed_shape, lhs_type.getElementType()), + dot_general_op.lhs(), + DenseIntElementsAttr::get( + RankedTensorType::get({lhs_rank}, rewriter.getI64Type()), + lhs_permutation)); + + // Transposes rhs shape to be in the order of {batch_dimensions, contracting + // dimensions, out_dimensions}. + llvm::SmallVector rhs_permutation = Concat( + rhs_dot_dimensions_info.batch_dimensions().AxesArray(), + rhs_dot_dimensions_info.contracting_dimensions().AxesArray(), + rhs_dot_dimensions_info.out_dimensions().AxesArray()); + llvm::SmallVector rhs_transposed_shape = Concat( + rhs_dot_dimensions_info.batch_dimensions().SizesArray(), + rhs_dot_dimensions_info.contracting_dimensions().SizesArray(), + rhs_dot_dimensions_info.out_dimensions().SizesArray()); + auto rhs_transposed = rewriter.create( + loc, + RankedTensorType::get(rhs_transposed_shape, rhs_type.getElementType()), + dot_general_op.rhs(), + DenseIntElementsAttr::get( + RankedTensorType::get({rhs_rank}, rewriter.getI64Type()), + rhs_permutation)); + + // Reshapes lhs to flatten out_dimensions and contracting_dimensions. + llvm::SmallVector lhs_flattened_shape = Concat( + lhs_dot_dimensions_info.batch_dimensions().SizesArray(), + llvm::ArrayRef{ + lhs_dot_dimensions_info.FlattenedOutDimensionSize()}, + llvm::ArrayRef{ + lhs_dot_dimensions_info.FlattenedContractingDimensionSize()}); + auto lhs_flattend = rewriter.create( + loc, + RankedTensorType::get(lhs_flattened_shape, lhs_type.getElementType()), + lhs_transposed.getResult()); + + // Reshapes rhs to flatten out_dimensions and contracting_dimensions. + llvm::SmallVector rhs_flattened_shape = Concat( + rhs_dot_dimensions_info.batch_dimensions().SizesArray(), + llvm::ArrayRef{ + rhs_dot_dimensions_info.FlattenedContractingDimensionSize()}, + llvm::ArrayRef{ + rhs_dot_dimensions_info.FlattenedOutDimensionSize()}); + auto rhs_flattend = rewriter.create( + loc, + RankedTensorType::get(rhs_flattened_shape, rhs_type.getElementType()), + rhs_transposed.getResult()); + + // Creates matmul op of `lhs_flattend` and `rhs_flattend`. 
+ llvm::SmallVector matmul_shape = + Concat(lhs_dot_dimensions_info.batch_dimensions().SizesArray(), + llvm::ArrayRef{ + lhs_dot_dimensions_info.FlattenedOutDimensionSize()}, + llvm::ArrayRef{ + rhs_dot_dimensions_info.FlattenedOutDimensionSize()}); + auto matmul = rewriter.create( + loc, RankedTensorType::get(matmul_shape, result_type.getElementType()), + lhs_flattend.getResult(), rhs_flattend.getResult()); + auto reshaped = + rewriter.create(loc, result_type, matmul.getResult()); + return reshaped.getResult(); +} + class LegalizeHloToTf : public PassWrapper { public: LegalizeHloToTf() = default; diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/legalize_hlo_patterns.td b/tensorflow/compiler/mlir/tensorflow/transforms/legalize_hlo_patterns.td index df78aa97f01..3e910cd9512 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/legalize_hlo_patterns.td +++ b/tensorflow/compiler/mlir/tensorflow/transforms/legalize_hlo_patterns.td @@ -184,3 +184,10 @@ def ConvertDotOp : NativeCodeCall<"ConvertDotOp($_builder, " def : Pat<(HLO_DotOp:$old_value AnyStaticShapeTensor:$lhs, AnyStaticShapeTensor:$rhs, $precision_config), (ConvertDotOp $old_value)>; + +def ConvertDotGeneralOp : NativeCodeCall<"ConvertDotGeneralOp($_builder, " + "$0.getDefiningOp())">; +def : Pat<(HLO_DotGeneralOp:$old_value AnyStaticShapeTensor:$lhs, + AnyStaticShapeTensor:$rhs, $dot_dimension_numbers, + $precision_config), + (ConvertDotGeneralOp $old_value)>; From fcfdbcf14abc526a7f78468c19345a60d64573b3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 8 Jun 2020 09:49:22 -0700 Subject: [PATCH 043/178] Revert of [XLA] Add support for sinking broadcasts through ops with multiple broadcasts operands. as it is causing some internal failures. Investigation in progress. PiperOrigin-RevId: 315293975 Change-Id: If65d7aaf53f29cac52072bc14b06e3b5a8c5fc49 --- .../xla/service/algebraic_simplifier.cc | 54 ++++---------- .../xla/service/algebraic_simplifier_test.cc | 73 ------------------- 2 files changed, 14 insertions(+), 113 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 4025cb46f18..e0a8b87c83b 100755 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -3058,20 +3058,6 @@ AlgebraicSimplifierVisitor::TryToSinkBroadcastAfterOpWithUniqueNonScalarOperand( return false; } HloInstruction* operand = broadcast->mutable_operand(0); - auto is_scalar_broadcast = [](const HloInstruction* instruction) { - return instruction->opcode() == HloOpcode::kBroadcast && - ShapeUtil::IsScalar(instruction->operand(0)->shape()); - }; - auto is_equal_broadcast = [operand, - broadcast](const HloInstruction* instruction) { - return instruction->opcode() == HloOpcode::kBroadcast && - ShapeUtil::Equal(operand->shape(), - instruction->operand(0)->shape()) && - broadcast->dimensions() == instruction->dimensions(); - }; - auto is_compatible_broadcast = [&](const HloInstruction* instruction) { - return is_scalar_broadcast(instruction) || is_equal_broadcast(instruction); - }; for (HloInstruction* user : broadcast->users()) { if (user->user_count() == 0 && user != computation_->root_instruction()) { continue; @@ -3090,20 +3076,18 @@ AlgebraicSimplifierVisitor::TryToSinkBroadcastAfterOpWithUniqueNonScalarOperand( continue; } - // Check if all the operands of the user are compatible broadcasts for - // sinking. 
(They are either scalar broadcasts or broadcasts casting - // from/to the same shape/dimensions) - int64 compatible_broadcast_count = 0; + // Find the unique non-scalar operand or continue if there isn't one. + int64 scalar_broadcast_count = 0; int64 broadcast_use_count = 0; for (HloInstruction* user_operand : user->operands()) { - if (is_compatible_broadcast(user_operand)) { - ++compatible_broadcast_count; + if (user_operand->opcode() == HloOpcode::kBroadcast && + ShapeUtil::IsScalar(user_operand->operand(0)->shape())) { + ++scalar_broadcast_count; } else if (broadcast == user_operand) { ++broadcast_use_count; } } - if (compatible_broadcast_count + broadcast_use_count != - user->operand_count()) { + if (scalar_broadcast_count + broadcast_use_count != user->operand_count()) { continue; } std::vector new_operands; @@ -3111,24 +3095,14 @@ AlgebraicSimplifierVisitor::TryToSinkBroadcastAfterOpWithUniqueNonScalarOperand( Shape changed_shape; for (HloInstruction* user_operand : user->operands()) { - // If this is a broadcast operand that is not our original broadcast input - // to this function then we might need to change the input. - if (is_compatible_broadcast(user_operand)) { - // If this is a broadcast from a scalar value rewrite a broadcast from - // the scalar to the new shape enforced from the other broadcast - // operands. - if (is_scalar_broadcast(user_operand)) { - changed_shape = ShapeUtil::ChangeElementType( - operand->shape(), user_operand->shape().element_type()); - simplifier_->UpdateLayout(&changed_shape); - new_operands.push_back( - computation_->AddInstruction(HloInstruction::CreateBroadcast( - changed_shape, user_operand->mutable_operand(0), {}))); - } else { - // For the non-scalar broadcasts we guarantee that the shape of the - // operand of the broadcast needs to be already a compatible shape. 
- new_operands.push_back(user_operand->mutable_operand(0)); - } + if (user_operand->opcode() == HloOpcode::kBroadcast && + ShapeUtil::IsScalar(user_operand->operand(0)->shape())) { + changed_shape = ShapeUtil::ChangeElementType( + operand->shape(), user_operand->shape().element_type()); + simplifier_->UpdateLayout(&changed_shape); + new_operands.push_back( + computation_->AddInstruction(HloInstruction::CreateBroadcast( + changed_shape, user_operand->mutable_operand(0), {}))); } else { CHECK_EQ(broadcast, user_operand); new_operands.push_back(operand); diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index bcfc2fdc740..3ac47821654 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -338,79 +338,6 @@ TEST_F(AlgebraicSimplifierTest, MultiplyReassociateMergeBroadcastedConstants) { m::ConstantScalar(3.0)))))); } -TEST_F(AlgebraicSimplifierTest, ElementwiseSinkMultipleBroadcastsScalar) { - const char* kModuleStr = R"( - HloModule m - test { - p0 = f32[] parameter(0) - p1 = f32[] parameter(1) - b0 = f32[4] broadcast(p0), dimensions={} - b1 = f32[4] broadcast(p1), dimensions={} - ROOT multiply = f32[4] multiply(b1, b0) - } - )"; - TF_ASSERT_OK_AND_ASSIGN(auto m, ParseAndReturnVerifiedModule(kModuleStr)); - ASSERT_TRUE(AlgebraicSimplifier(default_options_).Run(m.get()).ValueOrDie()); - EXPECT_THAT( - m->entry_computation()->root_instruction(), - GmockMatch(m::Broadcast(m::Multiply(m::Broadcast(m::Parameter(1)), - m::Broadcast(m::Parameter(0)))))); -} - -TEST_F(AlgebraicSimplifierTest, ElementwiseSinkMultipleBroadcastsConstantMix) { - const char* kModuleStr = R"( - HloModule m - test { - p0 = f32[4] parameter(0) - c0 = f32[] constant(2.0) - b0 = f32[4,2] broadcast(c0), dimensions={} - b1 = f32[4,2] broadcast(p0), dimensions={0} - ROOT multiply = f32[4,2] multiply(b1, b0) - } - )"; - TF_ASSERT_OK_AND_ASSIGN(auto m, ParseAndReturnVerifiedModule(kModuleStr)); - ASSERT_TRUE(AlgebraicSimplifier(default_options_).Run(m.get()).ValueOrDie()); - EXPECT_THAT(m->entry_computation()->root_instruction(), - GmockMatch(m::Broadcast(m::Multiply( - m::Parameter(0), m::Broadcast(m::ConstantScalar(2.0)))))); -} - -TEST_F(AlgebraicSimplifierTest, ElementwiseSinkMultipleBroadcastsNonScalar) { - const char* kModuleStr = R"( - HloModule m - test { - p0 = f32[4] parameter(0) - p1 = f32[4] parameter(1) - b0 = f32[4,2] broadcast(p0), dimensions={0} - b1 = f32[4,2] broadcast(p1), dimensions={0} - ROOT multiply = f32[4,2] multiply(b1, b0) - } - )"; - TF_ASSERT_OK_AND_ASSIGN(auto m, ParseAndReturnVerifiedModule(kModuleStr)); - ASSERT_TRUE(AlgebraicSimplifier(default_options_).Run(m.get()).ValueOrDie()); - EXPECT_THAT( - m->entry_computation()->root_instruction(), - GmockMatch(m::Broadcast(m::Multiply(m::Parameter(1), m::Parameter(0))))); -} - -TEST_F(AlgebraicSimplifierTest, ElementwiseNoSinkBroadcastsDifferentDims) { - const char* kModuleStr = R"( - HloModule m - test { - p0 = f32[4] parameter(0) - p1 = f32[8] parameter(1) - b0 = f32[4,8] broadcast(p0), dimensions={0} - b1 = f32[4,8] broadcast(p1), dimensions={1} - ROOT multiply = f32[4,8] multiply(b1, b0) - } - )"; - TF_ASSERT_OK_AND_ASSIGN(auto m, ParseAndReturnVerifiedModule(kModuleStr)); - ASSERT_FALSE(AlgebraicSimplifier(default_options_).Run(m.get()).ValueOrDie()); - EXPECT_THAT(m->entry_computation()->root_instruction(), - GmockMatch(m::Multiply(m::Broadcast(m::Parameter(1)), - 
m::Broadcast(m::Parameter(0))))); -} - TEST_F(AlgebraicSimplifierTest, MultiplyReassociateMultiplyOfConstantAndBroadcast) { const char* kModuleStr = R"( From 6ae6ef636d608822696508e9535cdb39d547a0c6 Mon Sep 17 00:00:00 2001 From: Sharada Shiddibhavi Date: Mon, 8 Jun 2020 10:02:22 -0700 Subject: [PATCH 044/178] Addressing review comments --- .../core/kernels/mkl_quantized_conv_ops.h | 20 +++++-------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/tensorflow/core/kernels/mkl_quantized_conv_ops.h b/tensorflow/core/kernels/mkl_quantized_conv_ops.h index 037a3a5f3ff..37022f46113 100644 --- a/tensorflow/core/kernels/mkl_quantized_conv_ops.h +++ b/tensorflow/core/kernels/mkl_quantized_conv_ops.h @@ -69,21 +69,12 @@ void MklQuantizationRangeForMultiplication(float min_a, float max_a, const float* max_b = max_b_vector.flat().data(); float* min_c = (*min_c_vector)->flat().data(); float* max_c = (*max_c_vector)->flat().data(); -#ifdef ENABLE_MKLDNN_THREADPOOL - // TODO: Add eigen parallel_for - for(size_t n = 0; n < n_channel; ++n) { - float a_float_for_one_quant_level = - MklFloatForOneQuantizedLevel(min_a, max_a); - float b_float_for_one_quant_level = - MklFloatForOneQuantizedLevel(min_b[n], max_b[n]); - float c_float_for_one_quant_level = - a_float_for_one_quant_level * b_float_for_one_quant_level; - min_c[n] = c_float_for_one_quant_level * c_lowest; - max_c[n] = c_float_for_one_quant_level * c_highest; - } -#else + +#ifndef ENABLE_MKLDNN_THREADPOOL #pragma omp parallel for - for (size_t n = 0; n < n_channel; ++n) { +#endif // ENABLE_MKLDNN_THREADPOOL + // TODO: Add eigen parallel_for + for (size_t n = 0; n < n_channel; ++n) { float a_float_for_one_quant_level = MklFloatForOneQuantizedLevel(min_a, max_a); float b_float_for_one_quant_level = @@ -93,7 +84,6 @@ void MklQuantizationRangeForMultiplication(float min_a, float max_a, min_c[n] = c_float_for_one_quant_level * c_lowest; max_c[n] = c_float_for_one_quant_level * c_highest; } -#endif // ENABLE_MKLDNN_THREADPOOL } } // namespace tensorflow From f32fcb56ffd2f6cbd362e5accab39b264a7d169f Mon Sep 17 00:00:00 2001 From: Sharada Shiddibhavi Date: Mon, 8 Jun 2020 10:05:49 -0700 Subject: [PATCH 045/178] Addressing review comments --- ...mkl_requantization_range_per_channel_op.cc | 24 ++++--------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc b/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc index 0a19573d901..3aa744c0a5e 100644 --- a/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc +++ b/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc @@ -74,27 +74,11 @@ class MklRequantizationRangePerChannelOp : public OpKernel { // Find the ranges of each channel in parallel. 
float out_min_max = std::numeric_limits::min(); -#ifdef ENABLE_MKLDNN_THREADPOOL - // TODO: Add eigen parallel_for - for(size_t i = 0; i < depth; ++i) { - Eigen::Tensor min = - transposed_input.chip<0>(i).minimum(); - Eigen::Tensor max = - transposed_input.chip<0>(i).maximum(); - const int32_t min_per_channel = min(); - const int32_t max_per_channel = max(); - const int32_t abs_max = - std::max(std::abs(min_per_channel), std::abs(max_per_channel)); - float scale = - std::max(std::abs(input_min_data[i]), std::abs(input_max_data[i])); - ranges[i] = - scale * static_cast(abs_max) / static_cast(1L << 31); - if (min_per_channel < 0) is_non_negative = false; - out_min_max = std::max(out_min_max, ranges[i]); - } -#else +#ifndef ENABLE_MKLDNN_THREADPOOL #pragma omp parallel for reduction(max : out_min_max) +#endif // ENABLE_MKLDNN_THREADPOOL + // TODO: Add eigen parallel_for for (size_t i = 0; i < depth; ++i) { Eigen::Tensor min = transposed_input.chip<0>(i).minimum(); @@ -113,7 +97,7 @@ class MklRequantizationRangePerChannelOp : public OpKernel { // Thread-local out_min_max. out_min_max = std::max(out_min_max, ranges[i]); } -#endif // ENABLE_MKLDNN_THREADPOOL + // All local out_min_max gets max-reduced into one global out_min_max at // the end of the loop by specifying reduction(max:out_min_max) along with // omp parallel for. From e2b4994f450f5e08c3718bcd8fc0e8c3cf984780 Mon Sep 17 00:00:00 2001 From: Sharada Shiddibhavi Date: Mon, 8 Jun 2020 10:07:11 -0700 Subject: [PATCH 046/178] Update mkl_quantized_conv_ops.h --- tensorflow/core/kernels/mkl_quantized_conv_ops.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/mkl_quantized_conv_ops.h b/tensorflow/core/kernels/mkl_quantized_conv_ops.h index 37022f46113..442f6a53047 100644 --- a/tensorflow/core/kernels/mkl_quantized_conv_ops.h +++ b/tensorflow/core/kernels/mkl_quantized_conv_ops.h @@ -73,8 +73,8 @@ void MklQuantizationRangeForMultiplication(float min_a, float max_a, #ifndef ENABLE_MKLDNN_THREADPOOL #pragma omp parallel for #endif // ENABLE_MKLDNN_THREADPOOL - // TODO: Add eigen parallel_for - for (size_t n = 0; n < n_channel; ++n) { + // TODO: Add eigen parallel_for + for (size_t n = 0; n < n_channel; ++n) { float a_float_for_one_quant_level = MklFloatForOneQuantizedLevel(min_a, max_a); float b_float_for_one_quant_level = From d2e0f75817245a2dffddb714d4c62ded3dfe1a91 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Mon, 8 Jun 2020 10:04:00 -0700 Subject: [PATCH 047/178] Bump open source llvm revision to 92cb0ce8f814cd39ef4598fe074534cb787a9e78 PiperOrigin-RevId: 315297169 Change-Id: I6588ecf2717a2451906601143febf21073d7e570 --- tensorflow/workspace.bzl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 4d84614b1e8..c41537d5cf3 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -655,8 +655,8 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ) # Check out LLVM and MLIR from llvm-project. 
- LLVM_COMMIT = "c4b5a66e44f031eb89c9d6ea32b144f1169bdbae" - LLVM_SHA256 = "8463cbed08a66c7171c831e9549076cf3fd4f7e6fe690b9b799d6afef2465007" + LLVM_COMMIT = "92cb0ce8f814cd39ef4598fe074534cb787a9e78" + LLVM_SHA256 = "46bdd427a48c8a7f2a10b147b0b618fe02c871e4740340808a81de42db923846" LLVM_URLS = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), "https://github.com/llvm/llvm-project/archive/{commit}.tar.gz".format(commit = LLVM_COMMIT), From 8abf495b5f4bc1180adbd1d1f2eeb73fd0c86144 Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Mon, 8 Jun 2020 17:13:59 +0000 Subject: [PATCH 048/178] [ROCm] Fix for ROCm CSB breakage - 200608 The test `//tensorflow/python/ops/numpy_ops:np_math_ops_test_gpu` started failing on Friday (200605), with the following error ``` ===================================================================== FAIL: testExp (__main__.MathTest) testExp (__main__.MathTest) ---------------------------------------------------------------------- Traceback (most recent call last): ... ... AssertionError: Arrays are not almost equal to 7 decimals Mismatched elements: 1 / 2 (50%) Max absolute difference: 4.76837158e-07 Max relative difference: 6.45328909e-08 x: array([ 7.3890557, 20.085537 ]) y: array([ 7.3890562, 20.085537 ]) ---------------------------------------------------------------------- Ran 19 tests in 24.663s FAILED (failures=1, skipped=1) ================================================================================ ``` This is a new unit test introduced by the commit 84c796966b0b75a3561d4b076a8388f2091ff57d The fix is to drop the relative tolerance down to 1e-6 (from the default of 1e-7). This should be okay since numerous other TF unit tests also use the same value for relative tolerance. 
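As a rough illustration (not part of the change itself), plugging the x/y arrays quoted in the failure log into NumPy shows why a relative tolerance of 1e-6 passes where the 7-decimal absolute check fails:

```python
import numpy as np

# The mismatching values quoted in the failure log: GPU result (x) vs. expected (y).
actual = np.array([7.3890557, 20.085537], dtype=np.float32)
expected = np.array([7.3890562, 20.085537], dtype=np.float32)

abs_err = np.max(np.abs(actual - expected))               # ~4.8e-07
rel_err = np.max(np.abs(actual - expected) / expected)    # ~6.5e-08

# assert_almost_equal(decimal=7) requires abs_err < 1.5e-07 and therefore fails,
# while a relative tolerance of 1e-6 comfortably covers the ~6.5e-08 error.
np.testing.assert_allclose(actual, expected, rtol=1e-6)
```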
Note that this commit also changes the check call from `assert_almost_equal` to `assert_allclose`, to be able to specify the relative tolerance
---
 tensorflow/python/ops/numpy_ops/np_math_ops_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/numpy_ops/np_math_ops_test.py b/tensorflow/python/ops/numpy_ops/np_math_ops_test.py
index 00d9a35a025..55c1f9a06bf 100644
--- a/tensorflow/python/ops/numpy_ops/np_math_ops_test.py
+++ b/tensorflow/python/ops/numpy_ops/np_math_ops_test.py
@@ -159,7 +159,7 @@ class MathTest(test.TestCase, parameterized.TestCase):
         actual.shape, expected.shape,
         'Shape mismatch.\nActual: {}\nExpected: {}\n{}'.format(
             actual.shape, expected.shape, msg))
-    np.testing.assert_almost_equal(actual.tolist(), expected.tolist())
+    np.testing.assert_allclose(actual.tolist(), expected.tolist(), rtol=1e-6)

   def testArgsort(self):
     self._testUnaryOp(np_math_ops.argsort, np.argsort, 'argsort')

From 7fc6bf6726cee3f6178caaafeedb7a98f776cb02 Mon Sep 17 00:00:00 2001
From: Sharada Shiddibhavi
Date: Mon, 8 Jun 2020 10:30:03 -0700
Subject: [PATCH 049/178] Update tensorflow/core/kernels/mkl_quantized_conv_ops.h

Co-authored-by: Penporn Koanantakool <38085909+penpornk@users.noreply.github.com>
---
 tensorflow/core/kernels/mkl_quantized_conv_ops.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/mkl_quantized_conv_ops.h b/tensorflow/core/kernels/mkl_quantized_conv_ops.h
index 442f6a53047..4121c88fb83 100644
--- a/tensorflow/core/kernels/mkl_quantized_conv_ops.h
+++ b/tensorflow/core/kernels/mkl_quantized_conv_ops.h
@@ -72,7 +72,7 @@ void MklQuantizationRangeForMultiplication(float min_a, float max_a,

 #ifndef ENABLE_MKLDNN_THREADPOOL
 #pragma omp parallel for
-#endif  // ENABLE_MKLDNN_THREADPOOL
+#endif  // !ENABLE_MKLDNN_THREADPOOL
   // TODO: Add eigen parallel_for
   for (size_t n = 0; n < n_channel; ++n) {
     float a_float_for_one_quant_level =

From 7b4b07c098170bd891f2426ee9c043249ac41983 Mon Sep 17 00:00:00 2001
From: Sharada Shiddibhavi
Date: Mon, 8 Jun 2020 10:31:00 -0700
Subject: [PATCH 050/178] Update mkl_requantization_range_per_channel_op.cc

---
 .../core/kernels/mkl_requantization_range_per_channel_op.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc b/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc
index 3aa744c0a5e..a43f6a71acf 100644
--- a/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc
+++ b/tensorflow/core/kernels/mkl_requantization_range_per_channel_op.cc
@@ -77,7 +77,7 @@ class MklRequantizationRangePerChannelOp : public OpKernel {

 #ifndef ENABLE_MKLDNN_THREADPOOL
 #pragma omp parallel for reduction(max : out_min_max)
-#endif  // ENABLE_MKLDNN_THREADPOOL
+#endif  // !ENABLE_MKLDNN_THREADPOOL
   // TODO: Add eigen parallel_for
   for (size_t i = 0; i < depth; ++i) {
     Eigen::Tensor min =

From 60d63428b1a25ef2b7eb529209136e05ba18248c Mon Sep 17 00:00:00 2001
From: Berkin Ilbeyi
Date: Mon, 8 Jun 2020 10:30:16 -0700
Subject: [PATCH 051/178] [XLA] Better alias handling in memory space assignment.

Instead of using ad-hoc alias rules (for kWhile and kConditional), we use the
aliases reported by HloAliasAnalysis. Using this, we can ensure aliased values
get the same allocation. In practice, this enables us to share the buffer of
DynamicUpdateSlice in a while loop in alternate memory. For sharing DUS buffers
that are not in while loops, we need to make changes to HloDataflowAnalysis and
copy insertion.
PiperOrigin-RevId: 315303035 Change-Id: I5f1057ed7df2b1f09138512be248cdc09533f54f --- .../xla/service/memory_space_assignment.cc | 261 +++++++++++------- .../xla/service/memory_space_assignment.h | 54 ++-- .../service/memory_space_assignment_test.cc | 59 +++- 3 files changed, 256 insertions(+), 118 deletions(-) diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.cc b/tensorflow/compiler/xla/service/memory_space_assignment.cc index a28e71f6cc3..5803e21b277 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.cc +++ b/tensorflow/compiler/xla/service/memory_space_assignment.cc @@ -432,8 +432,8 @@ std::string MemorySpaceAssignment::AllocationValue::ToString() const { absl::StrAppend(&out, "\n position:\n"); absl::StrAppend(&out, " ", defining_position_.ToString(), "\n"); absl::StrAppend(&out, " uses:\n"); - for (const HloUse& use : uses_) { - absl::StrAppend(&out, " ", use.ToString(), "\n"); + for (const Use& use : uses_) { + absl::StrAppend(&out, " ", use.hlo_use.ToString(), "\n"); } return out; } @@ -515,6 +515,53 @@ void AlternateMemoryBestFitHeap::CreateAllocationValues( } } +void AlternateMemoryBestFitHeap::FindAliases( + std::vector* allocation_values) const { + absl::flat_hash_map + values_by_defining_inst; + for (AllocationValue& value : *allocation_values) { + CHECK_EQ(values_by_defining_inst.count(value.defining_instruction()), 0); + values_by_defining_inst[value.defining_instruction()] = &value; + } + auto maybe_add_alias_with_instruction = [&](const HloInstruction* instruction, + AllocationValue::Use* use) { + auto aliased_value_it = values_by_defining_inst.find(instruction); + if (aliased_value_it != values_by_defining_inst.end()) { + VLOG(3) << "Adding aliasing for use " << use->hlo_use.ToString() << " to " + << aliased_value_it->second->ToShortString(); + use->aliases.push_back(aliased_value_it->second->defining_position()); + } + }; + + for (AllocationValue& value : *allocation_values) { + for (AllocationValue::Use& use : value.uses()) { + // Find any aliases with the instruction itself (operand and output must + // alias). + maybe_add_alias_with_instruction(use.hlo_use.instruction, &use); + + // Find any aliases with the parameters of called computations. + for (const HloComputation* called_computation : + use.hlo_use.instruction->called_computations()) { + for (const HloInstruction* parameter_instruction : + called_computation->parameter_instructions()) { + maybe_add_alias_with_instruction(parameter_instruction, &use); + } + } + + // Special case for kWhile: the root of the body computation must alias as + // well. + if (use.hlo_use.instruction->opcode() == HloOpcode::kWhile) { + HloPosition root_alias{ + use.hlo_use.instruction->while_body()->root_instruction(), + use.hlo_use.operand_index}; + VLOG(3) << "Adding while body root aliasing for use " + << use.hlo_use.ToString() << " to " << root_alias; + use.aliases.push_back(root_alias); + } + } + } +} + std::vector AlternateMemoryBestFitHeap::GetSortedColocatedIntervals( const GlobalDecreasingSizeBestFitHeap::BufferInterval& interval) const { @@ -675,18 +722,18 @@ bool AlternateMemoryBestFitHeap::IsUseAllowedInAlternateMemory( // multiple called computations), determine if the parameter->first use // dependency is short. 
int64 conditional_time = instruction_schedule.at(use.instruction); - for (const HloUse& other_use : value.uses()) { - if (other_use.instruction != use.instruction) { + for (const AllocationValue::Use& other_use : value.uses()) { + if (other_use.hlo_use.instruction != use.instruction) { continue; } HloComputation* called_computation = - use.instruction->called_computations().at(other_use.operand_number - - 1); + use.instruction->called_computations().at( + other_use.hlo_use.operand_number - 1); const HloInstruction* parameter_instruction = called_computation->parameter_instruction(0); HloValue* parameter_value = &alias_analysis_.dataflow_analysis().GetUniqueValueAt( - parameter_instruction, other_use.operand_index); + parameter_instruction, other_use.hlo_use.operand_index); int64 parameter_time = instruction_schedule.at(parameter_instruction); int64 min_use_time = conditional_time; for (const HloUse& parameter_use : parameter_value->uses()) { @@ -947,6 +994,7 @@ bool AlternateMemoryBestFitHeap::AllocateColocatedIntervals( for (const auto& colocated_interval : colocated_intervals) { CreateAllocationValues(colocated_interval->buffer, &allocation_values); } + FindAliases(&allocation_values); const auto& instruction_schedule = hlo_live_range_.instruction_schedule(); // Data structure to contain the preferred offset for a given computation. @@ -969,25 +1017,26 @@ bool AlternateMemoryBestFitHeap::AllocateColocatedIntervals( // Iterate over the uses. for (int use_idx = 0; use_idx < allocation_value.uses().size(); ++use_idx) { - const HloUse& use = allocation_value.uses().at(use_idx); - int64 use_time = instruction_schedule.at(use.instruction); + const AllocationValue::Use& use = allocation_value.uses().at(use_idx); + const HloUse hlo_use = use.hlo_use; + int64 use_time = instruction_schedule.at(hlo_use.instruction); int64 latest_prefetch_time = use_time; bool allow_no_copy_alternate_mem_allocation = true; absl::optional earliest_prefetch_time = absl::nullopt; // Sequential calls include kWhile, kCall, and kConditional opcodes. bool is_sequential_call = - (GetInstructionCallContext(use.instruction->opcode()) == + (GetInstructionCallContext(hlo_use.instruction->opcode()) == CallContext::kSequential); if (is_sequential_call) { for (const HloComputation* called_computation : - use.instruction->called_computations()) { + hlo_use.instruction->called_computations()) { const HloLiveRange::TimeBound& computation_span = hlo_live_range_.computation_span_times().at(called_computation); latest_prefetch_time = std::min(computation_span.start, latest_prefetch_time); } - if (use.instruction->opcode() == HloOpcode::kWhile) { + if (hlo_use.instruction->opcode() == HloOpcode::kWhile) { // Given an example while loop and flattened schedule (logical times // shown on the left): // @@ -1008,10 +1057,10 @@ bool AlternateMemoryBestFitHeap::AllocateColocatedIntervals( // the interval to time 0-4. This is so that the remaining interval // (5-6) can be allocated separately and this buffer doesn't waste // alternate memory space within the while loop body. - HloComputation* while_body = use.instruction->while_body(); + HloComputation* while_body = hlo_use.instruction->while_body(); // We require while body ROOTs to be the last in the schedule. CHECK_EQ(instruction_schedule.at(while_body->root_instruction()) + 1, - instruction_schedule.at(use.instruction)) + instruction_schedule.at(hlo_use.instruction)) << "While body ROOTs need to be the last in the schedule! 
" "Please run RootInstructionSinker."; // Replace the use time with the parameter time so that we can decide @@ -1019,11 +1068,11 @@ bool AlternateMemoryBestFitHeap::AllocateColocatedIntervals( // look at uses within the while loop body. use_time = instruction_schedule.at(while_body->parameter_instruction(0)); - } else if (use.instruction->opcode() == HloOpcode::kConditional) { + } else if (hlo_use.instruction->opcode() == HloOpcode::kConditional) { // Replace the use time with the earliest parameter of called // computations. for (const HloComputation* called_computation : - use.instruction->called_computations()) { + hlo_use.instruction->called_computations()) { use_time = std::min( use_time, instruction_schedule.at( called_computation->parameter_instruction(0))); @@ -1033,8 +1082,8 @@ bool AlternateMemoryBestFitHeap::AllocateColocatedIntervals( // Add a required assignment in default memory if the use not allowed in // alternate memory. - if (!IsUseAllowedInAlternateMemory(allocation_value, use)) { - AddRequiredAssignment(allocation_value.value(), use.instruction, + if (!IsUseAllowedInAlternateMemory(allocation_value, hlo_use)) { + AddRequiredAssignment(allocation_value.value(), hlo_use.instruction, MemorySpace::kDefault, use_time); } else if (use_idx > 0) { // We allow buffers in alternate memory that are passed into @@ -1043,14 +1092,16 @@ bool AlternateMemoryBestFitHeap::AllocateColocatedIntervals( // alternate memory allocation, subsequent uses cannot use the same // alternate memory allocation in order not to clobber data. So we force // default memory allocation for these subsequent uses. - const HloUse& previous_use = allocation_value.uses().at(use_idx - 1); - if (previous_use.instruction->opcode() == HloOpcode::kConditional && - previous_use.instruction != use.instruction) { + const AllocationValue::Use& previous_use = + allocation_value.uses().at(use_idx - 1); + if (previous_use.hlo_use.instruction->opcode() == + HloOpcode::kConditional && + previous_use.hlo_use.instruction != hlo_use.instruction) { allow_no_copy_alternate_mem_allocation = false; earliest_prefetch_time = - instruction_schedule.at(previous_use.instruction); - VLOG(3) << "Previous use (" << previous_use.ToString() << ") of use (" - << use.ToString() + instruction_schedule.at(previous_use.hlo_use.instruction); + VLOG(3) << "Previous use (" << previous_use.hlo_use.ToString() + << ") of use (" << hlo_use.ToString() << ") is a conditional, so this use will need to evict. " << "Earliest prefetch time = " << *earliest_prefetch_time; } @@ -1059,7 +1110,7 @@ bool AlternateMemoryBestFitHeap::AllocateColocatedIntervals( // Bitcasts don't define buffers and don't directly consume buffers. Skip // allocating buffers for bitcast uses. The uses that feed from bitcasts // will be handled specially. 
- if (use.instruction->opcode() != HloOpcode::kBitcast) { + if (hlo_use.instruction->opcode() != HloOpcode::kBitcast) { AllocationRequest request; // Rarely, (e.g., when conditional true and false parameters are the // same), definition time can be the time of the conditional and use @@ -1072,7 +1123,7 @@ bool AlternateMemoryBestFitHeap::AllocateColocatedIntervals( allow_no_copy_alternate_mem_allocation; request.earliest_prefetch_time = earliest_prefetch_time; request.preferred_offset = preferred_offset; - request.use = use; + request.use = &use; request.allocation_value = &allocation_value; if (!AllocateSegment(request)) { // If the allocation finding failed (e.g., due to running out of @@ -1085,23 +1136,25 @@ bool AlternateMemoryBestFitHeap::AllocateColocatedIntervals( // If there are multiple uses, they can try using the memory allocation // already at the alternate memory. - definition_time = instruction_schedule.at(use.instruction); + definition_time = instruction_schedule.at(hlo_use.instruction); } - // If the use has been a sequential call (e.g. a while loop), the other - // colocated intervals must alias with this allocation. - if (is_sequential_call) { - MemorySpaceAssignment::Allocation* aliased_allocation = - GetLiveAllocationAt(*allocation_value.allocation_sequence(), - use_time); - AddAliasedRequiredAssignmentsForSequentialCall(use, aliased_allocation); - // Remember the preferred offset to be used inside while loop body - // computations. - if (aliased_allocation->memory_space() == MemorySpace::kAlternate && - use.instruction->opcode() == HloOpcode::kWhile) { - preferred_offset_for_computation[use.instruction->while_body()] = - aliased_allocation->chunk().offset; - } + // Propagate the allocation to any aliases this use might have had. + MemorySpaceAssignment::Allocation* aliased_allocation = + GetLiveAllocationAt(*allocation_value.allocation_sequence(), + use_time); + for (const HloPosition& aliased_position : use.aliases) { + AddAliasedRequiredAssignment(aliased_position.instruction, + aliased_position.index, + aliased_allocation); + } + + // Special case for while loops since the root offset must agree with + // other offsets: remember the preferred offset for the while loop body. + if (hlo_use.instruction->opcode() == HloOpcode::kWhile && + aliased_allocation->memory_space() == MemorySpace::kAlternate) { + preferred_offset_for_computation[hlo_use.instruction->while_body()] = + aliased_allocation->chunk().offset; } } if (!allocation_success) { @@ -1212,34 +1265,45 @@ void AlternateMemoryBestFitHeap::AllocateCrossProgramPrefetchBuffer( pending_required_assignments_.clear(); } -void AlternateMemoryBestFitHeap::AddAliasedRequiredAssignmentsForSequentialCall( - const HloUse& use, - const MemorySpaceAssignment::Allocation* aliased_allocation) { - // Add aliased required assignments. 
- if (use.instruction->opcode() == HloOpcode::kWhile) { - HloComputation* while_body = use.instruction->while_body(); - HloComputation* while_condition = use.instruction->while_condition(); - AddAliasedRequiredAssignment(while_condition->parameter_instruction(0), - use.operand_index, aliased_allocation); - AddAliasedRequiredAssignment(while_body->parameter_instruction(0), - use.operand_index, aliased_allocation); - AddAliasedRequiredAssignment(while_body->root_instruction(), - use.operand_index, aliased_allocation); - AddAliasedRequiredAssignment(use.instruction, use.operand_index, - aliased_allocation); - } else if (use.instruction->opcode() == HloOpcode::kConditional) { - HloComputation* called_computation = - use.instruction->called_computations().at(use.operand_number - 1); - AddAliasedRequiredAssignment(called_computation->parameter_instruction(0), - use.operand_index, aliased_allocation); - } else { - CHECK(use.instruction->opcode() == HloOpcode::kCall); - HloComputation* called_computation = - use.instruction->called_computations().at(0); - AddAliasedRequiredAssignment( - called_computation->parameter_instruction(use.operand_number), - use.operand_index, aliased_allocation); +absl::optional +AlternateMemoryBestFitHeap::RequiredMemoryAssignmentAt(const HloValue* buffer, + int64 time) const { + auto required_assignment_it = required_assignments_.find(buffer); + absl::optional required_assignment_at_time; + if (required_assignment_it != required_assignments_.end()) { + for (const RequiredMemoryAssignment& required_assignment : + required_assignment_it->second) { + if (required_assignment.time == time) { + // Sanity check that there is only one required at time. + CHECK(!required_assignment_at_time); + required_assignment_at_time = required_assignment; + } + } } + return required_assignment_at_time; +} + +absl::optional +AlternateMemoryBestFitHeap::AliasedRequiredAssignmentForUse( + const AllocationValue::Use& use) const { + absl::optional required_assignment; + for (const HloPosition& position : use.aliases) { + const HloValue* value = + &alias_analysis_.dataflow_analysis().GetUniqueValueAt( + position.instruction, position.index); + int64 time = + hlo_live_range_.instruction_schedule().at(position.instruction); + absl::optional required_assignment_for_alias = + RequiredMemoryAssignmentAt(value, time); + if (required_assignment == absl::nullopt) { + required_assignment = required_assignment_for_alias; + } else { + CHECK(required_assignment_for_alias == absl::nullopt || + required_assignment->equals_ignoring_time( + *required_assignment_for_alias)); + } + } + return required_assignment; } void AlternateMemoryBestFitHeap::AddAliasedRequiredAssignment( @@ -1429,24 +1493,6 @@ void AlternateMemoryBestFitHeap::AddToPendingChunks( CommitChunk(buffer_interval, chunk_candidate); } -absl::optional -AlternateMemoryBestFitHeap::RequiredMemoryAssignmentAt(const HloValue* buffer, - int64 time) const { - auto required_assignment_it = required_assignments_.find(buffer); - absl::optional required_assignment_at_time; - if (required_assignment_it != required_assignments_.end()) { - for (const RequiredMemoryAssignment& required_assignment : - required_assignment_it->second) { - if (required_assignment.time == time) { - // Sanity check that there is only one required at time. 
- CHECK(!required_assignment_at_time); - required_assignment_at_time = required_assignment; - } - } - } - return required_assignment_at_time; -} - bool AlternateMemoryBestFitHeap::AllocateSegment( const AllocationRequest& request) { auto allocation_sequence = request.allocation_value->allocation_sequence(); @@ -1457,7 +1503,7 @@ bool AlternateMemoryBestFitHeap::AllocateSegment( MemorySpaceAssignment::Allocation* allocation = GetLiveAllocationAt(*allocation_sequence, request.end_time); CHECK_NE(allocation, nullptr); - allocation->AddUse(request.use); + allocation->AddUse(request.use->hlo_use); return true; } @@ -1467,8 +1513,9 @@ bool AlternateMemoryBestFitHeap::AllocateSegment( << request.allocation_value->ToShortString() << " (" << request.start_time << ", " << request.end_time << ") latest prefetch = " << request.latest_prefetch_time - << " last use = " << request.allocation_value->use_times().back() - << " use = " << request.use.ToString() << ". Size = " << request.size + << " last use = " << request.allocation_value->uses().back().time + << " use = " << request.use->hlo_use.ToString() + << ". Size = " << request.size << ", def pos = " << defining_position.ToString(); CHECK_LE(request.start_time, request.end_time); @@ -1483,8 +1530,21 @@ bool AlternateMemoryBestFitHeap::AllocateSegment( if (required_assignment_at_start) { required_memory_space_at_start = required_assignment_at_start->memory_space; } + // Find required assignment both for the use and its aliases. If they are both + // non-nullopt, then make sure they require the same assignment. auto required_assignment_at_end = RequiredMemoryAssignmentAt( request.allocation_value->value(), request.end_time); + auto aliased_required_assignment_at_end = + AliasedRequiredAssignmentForUse(*request.use); + if (required_assignment_at_end != aliased_required_assignment_at_end) { + if (required_assignment_at_end == absl::nullopt) { + required_assignment_at_end = aliased_required_assignment_at_end; + } else { + CHECK(aliased_required_assignment_at_end == absl::nullopt || + aliased_required_assignment_at_end->equals_ignoring_time( + *required_assignment_at_end)); + } + } absl::optional required_memory_space_at_end; if (required_assignment_at_end) { required_memory_space_at_end = required_assignment_at_end->memory_space; @@ -1553,7 +1613,7 @@ bool AlternateMemoryBestFitHeap::AllocateSegment( VLOG(3) << "Not trying to prefetch because use requires buffer in default mem."; (*prev_allocation_in_default_mem_it)->Extend(request.end_time); - (*prev_allocation_in_default_mem_it)->AddUse(request.use); + (*prev_allocation_in_default_mem_it)->AddUse(request.use->hlo_use); return true; } @@ -1577,7 +1637,7 @@ bool AlternateMemoryBestFitHeap::AllocateSegment( // If a copy wasn't inserted, then add this use to the latest allocation in // default memory. (*prev_allocation_in_default_mem_it)->Extend(request.end_time); - (*prev_allocation_in_default_mem_it)->AddUse(request.use); + (*prev_allocation_in_default_mem_it)->AddUse(request.use->hlo_use); return true; } @@ -1746,7 +1806,7 @@ bool AlternateMemoryBestFitHeap::AllocateInAlternateMemoryNoCopy( chunk_candidate->chunk, request.start_time, request.end_time)); } request.allocation_value->allocation_sequence()->back()->AddUse( - request.use); + request.use->hlo_use); return true; } return false; @@ -1833,7 +1893,7 @@ bool AlternateMemoryBestFitHeap::Evict(const AllocationRequest& request) { if (!eviction_scheduled) { // If the eviction couldn't be scheduled, then fail. 
This buffer will be // kept in the default memory. - VLOG(3) << "Bailing: Could not evict " << request.use.ToString() + VLOG(3) << "Bailing: Could not evict " << request.use->hlo_use.ToString() << " because we hit the limit of maximum asynchronous copies " << "between " << hlo_live_range_.flattened_instruction_sequence() @@ -1868,7 +1928,8 @@ bool AlternateMemoryBestFitHeap::Prefetch( earliest_prefetch_time = std::max(earliest_prefetch_time, *request.earliest_prefetch_time); } - options_.prefetch_interval_picker->Begin(request.use, earliest_prefetch_time, + options_.prefetch_interval_picker->Begin(request.use->hlo_use, + earliest_prefetch_time, request.latest_prefetch_time); VLOG(3) << "Trying prefetch picker = " << options_.prefetch_interval_picker->ToDebugString(); @@ -1922,7 +1983,7 @@ bool AlternateMemoryBestFitHeap::Prefetch( request.allocation_value->allocation_sequence()); request.allocation_value->allocation_sequence()->back()->AddUse( - request.use); + request.use->hlo_use); prefetch_failed_due_to_async_copy_ = false; return true; } @@ -1938,11 +1999,11 @@ AlternateMemoryBestFitHeap::FindBestChunkCandidate( if (!preferred_offset) { // Find a chunk that's as long living as possible iterating in reverse over // the use times. - for (auto use_time = request.allocation_value->use_times().rbegin(); - use_time != request.allocation_value->use_times().rend() && - *use_time >= end_time; - ++use_time) { - alternate_mem_interval->end = *use_time; + for (auto use_it = request.allocation_value->uses().rbegin(); + use_it != request.allocation_value->uses().rend() && + use_it->time >= end_time; + ++use_it) { + alternate_mem_interval->end = use_it->time; ChunkCandidate chunk_candidate = FindChunkCandidate(*alternate_mem_interval); if (chunk_candidate.heap_size <= available_heap_size()) { diff --git a/tensorflow/compiler/xla/service/memory_space_assignment.h b/tensorflow/compiler/xla/service/memory_space_assignment.h index c75457dd48e..d87908a6270 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment.h +++ b/tensorflow/compiler/xla/service/memory_space_assignment.h @@ -620,6 +620,18 @@ class MemorySpaceAssignment { // add.5, operand 0 class AllocationValue { public: + // This data structure wraps an HloUse and adds additional metadata that are + // useful for allocation. + struct Use { + // The wrapped HloUse object. + HloUse hlo_use; + // The logical time this use is scheduled. + int64 time; + // All the positions where this use aliases with. The aliased positions + // must get the same allocation. 
+ std::vector aliases; + }; + AllocationValue(const HloValue* value, const HloPosition& position) : value_(value), defining_position_(position) {} @@ -627,8 +639,8 @@ class MemorySpaceAssignment { const HloInstruction* defining_instruction() const { return defining_position().instruction; } - const std::vector& uses() const { return uses_; } - const std::vector& use_times() const { return use_times_; } + const std::vector& uses() const { return uses_; } + std::vector& uses() { return uses_; } const HloValue* value() const { return value_; } const HloComputation* computation() const { return defining_instruction()->parent(); @@ -636,8 +648,7 @@ class MemorySpaceAssignment { AllocationSequence* allocation_sequence() { return &allocation_sequence_; } void AddUse(const HloUse& use, int64 use_time) { - uses_.push_back(use); - use_times_.push_back(use_time); + uses_.push_back({use, use_time, {}}); } std::string ToString() const; @@ -646,8 +657,7 @@ class MemorySpaceAssignment { private: const HloValue* value_; HloPosition defining_position_; - std::vector uses_; - std::vector use_times_; + std::vector uses_; AllocationSequence allocation_sequence_; }; @@ -769,10 +779,18 @@ struct RequiredMemoryAssignment { int64 time; absl::optional chunk; + bool equals_ignoring_time(const RequiredMemoryAssignment& other) const { + return memory_space == other.memory_space && chunk == other.chunk; + } + bool operator==(const RequiredMemoryAssignment& other) const { return memory_space == other.memory_space && time == other.time && chunk == other.chunk; } + + bool operator!=(const RequiredMemoryAssignment& other) const { + return !(*this == other); + } }; // A struct representing an asynchronous copy with its logical start and end @@ -880,7 +898,7 @@ class AlternateMemoryBestFitHeap : public GlobalDecreasingSizeBestFitHeap { bool allow_no_copy_alternate_mem_allocation; absl::optional earliest_prefetch_time; absl::optional preferred_offset; - HloUse use; + const MemorySpaceAssignment::AllocationValue::Use* use; MemorySpaceAssignment::AllocationValue* allocation_value; }; @@ -890,10 +908,6 @@ class AlternateMemoryBestFitHeap : public GlobalDecreasingSizeBestFitHeap { static MemorySpaceAssignment::Allocation* GetLiveAllocationAt( const MemorySpaceAssignment::AllocationSequence& allocations, int64 time); - // Returns the required assignment at a particular time, if available. - absl::optional RequiredMemoryAssignmentAt( - const HloValue* buffer, int64 time) const; - // Returns true if this buffer is allowed to be placed in the alternate // memory. bool IsIntervalAllowedInAlternateMemory(const BufferInterval& interval) const; @@ -914,6 +928,10 @@ class AlternateMemoryBestFitHeap : public GlobalDecreasingSizeBestFitHeap { bool AllocateColocatedIntervals( const std::vector& colocated_intervals); + // Go through all the uses in the AllocationValues and find the aliasing + // positions. + void FindAliases(std::vector* allocation_values) const; + // Finds an allocation for an allocation request for a segment (see the // documentation for AllocationRequest above how a segment is defined). // @@ -950,12 +968,14 @@ class AlternateMemoryBestFitHeap : public GlobalDecreasingSizeBestFitHeap { const AllocationRequest& request, absl::optional preferred_offset, BufferInterval* alternate_mem_interval) const; - // At the end of an allocation with a sequential call (while, conditional, and - // call), this function adds the necessary aliased assignments within the - // called computations. 
- void AddAliasedRequiredAssignmentsForSequentialCall( - const HloUse& use, - const MemorySpaceAssignment::Allocation* aliased_allocation); + // Returns the required assignment at a particular time, if available. + absl::optional RequiredMemoryAssignmentAt( + const HloValue* buffer, int64 time) const; + + // Searches for aliases in the use for a required assignment, and returns it + // if found. + absl::optional AliasedRequiredAssignmentForUse( + const AllocationValue::Use& use) const; // Propagates aliased required assignment for a given position. void AddAliasedRequiredAssignment( diff --git a/tensorflow/compiler/xla/service/memory_space_assignment_test.cc b/tensorflow/compiler/xla/service/memory_space_assignment_test.cc index 9c6b42cac91..032a3f53479 100644 --- a/tensorflow/compiler/xla/service/memory_space_assignment_test.cc +++ b/tensorflow/compiler/xla/service/memory_space_assignment_test.cc @@ -1635,7 +1635,8 @@ TEST_P(MemorySpaceAssignmentTest, WhileCondAliasBug) { %constant.5 = s32[1]{0:T(128)} constant({1}) %prev.4 = s32[6]{0:T(128)} parameter(0) %rng.8 = s32[5]{0:T(128)} rng(s32[]{:T(128)} %constant.6, s32[]{:T(128)} %constant.7), distribution=rng_uniform - ROOT %fusion = s32[6]{0:T(128)} fusion(s32[6]{0:T(128)} %prev.4, s32[1]{0:T(128)} %constant.5, s32[5]{0:T(128)} %rng.8), kind=kLoop, calls=%fused_computation + %neg = s32[1]{0:T(128)} negate(s32[1]{0:T(128)} %constant.5) + ROOT %fusion = s32[6]{0:T(128)} fusion(s32[6]{0:T(128)} %prev.4, s32[1]{0:T(128)} %neg, s32[5]{0:T(128)} %rng.8), kind=kLoop, calls=%fused_computation } %WhileWithPrngScalarResult.11 (prev.12: s32[6]) -> pred[] { @@ -1665,6 +1666,62 @@ TEST_P(MemorySpaceAssignmentTest, WhileCondAliasBug) { kDefaultMemorySpace); } +TEST_P(MemorySpaceAssignmentTest, WhileInPlaceBuffer) { + // Ensure that a dynamic update slice within a while loop is able to get an + // alternate memory allocation. 
+ absl::string_view hlo_string = R"( + HloModule Module, is_scheduled=true + + fused_computation { + param0 = f32[2,3] parameter(0) + constant.1 = f32[] constant(0) + broadcast = f32[2,1] broadcast(constant.1), dimensions={} + constant.3 = s32[] constant(0) + ROOT dynamic-update-slice.5 = f32[2,3] dynamic-update-slice(param0, broadcast, constant.3, constant.3) + } + + %WhileBody (body_param: (f32[2,3], f32[2,3], f32[])) -> (f32[2,3], f32[2,3], f32[]) { + %body_param = (f32[2,3]{1,0}, f32[2,3]{1,0}, f32[]) parameter(0) + %get-tuple-element.1 = f32[] get-tuple-element((f32[2,3]{1,0}, f32[2,3]{1,0}, f32[]) %body_param), index=2 + %get-tuple-element.2 = f32[2,3]{1,0} get-tuple-element((f32[2,3]{1,0}, f32[2,3]{1,0}, f32[]) %body_param), index=0 + %get-tuple-element.3 = f32[2,3]{1,0} get-tuple-element((f32[2,3]{1,0}, f32[2,3]{1,0}, f32[]) %body_param), index=1 + %fusion = f32[2,3]{1,0} fusion(get-tuple-element.3), kind=kLoop, calls=fused_computation + %multiply = f32[2,3]{1,0} multiply(f32[2,3]{1,0} %get-tuple-element.2, f32[2,3]{1,0} %fusion) + ROOT %tuple = (f32[2,3]{1,0}, f32[2,3]{1,0}, f32[]) tuple(f32[2,3]{1,0} %multiply, f32[2,3]{1,0} %fusion, f32[] %get-tuple-element.1) + } + + %WhileCond (cond_param: (f32[2,3], f32[2,3], f32[])) -> pred[] { + %cond_param = (f32[2,3]{1,0}, f32[2,3]{1,0}, f32[]) parameter(0) + %get-tuple-element = f32[] get-tuple-element((f32[2,3]{1,0}, f32[2,3]{1,0}, f32[]) %cond_param), index=2 + %constant = f32[] constant(50) + ROOT %compare = pred[] compare(f32[] %get-tuple-element, f32[] %constant), direction=LT + } + + ENTRY %Entry (param_data: f32[2,3], param_iter: f32[], p2: f32[2,3]) -> f32[2,3] { + %param_iter = f32[] parameter(1) + %param_data = f32[2,3]{1,0} parameter(0) + %p2 = f32[2,3]{1,0} parameter(2) + %copy1 = f32[2,3]{1,0} copy(param_data) + %copy2 = f32[2,3]{1,0} copy(p2) + %tuple.1 = (f32[2,3]{1,0}, f32[2,3]{1,0}, f32[]) tuple(f32[2,3]{1,0} copy1, f32[2,3]{1,0} copy2, f32[] %param_iter) + %while = (f32[2,3]{1,0}, f32[2,3]{1,0}, f32[]) while((f32[2,3]{1,0}, f32[2,3]{1,0}, f32[]) %tuple.1), condition=%WhileCond, body=%WhileBody + %get-tuple-element.4 = f32[2,3]{1,0} get-tuple-element((f32[2,3]{1,0}, f32[2,3]{1,0}, f32[]) %while), index=0 + ROOT %copy3 = f32[2,3]{1,0} copy(get-tuple-element.4) + } + )"; + + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + AssignMemorySpace(module.get()); + const HloInstruction* while_op = + module->entry_computation()->GetInstructionWithName("while"); + if (GetParam()) { + EXPECT_EQ( + ShapeUtil::GetSubshape(while_op->shape(), {1}).layout().memory_space(), + kAlternateMemorySpace); + } +} + TEST_P(MemorySpaceAssignmentTest, ControlPredecessorsBug) { // Having control_predecessors on an HLO was preventing us from DCEing an op // that doesn't have any users (tuple.1). The scheduler assumes the graph is From cb8342c4eb6a37c541c69484c9b3b746ce2d9a48 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Mon, 8 Jun 2020 10:30:43 -0700 Subject: [PATCH 052/178] [XLA:Python] Implement np.nextafter for bfloat16 extension. Should fix test case failure in https://github.com/google/jax/pull/3309 after a jaxlib release. The implementation is a port of the implementation in xla/client/lib.math.cc. 
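For context, a small illustrative check (plain Python floats only, with constant names invented for this sketch; it does not use the bfloat16 NumPy extension) shows how the step sizes exercised by the new test follow from the bfloat16 format of 8 exponent bits and 7 explicit mantissa bits:

```python
# bfloat16 keeps float32's exponent range but only 7 explicit mantissa bits.
MANTISSA_BITS = 7
MIN_NORMAL_EXPONENT = -126

# nextafter(1, 2) - 1 is one ulp at 1.0, i.e. 2**-7 = 0.0078125.
epsilon = 2.0 ** -MANTISSA_BITS
assert epsilon == 0.0078125

# nextafter(0, 1) is the smallest positive subnormal, 2**(-126 - 7) = 2**-133,
# matching the float.fromhex("1.0p-133") constant used in testNextAfter.
smallest_denormal = 2.0 ** (MIN_NORMAL_EXPONENT - MANTISSA_BITS)
assert smallest_denormal == float.fromhex("1.0p-133")
```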
PiperOrigin-RevId: 315303126 Change-Id: I0bdccbb224e74d45663b41581c67de53ee2b77b3 --- tensorflow/compiler/xla/python/bfloat16.cc | 49 +++++++++++++++++-- .../compiler/xla/python/bfloat16_test.py | 21 ++++++++ 2 files changed, 65 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/python/bfloat16.cc b/tensorflow/compiler/xla/python/bfloat16.cc index e48475b7a85..9e38769168d 100644 --- a/tensorflow/compiler/xla/python/bfloat16.cc +++ b/tensorflow/compiler/xla/python/bfloat16.cc @@ -227,9 +227,9 @@ PyNumberMethods PyBfloat16_AsNumber = { nullptr, // nb_and nullptr, // nb_xor nullptr, // nb_or - PyBfloat16_Int, // nb_int - nullptr, // reserved - PyBfloat16_Float, // nb_float + PyBfloat16_Int, // nb_int + nullptr, // reserved + PyBfloat16_Float, // nb_float nullptr, // nb_inplace_add nullptr, // nb_inplace_subtract @@ -1213,7 +1213,44 @@ struct LogicalXor { } }; -// TODO(phawkins): implement nextafter, spacing +struct NextAfter { + bfloat16 operator()(bfloat16 from, bfloat16 to) { + uint16_t from_as_int, to_as_int; + const uint16_t sign_mask = 1 << 15; + float from_as_float(from), to_as_float(to); + memcpy(&from_as_int, &from, sizeof(bfloat16)); + memcpy(&to_as_int, &to, sizeof(bfloat16)); + if (std::isnan(from_as_float) || std::isnan(to_as_float)) { + return bfloat16(std::numeric_limits::quiet_NaN()); + } + if (from_as_int == to_as_int) { + return to; + } + if (from_as_float == 0) { + if (to_as_float == 0) { + return to; + } else { + // Smallest subnormal signed like `to`. + uint16_t out_int = (to_as_int & sign_mask) | 1; + bfloat16 out; + memcpy(&out, &out_int, sizeof(bfloat16)); + return out; + } + } + uint16_t from_sign = from_as_int & sign_mask; + uint16_t to_sign = to_as_int & sign_mask; + uint16_t from_abs = from_as_int & ~sign_mask; + uint16_t to_abs = to_as_int & ~sign_mask; + uint16_t magnitude_adjustment = + (from_abs > to_abs || from_sign != to_sign) ? 
0xFFFF : 0x0001; + uint16_t out_int = from_as_int + magnitude_adjustment; + bfloat16 out; + memcpy(&out, &out_int, sizeof(bfloat16)); + return out; + } +}; + +// TODO(phawkins): implement spacing } // namespace ufuncs @@ -1467,7 +1504,9 @@ bool Initialize() { RegisterUFunc>(numpy.get(), "ceil") && RegisterUFunc>(numpy.get(), - "trunc"); + "trunc") && + RegisterUFunc>( + numpy.get(), "nextafter"); return ok; } diff --git a/tensorflow/compiler/xla/python/bfloat16_test.py b/tensorflow/compiler/xla/python/bfloat16_test.py index 51421a3655e..4c4f8c28d3f 100644 --- a/tensorflow/compiler/xla/python/bfloat16_test.py +++ b/tensorflow/compiler/xla/python/bfloat16_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import collections +import itertools import math from absl.testing import absltest @@ -398,6 +399,26 @@ class Bfloat16NumPyTest(parameterized.TestCase): np.testing.assert_equal(exp1, exp2) numpy_assert_allclose(mant1, mant2, rtol=1e-2) + def testNextAfter(self): + one = np.array(1., dtype=bfloat16) + two = np.array(2., dtype=bfloat16) + zero = np.array(0., dtype=bfloat16) + nan = np.array(np.nan, dtype=bfloat16) + np.testing.assert_equal(np.nextafter(one, two) - one, epsilon) + np.testing.assert_equal(np.nextafter(one, zero) - one, -epsilon / 2) + np.testing.assert_equal(np.isnan(np.nextafter(nan, one)), True) + np.testing.assert_equal(np.isnan(np.nextafter(one, nan)), True) + np.testing.assert_equal(np.nextafter(one, one), one) + smallest_denormal = float.fromhex("1.0p-133") + np.testing.assert_equal(np.nextafter(zero, one), smallest_denormal) + np.testing.assert_equal(np.nextafter(zero, -one), -smallest_denormal) + for a, b in itertools.permutations([0., -0., nan], 2): + np.testing.assert_equal( + np.nextafter( + np.array(a, dtype=np.float32), np.array(b, dtype=np.float32)), + np.nextafter( + np.array(a, dtype=bfloat16), np.array(b, dtype=bfloat16))) + if __name__ == "__main__": absltest.main() From ee4f27a52443e902a07d0b0752ebd9e537349ebd Mon Sep 17 00:00:00 2001 From: Jose Baiocchi Date: Mon, 8 Jun 2020 10:40:42 -0700 Subject: [PATCH 053/178] XStatsOwner::GetStat returns optional PiperOrigin-RevId: 315305610 Change-Id: I14ea35d662c585789dc63877d1938ed250ca32fd --- .../convert/xplane_to_op_metrics_db.cc | 9 ++- .../profiler/convert/xplane_to_op_stats.cc | 42 +++++++------ .../profiler/convert/xplane_to_step_events.cc | 13 ++-- .../internal/gpu/device_tracer_test.cc | 12 ++-- .../core/profiler/utils/group_events.cc | 61 ++++++++++--------- tensorflow/core/profiler/utils/group_events.h | 2 +- .../core/profiler/utils/group_events_test.cc | 30 ++++----- .../core/profiler/utils/xplane_visitor.h | 22 ++++--- 8 files changed, 95 insertions(+), 96 deletions(-) diff --git a/tensorflow/core/profiler/convert/xplane_to_op_metrics_db.cc b/tensorflow/core/profiler/convert/xplane_to_op_metrics_db.cc index 4a369b8b96a..4abe5740969 100644 --- a/tensorflow/core/profiler/convert/xplane_to_op_metrics_db.cc +++ b/tensorflow/core/profiler/convert/xplane_to_op_metrics_db.cc @@ -148,11 +148,10 @@ void CollectTfActivities(const XLineVisitor& line, if (tf_op != nullptr) { ++tf_op_id; bool is_eager = false; - event.ForEachStat([&](const XStatVisitor& stat) { - if (stat.Type() == StatType::kIsEager) { - is_eager = stat.IntValue(); - } - }); + if (absl::optional stat = + event.GetStat(StatType::kIsEager)) { + is_eager = stat->IntValue(); + } Timespan span(event.TimestampPs(), event.DurationPs()); tf_activities->push_back( {span.begin_ps(), tf_op_id, kTfOpBegin, *tf_op, 
is_eager}); diff --git a/tensorflow/core/profiler/convert/xplane_to_op_stats.cc b/tensorflow/core/profiler/convert/xplane_to_op_stats.cc index 4d2a45747e0..ccd7c54fa19 100644 --- a/tensorflow/core/profiler/convert/xplane_to_op_stats.cc +++ b/tensorflow/core/profiler/convert/xplane_to_op_stats.cc @@ -49,25 +49,29 @@ namespace { DeviceCapabilities GetDeviceCapFromXPlane(const XPlane& device_plane) { DeviceCapabilities cap; XPlaneVisitor plane = CreateTfXPlaneVisitor(&device_plane); - if (auto clock_rate_khz = plane.GetStats(kDevCapClockRateKHz)) { - cap.set_clock_rate_in_ghz(clock_rate_khz->int64_value() / 1000000.0); - } - if (auto core_count = plane.GetStats(kDevCapCoreCount)) { - cap.set_num_cores(core_count->int64_value()); - } - // Set memory bandwidth in bytes/s. - if (auto memory_bw = plane.GetStats(kDevCapMemoryBandwidth)) { - cap.set_memory_bandwidth(memory_bw->int64_value()); - } - if (auto memory_size_in_bytes = plane.GetStats(kDevCapMemorySize)) { - cap.set_memory_size_in_bytes(memory_size_in_bytes->uint64_value()); - } - if (auto cap_major = plane.GetStats(kDevCapComputeCapMajor)) { - cap.mutable_compute_capability()->set_major(cap_major->int64_value()); - } - if (auto cap_minor = plane.GetStats(kDevCapComputeCapMinor)) { - cap.mutable_compute_capability()->set_minor(cap_minor->int64_value()); - } + plane.ForEachStat([&cap](const XStatVisitor& stat) { + if (!stat.Type().has_value()) return; + switch (stat.Type().value()) { + case kDevCapClockRateKHz: + cap.set_clock_rate_in_ghz(stat.IntValue() / 1000000.0); + break; + case kDevCapCoreCount: + cap.set_num_cores(stat.IntValue()); + break; + case kDevCapMemoryBandwidth: + cap.set_memory_bandwidth(stat.IntValue()); // bytes/s + break; + case kDevCapMemorySize: + cap.set_memory_size_in_bytes(stat.UintValue()); + break; + case kDevCapComputeCapMajor: + cap.mutable_compute_capability()->set_major(stat.IntValue()); + break; + case kDevCapComputeCapMinor: + cap.mutable_compute_capability()->set_minor(stat.IntValue()); + break; + } + }); return cap; } diff --git a/tensorflow/core/profiler/convert/xplane_to_step_events.cc b/tensorflow/core/profiler/convert/xplane_to_step_events.cc index 7bb7cd6943c..bfe0ac86ef4 100644 --- a/tensorflow/core/profiler/convert/xplane_to_step_events.cc +++ b/tensorflow/core/profiler/convert/xplane_to_step_events.cc @@ -112,14 +112,11 @@ StepEvents ConvertHostThreadsXPlaneToStepEvents( StepEvents ConvertDeviceStepInfoToStepMarkers(const XLineVisitor& line) { StepEvents result; line.ForEachEvent([&](const XEventVisitor& event) { - event.ForEachStat([&](const XStatVisitor& stat) { - if (stat.Type() == StatType::kGroupId) { - result[stat.IntValue()].AddMarker( - StepMarker(StepMarkerType::kDeviceStepMarker, event.Name(), - Timespan(event.TimestampPs(), event.DurationPs()))); - return; - } - }); + if (absl::optional stat = event.GetStat(StatType::kGroupId)) { + result[stat->IntValue()].AddMarker( + StepMarker(StepMarkerType::kDeviceStepMarker, event.Name(), + Timespan(event.TimestampPs(), event.DurationPs()))); + } }); return result; } diff --git a/tensorflow/core/profiler/internal/gpu/device_tracer_test.cc b/tensorflow/core/profiler/internal/gpu/device_tracer_test.cc index e6aacb66b89..6fc19e776e1 100644 --- a/tensorflow/core/profiler/internal/gpu/device_tracer_test.cc +++ b/tensorflow/core/profiler/internal/gpu/device_tracer_test.cc @@ -274,12 +274,12 @@ TEST_F(DeviceTracerTest, TraceToXSpace) { EXPECT_EQ(device_plane->event_metadata_size(), 4); // Check if device capacity is serialized. 
XPlaneVisitor plane = CreateTfXPlaneVisitor(device_plane); - EXPECT_NE(plane.GetStats(kDevCapClockRateKHz), nullptr); - EXPECT_NE(plane.GetStats(kDevCapCoreCount), nullptr); - EXPECT_NE(plane.GetStats(kDevCapMemoryBandwidth), nullptr); - EXPECT_NE(plane.GetStats(kDevCapMemorySize), nullptr); - EXPECT_NE(plane.GetStats(kDevCapComputeCapMajor), nullptr); - EXPECT_NE(plane.GetStats(kDevCapComputeCapMinor), nullptr); + EXPECT_TRUE(plane.GetStat(kDevCapClockRateKHz).has_value()); + EXPECT_TRUE(plane.GetStat(kDevCapCoreCount).has_value()); + EXPECT_TRUE(plane.GetStat(kDevCapMemoryBandwidth).has_value()); + EXPECT_TRUE(plane.GetStat(kDevCapMemorySize).has_value()); + EXPECT_TRUE(plane.GetStat(kDevCapComputeCapMajor).has_value()); + EXPECT_TRUE(plane.GetStat(kDevCapComputeCapMinor).has_value()); // Check if the device events timestamps are set. int total_events = 0; diff --git a/tensorflow/core/profiler/utils/group_events.cc b/tensorflow/core/profiler/utils/group_events.cc index 8b4d68a0668..99c6136fc84 100644 --- a/tensorflow/core/profiler/utils/group_events.cc +++ b/tensorflow/core/profiler/utils/group_events.cc @@ -26,7 +26,6 @@ limitations under the License. #include "absl/container/flat_hash_map.h" #include "absl/strings/str_cat.h" -#include "absl/strings/str_join.h" #include "absl/strings/string_view.h" #include "absl/types/optional.h" #include "tensorflow/core/lib/gtl/map_util.h" @@ -225,30 +224,30 @@ EventNode::EventNode(const EventNode& event_node) : EventNode(event_node.plane_, event_node.raw_line_, event_node.raw_event_) {} -const XStat* EventNode::GetContextStat(int64 stat_type) const { - if (const XStat* stat = visitor_.GetStats(stat_type)) { - return stat; - } else if (parent_) { - return parent_->GetContextStat(stat_type); +absl::optional EventNode::GetContextStat(int64 stat_type) const { + for (const EventNode* node = this; node != nullptr; node = node->parent_) { + if (absl::optional stat = node->visitor_.GetStat(stat_type)) { + return stat; + } } - return nullptr; + return absl::nullopt; } std::string EventNode::GetGroupName() const { - std::vector name_parts; - if (const XStat* graph_type_stat = GetContextStat(StatType::kGraphType)) { - XStatVisitor stat(plane_, graph_type_stat); - name_parts.push_back(stat.ToString()); + std::string name; + if (absl::optional stat = + GetContextStat(StatType::kGraphType)) { + absl::StrAppend(&name, stat->StrOrRefValue(), " "); } int64 step_num = group_id_.value_or(0); - if (const XStat* step_num_stat = GetContextStat(StatType::kStepNum)) { - step_num = step_num_stat->int64_value(); + if (absl::optional stat = GetContextStat(StatType::kIterNum)) { + step_num = stat->IntValue(); + } else if (absl::optional stat = + GetContextStat(StatType::kStepNum)) { + step_num = stat->IntValue(); } - if (const XStat* iter_num_stat = GetContextStat(StatType::kIterNum)) { - step_num = iter_num_stat->int64_value(); - } - name_parts.push_back(absl::StrCat(step_num)); - return absl::StrJoin(name_parts, " "); + absl::StrAppend(&name, step_num); + return name; } void EventNode::PropagateGroupId(int64 group_id) { @@ -343,11 +342,12 @@ void EventForest::ConnectInterThread( for (const auto& parent_event_node : *parent_event_node_list) { std::vector stats; for (auto stat_type : parent_stat_types) { - const XStat* stat = parent_event_node->GetContextStat(stat_type); + absl::optional stat = + parent_event_node->GetContextStat(stat_type); if (!stat) break; - stats.push_back(stat->value_case() == stat->kInt64Value - ? 
stat->int64_value() - : stat->uint64_value()); + stats.push_back((stat->ValueCase() == XStat::kInt64Value) + ? stat->IntValue() + : stat->UintValue()); } if (stats.size() == parent_stat_types.size()) { connect_map[stats] = parent_event_node.get(); @@ -359,11 +359,12 @@ void EventForest::ConnectInterThread( for (const auto& child_event_node : *child_event_node_list) { std::vector stats; for (auto stat_type : *child_stat_types) { - const XStat* stat = child_event_node->GetContextStat(stat_type); + absl::optional stat = + child_event_node->GetContextStat(stat_type); if (!stat) break; - stats.push_back(stat->value_case() == stat->kInt64Value - ? stat->int64_value() - : stat->uint64_value()); + stats.push_back((stat->ValueCase() == XStat::kInt64Value) + ? stat->IntValue() + : stat->UintValue()); } if (stats.size() == child_stat_types->size()) { if (auto parent_event_node = gtl::FindPtrOrNull(connect_map, stats)) { @@ -429,14 +430,14 @@ void EventForest::ProcessTensorFlowLoop() { if (!executor_event_list) return; for (auto& executor_event : *executor_event_list) { if (IsTfDataEvent(*executor_event)) continue; - const XStat* step_id_stat = + absl::optional step_id_stat = executor_event->GetContextStat(StatType::kStepId); - const XStat* iter_num_stat = + absl::optional iter_num_stat = executor_event->GetContextStat(StatType::kIterNum); if (!step_id_stat || !iter_num_stat) continue; - int64 step_id = step_id_stat->int64_value(); + int64 step_id = step_id_stat->IntValue(); TensorFlowLoop& tf_loop = tf_loops[step_id]; - TensorFlowLoopIteration& iteration = tf_loop[iter_num_stat->int64_value()]; + TensorFlowLoopIteration& iteration = tf_loop[iter_num_stat->IntValue()]; if (!iteration.first_event || executor_event->StartsBefore(*iteration.first_event)) { iteration.first_event = executor_event.get(); diff --git a/tensorflow/core/profiler/utils/group_events.h b/tensorflow/core/profiler/utils/group_events.h index 2d10480a64f..388da0f5d67 100644 --- a/tensorflow/core/profiler/utils/group_events.h +++ b/tensorflow/core/profiler/utils/group_events.h @@ -78,7 +78,7 @@ class EventNode { const XEventVisitor& GetEventVisitor() const { return visitor_; } - const XStat* GetContextStat(int64 stat_type) const; + absl::optional GetContextStat(int64 stat_type) const; void AddStepName(absl::string_view step_name); diff --git a/tensorflow/core/profiler/utils/group_events_test.cc b/tensorflow/core/profiler/utils/group_events_test.cc index ea378b7cb70..6ff069dc1ae 100644 --- a/tensorflow/core/profiler/utils/group_events_test.cc +++ b/tensorflow/core/profiler/utils/group_events_test.cc @@ -174,12 +174,10 @@ TEST(GroupEventsTest, GroupFunctionalOp) { line.ForEachEvent( [&](const tensorflow::profiler::XEventVisitor& event) { absl::optional group_id; - event.ForEachStat( - [&](const tensorflow::profiler::XStatVisitor& stat) { - if (stat.Type() == StatType::kGroupId) { - group_id = stat.IntValue(); - } - }); + if (absl::optional stat = + event.GetStat(StatType::kGroupId)) { + group_id = stat->IntValue(); + } EXPECT_TRUE(group_id.has_value()); EXPECT_EQ(*group_id, 0); }); @@ -305,12 +303,10 @@ TEST(GroupEventsTest, SemanticArgTest) { line.ForEachEvent( [&](const tensorflow::profiler::XEventVisitor& event) { absl::optional group_id; - event.ForEachStat( - [&](const tensorflow::profiler::XStatVisitor& stat) { - if (stat.Type() == StatType::kGroupId) { - group_id = stat.IntValue(); - } - }); + if (absl::optional stat = + event.GetStat(StatType::kGroupId)) { + group_id = stat->IntValue(); + } EXPECT_TRUE(group_id.has_value()); 
EXPECT_EQ(*group_id, 0); }); @@ -339,12 +335,10 @@ TEST(GroupEventsTest, AsyncEventTest) { line.ForEachEvent( [&](const tensorflow::profiler::XEventVisitor& event) { absl::optional group_id; - event.ForEachStat( - [&](const tensorflow::profiler::XStatVisitor& stat) { - if (stat.Type() == StatType::kGroupId) { - group_id = stat.IntValue(); - } - }); + if (absl::optional stat = + event.GetStat(StatType::kGroupId)) { + group_id = stat->IntValue(); + } if (event.Name() == kAsync) { EXPECT_FALSE(group_id.has_value()); } else { diff --git a/tensorflow/core/profiler/utils/xplane_visitor.h b/tensorflow/core/profiler/utils/xplane_visitor.h index 4120a2821ca..a838825c773 100644 --- a/tensorflow/core/profiler/utils/xplane_visitor.h +++ b/tensorflow/core/profiler/utils/xplane_visitor.h @@ -86,8 +86,10 @@ class XStatsOwner { } } - // Shortcut to get a specfic stat type, nullptr if it is absent. - const XStat* GetStats(int64 stat_type) const; + // Shortcut to get a specific stat type, nullopt if absent. + // This function performs a linear search for the requested stat value. + // Prefer ForEachStat above when multiple stat values are necessary. + absl::optional GetStat(int64 stat_type) const; private: const T* stats_owner_; @@ -241,14 +243,16 @@ class XPlaneVisitor : public XStatsOwner { }; template -const XStat* XStatsOwner::GetStats(int64 stat_type) const { - absl::optional stat_metadata_id = - metadata_->GetStatMetadataId(stat_type); - if (!stat_metadata_id) return nullptr; // type does not exist in the XPlane. - for (const XStat& stat : stats_owner_->stats()) { - if (stat.metadata_id() == *stat_metadata_id) return &stat; +absl::optional XStatsOwner::GetStat(int64 stat_type) const { + if (absl::optional stat_metadata_id = + metadata_->GetStatMetadataId(stat_type)) { + for (const XStat& stat : stats_owner_->stats()) { + if (stat.metadata_id() == *stat_metadata_id) { + return XStatVisitor(metadata_, &stat); + } + } } - return nullptr; // type does not exist in this owner. + return absl::nullopt; // type does not exist in this owner. } } // namespace profiler From b0fe2034d920f794f7928d71e170941983e51c18 Mon Sep 17 00:00:00 2001 From: nammbash Date: Mon, 8 Jun 2020 11:05:18 -0700 Subject: [PATCH 054/178] Review Comments 2 --- tensorflow/core/graph/mkl_graph_util.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/graph/mkl_graph_util.h b/tensorflow/core/graph/mkl_graph_util.h index 3044b33273e..0ab9db4e786 100644 --- a/tensorflow/core/graph/mkl_graph_util.h +++ b/tensorflow/core/graph/mkl_graph_util.h @@ -126,7 +126,7 @@ inline string GetMklEagerOpName(const string& name) { } #ifdef ENABLE_INTEL_MKL_BFLOAT16 -static inline bool HasBfloat16Support(DataType T) { +static inline bool CheckBfloat16Support(DataType T) { static absl::once_flag cpu_bfloat16_warn_once_flag; // Restrict bfloat16 ops to platforms with at least AVX512 support, fall back // to Eigen implementation otherwise. 
@@ -159,7 +159,7 @@ static inline bool IsMklLayoutDependentOp(const string& op_name, DataType T) { #ifdef ENABLE_INTEL_MKL_BFLOAT16 // Restrict regular ops to FLOAT and BFLOAT16 if (kernel.find(kMklLayoutDependentOpLabelPattern) != string::npos) { - return (T == DT_FLOAT || (T == DT_BFLOAT16 && HasBfloat16Support(T))); + return (T == DT_FLOAT || CheckBfloat16Support(T)); } #else // Restrict regular ops to FLOAT @@ -217,7 +217,7 @@ static inline bool IsMklNameChangeOp(const string& op_name, DataType T) { T == DT_DOUBLE || T == DT_FLOAT); #ifdef ENABLE_INTEL_MKL_BFLOAT16 isTypeAllowed = - isTypeAllowed || (T == DT_BFLOAT16 && HasBfloat16Support(T)); + (isTypeAllowed || CheckBfloat16Support(T)); #endif return isTypeAllowed; } From 4f4f5db82fa862415fe83b616572f49f4a9f4d28 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Mon, 8 Jun 2020 11:02:58 -0700 Subject: [PATCH 055/178] Added batch support for OpenCL converters in some cases. PiperOrigin-RevId: 315310765 Change-Id: Icec9b3b989e2c3a796882d7cb53a9c5a27bebedf --- tensorflow/lite/delegates/gpu/cl/api.cc | 2 +- .../delegates/gpu/cl/kernels/converter.cc | 22 +++++++++++-------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/api.cc b/tensorflow/lite/delegates/gpu/cl/api.cc index e82f67392e8..ffe0fb68881 100644 --- a/tensorflow/lite/delegates/gpu/cl/api.cc +++ b/tensorflow/lite/delegates/gpu/cl/api.cc @@ -513,7 +513,7 @@ class InferenceRunnerImpl : public InferenceRunner { TensorObjectDef TensorToDef(const Tensor& tensor) { TensorObjectDef def; - def.dimensions.b = 1; + def.dimensions.b = tensor.Batch(); def.dimensions.h = tensor.Height(); def.dimensions.w = tensor.Width(); def.dimensions.c = tensor.Channels(); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc b/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc index e3170f068e9..4d1b274a0aa 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/converter.cc @@ -44,7 +44,7 @@ class OpenClConverterImpl : public TensorObjectConverter { kernel_.ResetBindingCounter(); RETURN_IF_ERROR(kernel_.SetMemoryAuto(input)); RETURN_IF_ERROR(kernel_.SetMemoryAuto(output)); - int3 grid = int3(dims_.w, dims_.h, dims_.d()); + int3 grid = int3(dims_.w * dims_.b, dims_.h, dims_.d()); int4 size = int4(dims_.w, dims_.h, dims_.d(), dims_.b); RETURN_IF_ERROR(kernel_.SetBytesAuto(size)); RETURN_IF_ERROR(kernel_.SetBytesAuto(dims_.c)); @@ -105,7 +105,7 @@ class FromTensorConverter : public OpenClConverterImpl { "__global " + ToCLDataType(output_def.object_def.data_type) + "* dst", R"( int c = d * 4; - int index = (y * size.x + x) * channels + c; + int index = ((b * size.y + y) * size.x + x) * channels + c; dst[index] = input.x; if (c + 1 < channels) { @@ -143,12 +143,14 @@ const sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_ __kernel void from_tensor()" + src_tensor.GetDeclaration(AccessType::READ) + ", " + params_kernel.first + R"(, int4 size, int channels) { - int x = get_global_id(0); + int linear_id = get_global_id(0); + int x = (linear_id / size.w); + int b = linear_id % size.w; int y = get_global_id(1); int d = get_global_id(2); if (x >= size.x || y >= size.y || d >= size.z) return; )" + ToCLDataType(input_def.object_def.data_type, 4) + - " input = " + src_tensor.ReadWHS("x", "y", "d") + ";\n" + + " input = " + src_tensor.ReadWHSB("x", "y", "d", "b") + ";\n" + params_kernel.second + "\n}"; queue_ = environment->queue(); dims_ = input_def.dimensions; @@ -218,7 
+220,7 @@ class ToTensorConverter : public OpenClConverterImpl { return std::make_pair( "__global " + ToCLDataType(input_def.object_def.data_type) + "* src", R"(int c = d * 4; - int index = (y * size.x + x) * channels + c; + int index = ((b * size.y + y) * size.x + x) * channels + c; result.x = src[index]; result.y = c + 1 < channels ? src[index + 1] : 1; result.z = c + 2 < channels ? src[index + 2] : 2; @@ -247,14 +249,16 @@ __kernel void to_tensor()" + params_kernel.first + ", " + dst_tensor.GetDeclaration(AccessType::WRITE) + R"(, int4 size, int channels) { - int x = get_global_id(0); + int linear_id = get_global_id(0); + int x = (linear_id / size.w); + int b = linear_id % size.w; int y = get_global_id(1); int d = get_global_id(2); if (x >= size.x || y >= size.y || d >= size.z) return; )" + ToCLDataType(output_def.object_def.data_type, 4) + " result;\n" + params_kernel.second + "\n " + - dst_tensor.WriteWHS("result", "x", "y", "d") + ";\n}"; + dst_tensor.WriteWHSB("result", "x", "y", "d", "b") + ";\n}"; queue_ = environment->queue(); dims_ = output_def.dimensions; return environment->program_cache()->GetOrCreateCLKernel( @@ -350,8 +354,8 @@ class TrivialCopier : public OpenClConverterImpl { } return GetOpenCLError(clEnqueueCopyBuffer( queue_->queue(), input.memobj, output.memobj, 0, 0, - SizeOf(data_type_) * dims_.w * dims_.h * dims_.d() * 4, 0, nullptr, - nullptr)); + SizeOf(data_type_) * dims_.w * dims_.h * dims_.d() * dims_.b * 4, 0, + nullptr, nullptr)); } absl::Status Copy(const OpenClTexture& input, const OpenClTexture& output) { From 43930312404d885f1d6de086120712ddb633afdb Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 7 Jun 2020 17:20:27 +0000 Subject: [PATCH 056/178] Update the boundary check in unravel_index to use std::all_of instead of eigen Signed-off-by: Yong Tang --- tensorflow/core/kernels/unravel_index_op.cc | 22 ++++++++------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/tensorflow/core/kernels/unravel_index_op.cc b/tensorflow/core/kernels/unravel_index_op.cc index d41915e5c14..fb8d6703d57 100644 --- a/tensorflow/core/kernels/unravel_index_op.cc +++ b/tensorflow/core/kernels/unravel_index_op.cc @@ -55,21 +55,15 @@ class UnravelIndexOp : public OpKernel { auto dims = dims_tensor.vec(); // Chek to make sure indices is not out of boundary - Eigen::Tensor check; - if (TensorShapeUtils::IsScalar(indices_tensor.shape())) { - auto indices = indices_tensor.scalar(); - auto dims_prod = dims.prod(); - check = (indices < dims_prod).all(); - } else { - auto indices = indices_tensor.vec(); - auto dims_prod = dims.prod() - .reshape(Eigen::array({1})) - .broadcast( - Eigen::array({indices_tensor.NumElements()})); - check = (indices < dims_prod).all(); - } + Eigen::Tensor dims_prod_eigen = dims.prod(); + Tidx dims_prod = dims_prod_eigen(); + const Tidx* indices = indices_tensor.flat().data(); + int64 size = indices_tensor.NumElements(); + bool check = std::all_of(indices, indices + size, [&](Tidx index) { + return index < dims_prod; + }); OP_REQUIRES( - ctx, check(), + ctx, check, errors::InvalidArgument("index is out of bound as with dims")); Eigen::array reverse({true}); From d50e348ab34abf44c42c1eee67f295f0c635cc7b Mon Sep 17 00:00:00 2001 From: nammbash Date: Mon, 8 Jun 2020 11:15:04 -0700 Subject: [PATCH 057/178] cleanup / clang formatting --- tensorflow/core/graph/mkl_graph_util.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/core/graph/mkl_graph_util.h b/tensorflow/core/graph/mkl_graph_util.h index 
0ab9db4e786..cd09ac522d7 100644 --- a/tensorflow/core/graph/mkl_graph_util.h +++ b/tensorflow/core/graph/mkl_graph_util.h @@ -216,8 +216,7 @@ static inline bool IsMklNameChangeOp(const string& op_name, DataType T) { isTypeAllowed = (T == DT_COMPLEX128 || T == DT_COMPLEX64 || T == DT_DOUBLE || T == DT_FLOAT); #ifdef ENABLE_INTEL_MKL_BFLOAT16 - isTypeAllowed = - (isTypeAllowed || CheckBfloat16Support(T)); + isTypeAllowed = (isTypeAllowed || CheckBfloat16Support(T)); #endif return isTypeAllowed; } From a5e5e94904aa60100b802c15f08b50f7ddb92586 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Mon, 8 Jun 2020 11:13:34 -0700 Subject: [PATCH 058/178] Avoid linkstatic on macOS, see https://github.com/bazelbuild/bazel/issues/11552. PiperOrigin-RevId: 315313209 Change-Id: I5d86e271b7733cf16d9b8de98d1c2ee69764608f --- third_party/clog/BUILD.bazel | 19 ++++++++++++++++++- third_party/cpuinfo/BUILD.bazel | 6 +++++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/third_party/clog/BUILD.bazel b/third_party/clog/BUILD.bazel index ee601b85f2b..e1d59304299 100644 --- a/third_party/clog/BUILD.bazel +++ b/third_party/clog/BUILD.bazel @@ -19,11 +19,20 @@ cc_library( ":windows": [], "//conditions:default": ["-Wno-unused-result"], }), + defines = select({ + # When linkstatic=False, we need default visibility + ":macos_x86_64": ["CLOG_VISIBILITY="], + "//conditions:default": [], + }), linkopts = select({ ":android": ["-llog"], "//conditions:default": [], }), - linkstatic = True, + linkstatic = select({ + # https://github.com/bazelbuild/bazel/issues/11552 + ":macos_x86_64": False, + "//conditions:default": True, + }), strip_include_prefix = "deps/clog/include", ) @@ -36,3 +45,11 @@ config_setting( name = "windows", values = {"cpu": "x64_windows"}, ) + +config_setting( + name = "macos_x86_64", + values = { + "apple_platform_type": "macos", + "cpu": "darwin", + }, +) diff --git a/third_party/cpuinfo/BUILD.bazel b/third_party/cpuinfo/BUILD.bazel index 2a2be96d82f..c2c6381950f 100644 --- a/third_party/cpuinfo/BUILD.bazel +++ b/third_party/cpuinfo/BUILD.bazel @@ -125,7 +125,11 @@ cc_library( "-Iexternal/cpuinfo/include", "-Iexternal/cpuinfo/src", ], - linkstatic = True, + linkstatic = select({ + # https://github.com/bazelbuild/bazel/issues/11552 + ":macos_x86_64": False, + "//conditions:default": True, + }), # Headers must be in textual_hdrs to allow us to set the standard to C99 textual_hdrs = [ "include/cpuinfo.h", From be20584437f8d1a063a7c918a1cc16cd9a170c49 Mon Sep 17 00:00:00 2001 From: Bruce Fontaine Date: Mon, 8 Jun 2020 11:27:48 -0700 Subject: [PATCH 059/178] Use first worker as default device in tf_function_test. 
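The expected device is now canonicalized against the first worker device when one exists. A rough sketch of the behavior relied on (the worker string below is a hypothetical example; canonicalize fills unspecified fields from the given default):

    from tensorflow.python.distribute import device_util
    worker = "/job:worker/replica:0/task:0/device:CPU:0"  # hypothetical example
    device_util.canonicalize("cpu:0", worker)
    # -> "/job:worker/replica:0/task:0/device:CPU:0"
    device_util.canonicalize("cpu:0")
    # -> falls back to the local job, e.g. "/job:localhost/replica:0/task:0/device:CPU:0"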
PiperOrigin-RevId: 315316217 Change-Id: I45565148d19756eb8a6038549ea4df5f20c05c70 --- tensorflow/python/distribute/tf_function_test.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/distribute/tf_function_test.py b/tensorflow/python/distribute/tf_function_test.py index 5dc82cfd81b..6621f51cf32 100644 --- a/tensorflow/python/distribute/tf_function_test.py +++ b/tensorflow/python/distribute/tf_function_test.py @@ -50,7 +50,11 @@ class TFFunctionTest(test.TestCase, parameterized.TestCase): self, distribution, run_functions_eagerly): def_function.run_functions_eagerly(run_functions_eagerly) - expected_device = (device_util.canonicalize("cpu:0") + try: + worker = distribution.extended.worker_devices[0] + except RuntimeError: + worker = None + expected_device = (device_util.canonicalize("cpu:0", worker) if run_functions_eagerly else "") with distribution.scope(): with ops.device_v2("cpu:0"): @@ -72,7 +76,11 @@ class TFFunctionTest(test.TestCase, parameterized.TestCase): self, distribution, run_functions_eagerly): def_function.run_functions_eagerly(run_functions_eagerly) - expected_device = (device_util.canonicalize("cpu:0") + try: + worker = distribution.extended.worker_devices[0] + except RuntimeError: + worker = None + expected_device = (device_util.canonicalize("cpu:0", worker) if run_functions_eagerly else "") with distribution.scope(): @def_function.function From 3cfba9571bcc4be237bfdfa3498c66073ae59280 Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Mon, 8 Jun 2020 11:32:43 -0700 Subject: [PATCH 060/178] Loose the check for BN when momentum == 0. Fix https://github.com/tensorflow/tensorflow/issues/38459. PiperOrigin-RevId: 315317303 Change-Id: I814fdcddec94b13296cfabb2fb80e19e7103c234 --- tensorflow/python/ops/nn_impl.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index eec352b4e2e..cb028bfe1e0 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -1615,16 +1615,11 @@ def fused_batch_norm( [Ioffe et al., 2015](http://proceedings.mlr.press/v37/ioffe15.html) ([pdf](http://proceedings.mlr.press/v37/ioffe15.pdf)) """ - if is_training and exponential_avg_factor == 1.0: - if (mean is not None) or (variance is not None): - raise ValueError("Both 'mean' and 'variance' must be None when " - "is_training is True and " - "exponential_avg_factor == 1.0.") - else: - if (mean is None) or (variance is None): - raise ValueError("Both 'mean' and 'variance' must be a 1D tensor when " - "is_training is False or " - "exponential_avg_factor != 1.0.") + if (not is_training or exponential_avg_factor != 1.0) and ( + (mean is None) or (variance is None)): + raise ValueError("Both 'mean' and 'variance' must be a 1D tensor when " + "is_training is False or " + "exponential_avg_factor != 1.0.") x = ops.convert_to_tensor(x, name="input") scale = ops.convert_to_tensor(scale, name="scale") offset = ops.convert_to_tensor(offset, name="offset") From b0b763203e98ea616f44678e194470791db7188d Mon Sep 17 00:00:00 2001 From: Gaurav Jain Date: Mon, 8 Jun 2020 11:42:27 -0700 Subject: [PATCH 061/178] Add ability for functions to share rendezvous The private `_shared_rendezvous` property allows the function to use the rendezvous of the parent. This is only needed in order to support code where raw send/recv operations are inserted and when functions are run in graph mode where they may not be inlined. 
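A condensed usage sketch, mirroring the new test added in function_test.py below (`cpu` here is simply a device string):

    from tensorflow.python.eager import def_function
    from tensorflow.python.framework import constant_op
    from tensorflow.python.ops import gen_sendrecv_ops

    cpu = '/device:CPU:0'

    @def_function.function
    def send():
      x = constant_op.constant(1)
      gen_sendrecv_ops.send(x, 'x', cpu, 0, cpu)
      return x

    # Opt the function into sharing its caller's rendezvous (private API).
    send._shared_rendezvous = True  # pylint: disable=protected-access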
PiperOrigin-RevId: 315319264 Change-Id: Ieb6b3924c51ccfd201b4693f3a499f883c7c0b71 --- tensorflow/core/framework/function.h | 2 + .../core/kernels/partitioned_function_ops.cc | 11 +++- .../core/kernels/partitioned_function_ops.h | 1 + tensorflow/python/BUILD | 1 + tensorflow/python/eager/BUILD | 1 + tensorflow/python/eager/def_function.py | 20 ++++++- tensorflow/python/eager/function.py | 1 + tensorflow/python/eager/function_test.py | 56 +++++++++++++++++++ 8 files changed, 89 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/framework/function.h b/tensorflow/core/framework/function.h index 314d57d8ba4..c588a42d0e8 100644 --- a/tensorflow/core/framework/function.h +++ b/tensorflow/core/framework/function.h @@ -344,6 +344,8 @@ class FunctionLibraryDefinition : public OpRegistryInterface { static constexpr const char* const kDeviceRetOp = "_DeviceRetval"; static constexpr const char* const kIntsOnDeviceAttr = "experimental_ints_on_device"; + static constexpr const char* const kSharedRendezvousAttr = + "shared_rendezvous"; static constexpr const char* const kGradientOp = "SymbolicGradient"; static constexpr const char* const kFuncAttr = "f"; diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc index a85f3f449fd..d8112531b73 100644 --- a/tensorflow/core/kernels/partitioned_function_ops.cc +++ b/tensorflow/core/kernels/partitioned_function_ops.cc @@ -43,7 +43,8 @@ namespace tensorflow { PartitionedCallOp::PartitionedCallOp(OpKernelConstruction* ctx) : AsyncOpKernel(ctx), func_(new NameAttrList), - config_proto_(new ConfigProto) { + config_proto_(new ConfigProto), + shared_rendezvous_(false) { OP_REQUIRES_OK( ctx, ctx->GetAttr(FunctionLibraryDefinition::kFuncAttr, func_.get())); string deprecated_config_serialized; @@ -139,6 +140,11 @@ Status PartitionedCallOp::FillOutputDevices( return errors::NotFound("Failed to find definition for function \"", func_->name(), "\""); } + auto func_attrs = fdef->attr(); + auto attr = func_attrs.find(FunctionLibraryDefinition::kSharedRendezvousAttr); + if (attr != func_attrs.end() && attr->second.b()) { + shared_rendezvous_ = true; + } bool is_type_list; for (const OpDef::ArgDef& ret_def : fdef->signature().output_arg()) { @@ -245,6 +251,9 @@ void PartitionedCallOp::RunFunction(FunctionLibraryRuntime::Handle handle, run_opts.source_device = lib->device() == nullptr ? "" : lib->device()->name(); run_opts.allow_dead_tensors = true; + if (shared_rendezvous_) { + run_opts.rendezvous = ctx->rendezvous(); + } std::vector* rets = new std::vector; const string& func_name = func_->name(); diff --git a/tensorflow/core/kernels/partitioned_function_ops.h b/tensorflow/core/kernels/partitioned_function_ops.h index 27a494a20dd..21402245edd 100644 --- a/tensorflow/core/kernels/partitioned_function_ops.h +++ b/tensorflow/core/kernels/partitioned_function_ops.h @@ -58,6 +58,7 @@ class PartitionedCallOp : public AsyncOpKernel { std::unique_ptr func_; std::unique_ptr config_proto_; string executor_type_; + bool shared_rendezvous_; mutex mu_; // Cache the handle per FLR because this kernel may be instantiated for // a stateful op, different invocations of it may use different FLRs. 
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index e71900b430f..6efc0252347 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3062,6 +3062,7 @@ tf_gen_op_wrapper_private_py(name = "rnn_ops_gen") tf_gen_op_wrapper_private_py( name = "sendrecv_ops_gen", + visibility = ["//tensorflow:internal"], deps = [ "//tensorflow/core:sendrecv_ops_op_lib", ], diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index 014cb97f72f..b4db76da05a 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -462,6 +462,7 @@ cuda_py_test( "//tensorflow/python:math_ops", "//tensorflow/python:random_seed", "//tensorflow/python:resource_variable_ops", + "//tensorflow/python:sendrecv_ops_gen", "//tensorflow/python:sparse_tensor", "//tensorflow/python:tensor_shape", "//tensorflow/python:tensor_spec", diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py index 57bbca37d41..1b4e73329c6 100644 --- a/tensorflow/python/eager/def_function.py +++ b/tensorflow/python/eager/def_function.py @@ -521,6 +521,10 @@ class Function(object): self._function_spec = function_lib.FunctionSpec.from_function_and_signature( python_function, input_signature) self._implements = experimental_implements + # If `True`, the function uses the rendezvous of the parent. This is only + # needed to support code where raw send/recv operations are inserted and + # when functions are run in graph mode where they may not be inlined. + self._shared_rendezvous = None self._autograph = autograph self._experimental_autograph_options = experimental_autograph_options self._experimental_relax_shapes = experimental_relax_shapes @@ -629,6 +633,10 @@ class Function(object): if self._implements is not None: attributes = self._create_implements_attribute() + share = self._shared_rendezvous + if share is not None: + attributes[function_lib.SHARED_RENDEZVOUS_ATTRIBUTE_NAME] = share + if self._experimental_compile is not None: attributes.update(_XlaMustCompile=bool(self._experimental_compile)) if self._experimental_compile: @@ -698,7 +706,8 @@ class Function(object): self._stateless_fn._name = self._name # pylint: disable=protected-access def _clone(self, python_function): - return Function( + """Clone the function with different python function.""" + f = Function( python_function=(self._python_function if python_function is None else python_function), name=self._name, @@ -709,6 +718,11 @@ class Function(object): experimental_relax_shapes=self._experimental_relax_shapes, experimental_compile=self._experimental_compile) + if self._shared_rendezvous: + f._shared_rendezvous = self._shared_rendezvous # pylint: disable=protected-access + + return f + def _decorate(self, decorator): """Allows the captured Python function to be decorated in place. @@ -922,8 +936,8 @@ class Function(object): @function_lib.defun(autograph=False) def initialize_variables(): op_map = object_identity.ObjectIdentityDictionary() - # Stack all the var_is_initialized values into one tensor and interpret the - # numpy value. This will reduce the number of RPCs between client and + # Stack all the var_is_initialized values into one tensor and interpret + # the numpy value. This will reduce the number of RPCs between client and # worker in the remote case. 
with ops.init_scope(): var_is_initialized = [] diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 37c802b9aa6..2505736b6c9 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -86,6 +86,7 @@ ag_ctx = lazy_loader.LazyLoader( FORWARD_FUNCTION_ATTRIBUTE_NAME = "forward_function_name" BACKWARD_FUNCTION_ATTRIBUTE_NAME = "backward_function_name" IMPLEMENTS_ATTRIBUTE_NAME = "_implements" +SHARED_RENDEZVOUS_ATTRIBUTE_NAME = "shared_rendezvous" def _make_input_signature_hashable(elem, variable_map=None): diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 078ca8b8878..da29c70dbdd 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -60,9 +60,11 @@ from tensorflow.python.ops import check_ops from tensorflow.python.ops import clip_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import data_flow_ops +from tensorflow.python.ops import functional_ops from tensorflow.python.ops import gen_functional_ops from tensorflow.python.ops import gen_random_ops from tensorflow.python.ops import gen_resource_variable_ops +from tensorflow.python.ops import gen_sendrecv_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import init_ops from tensorflow.python.ops import list_ops @@ -858,6 +860,60 @@ class FunctionTest(test.TestCase, parameterized.TestCase): pool.map(stateful, [object() for _ in range(100)]) self.assertEqual(float(v.read_value()), 0.0) + def testShareRendezvous(self): + + # Disable grappler from inlining the functions. Note we run the send & recv + # in graph mode since with eager mode the function should automatically be + # inlined. + context.context().set_optimizer_experimental_options( + {'disable_meta_optimizer': True}) + + cpu = '/device:CPU:0' + + signature = [tensor_spec.TensorSpec([], dtypes.int32)] + + @def_function.function + def send(): + x = constant_op.constant(1) + gen_sendrecv_ops.send(x, 'x', cpu, 0, cpu) + return x + + send._shared_rendezvous = True # pylint: disable=protected-access + + @def_function.function(input_signature=signature) + def send_body(n): + send() + return n - 1 + + @def_function.function + def recv(): + return gen_sendrecv_ops.recv(dtypes.int32, 'x', cpu, 0, cpu) + + recv._shared_rendezvous = True # pylint: disable=protected-access + + @def_function.function(input_signature=signature) + def recv_body(n): + recv() + return n - 1 + + @def_function.function(input_signature=signature) + def cond(n): + return n > 0 + + # Instead of calling the send & recv functions directly we want to call them + # through a functional while to ensure the rendezvous is shared across the + # while boundary. + @def_function.function + def fn(n): + functional_ops.While([n], cond.get_concrete_function(), + send_body.get_concrete_function()) + return functional_ops.While([n], cond.get_concrete_function(), + recv_body.get_concrete_function()) + + # Use a graph context since functions will not be automatically inlined + with context.graph_mode(), self.cached_session(): + self.evaluate(fn(2)) + def disabled_testRandomSeed(self): @def_function.function From 81ceabffc6a637fad6be081897752c122a493b27 Mon Sep 17 00:00:00 2001 From: Dimitris Vardoulakis Date: Mon, 8 Jun 2020 11:44:27 -0700 Subject: [PATCH 062/178] [XLA:HLO] Small refactoring and more comments in tuple_simplifier. 
Explain that optimizing partially used tuples within a single computation falls out of the existing optimizations. There is still the option to optimize partially used tuples across computations. Will look into that in a separate CL. PiperOrigin-RevId: 315319714 Change-Id: Ifcc41929cb8213cab661ccefea00138e099d551e --- .../compiler/xla/service/tuple_simplifier.cc | 76 +++++++++---------- .../compiler/xla/service/tuple_simplifier.h | 15 ++++ 2 files changed, 51 insertions(+), 40 deletions(-) diff --git a/tensorflow/compiler/xla/service/tuple_simplifier.cc b/tensorflow/compiler/xla/service/tuple_simplifier.cc index e9c1d93f495..b6978496163 100644 --- a/tensorflow/compiler/xla/service/tuple_simplifier.cc +++ b/tensorflow/compiler/xla/service/tuple_simplifier.cc @@ -33,6 +33,36 @@ namespace xla { TupleSimplifier::TupleSimplifier(bool exclude_entry_computation) : exclude_entry_computation_(exclude_entry_computation) {} +StatusOr TupleSimplifier::RemoveWholeTuple(HloInstruction* tuple) { + bool changed = false; + HloInstruction* top_tuple = nullptr; + bool can_simplify = true; + for (int64 operand_number = 0; operand_number < tuple->operand_count(); + ++operand_number) { + HloInstruction* operand = tuple->mutable_operand(operand_number); + if (operand->opcode() != HloOpcode::kGetTupleElement || + operand->tuple_index() != operand_number) { + can_simplify = false; + break; + } + if (top_tuple == nullptr) { + top_tuple = operand->mutable_operand(0); + if (!ShapeUtil::Compatible(top_tuple->shape(), tuple->shape())) { + can_simplify = false; + break; + } + } else if (top_tuple != operand->operand(0)) { + can_simplify = false; + break; + } + } + if (can_simplify && top_tuple != nullptr) { + changed = true; + TF_RETURN_IF_ERROR(tuple->parent()->ReplaceInstruction(tuple, top_tuple)); + } + return changed; +} + StatusOr TupleSimplifier::Run(HloModule* module) { // Initially add all GTE and Tuple instructions to the worklist. bool changed = false; @@ -43,46 +73,7 @@ StatusOr TupleSimplifier::Run(HloModule* module) { } for (auto* instruction : computation->MakeInstructionPostOrder()) { if (instruction->opcode() == HloOpcode::kTuple) { - // Collapse the following structure into just 'Tuple-shaped Op': - // - // Tuple-shaped Op - // | - // +-----+-----+ - // | | | - // GTE GTE GTE - // | | | - // +-----+-----+ - // | - // Tuple - // - HloInstruction* top_tuple = nullptr; - bool can_simplify = true; - for (int64 operand_number = 0; - operand_number < instruction->operand_count(); ++operand_number) { - HloInstruction* operand = - instruction->mutable_operand(operand_number); - if (operand->opcode() != HloOpcode::kGetTupleElement || - operand->tuple_index() != operand_number) { - can_simplify = false; - break; - } - if (top_tuple == nullptr) { - top_tuple = operand->mutable_operand(0); - if (!ShapeUtil::Compatible(top_tuple->shape(), - instruction->shape())) { - can_simplify = false; - break; - } - } else if (top_tuple != operand->operand(0)) { - can_simplify = false; - break; - } - } - if (can_simplify && top_tuple != nullptr) { - changed = true; - TF_RETURN_IF_ERROR( - computation->ReplaceInstruction(instruction, top_tuple)); - } + TF_ASSIGN_OR_RETURN(changed, RemoveWholeTuple(instruction)); } else { auto ancestor = instruction->LatestNonGteAncestorAndIndex(); if (ancestor.first == instruction) { @@ -102,6 +93,11 @@ StatusOr TupleSimplifier::Run(HloModule* module) { // GTE // | // GTE + // + // Note that this deletes the Tuple instruction altogether. 
In addition, + // if only a subset of tuple's elements are used, this transform + // optimizes them one at a time, and after the last use is optimized, + // the Tuple will also be deleted. if (ShapeUtil::Compatible(ancestor.first->shape(), instruction->shape())) { changed = true; diff --git a/tensorflow/compiler/xla/service/tuple_simplifier.h b/tensorflow/compiler/xla/service/tuple_simplifier.h index e126a530234..b912345428b 100644 --- a/tensorflow/compiler/xla/service/tuple_simplifier.h +++ b/tensorflow/compiler/xla/service/tuple_simplifier.h @@ -18,6 +18,7 @@ limitations under the License. #include +#include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_pass_interface.h" @@ -41,6 +42,20 @@ class TupleSimplifier : public HloModulePass { // apart from the module's entry computation. This is used by Graphcore's // backend. bool exclude_entry_computation_; + + // Collapse the following structure into just 'Tuple-shaped Op': + // + // Tuple-shaped Op + // | + // +-----+-----+ + // | | | + // GTE GTE GTE + // | | | + // +-----+-----+ + // | + // Tuple + // + StatusOr RemoveWholeTuple(HloInstruction* tuple); }; } // namespace xla From e60c1ba960e598be9c0e0cdd331cdc10e8919dbb Mon Sep 17 00:00:00 2001 From: Thomas Joerg Date: Mon, 8 Jun 2020 11:48:32 -0700 Subject: [PATCH 063/178] [XLA] Improve numerical stability of Logistic. PiperOrigin-RevId: 315320526 Change-Id: Iedfd22d0fb657cb31dda537786ce001f1dab168b --- tensorflow/compiler/tests/unary_ops_test.py | 10 ++++++++++ tensorflow/compiler/xla/client/lib/math.cc | 4 ++-- tensorflow/python/kernel_tests/rnn_cell_test.py | 7 ++----- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py index 85bf89c4f9e..f2ec6be43cb 100644 --- a/tensorflow/compiler/tests/unary_ops_test.py +++ b/tensorflow/compiler/tests/unary_ops_test.py @@ -510,6 +510,16 @@ class UnaryOpsTest(xla_test.XLATestCase): ], dtype=dtype)) + @test_util.disable_mlir_bridge( + "TODO(b/155501444): Handle _UnaryOpsComposition ops from Grappler") + def testFloatOpsDisabledOnMlirBridge(self): + for dtype in self.float_types: + if dtype != np.float16: + self._assertOpOutputMatchesExpected( + lambda x: math_ops.sigmoid(x) / math_ops.log1p(math_ops.exp(x)), + np.array([-40, 40], dtype=dtype), + expected=np.array([1.0, 0.025], dtype=dtype)) + @test_util.disable_mlir_bridge( "TODO(b/153812660): Handle tf.QuantizeAndDequantize compilation") def testQuantizeAndDequantize(self): diff --git a/tensorflow/compiler/xla/client/lib/math.cc b/tensorflow/compiler/xla/client/lib/math.cc index f2ee94a0159..6cbaa043055 100644 --- a/tensorflow/compiler/xla/client/lib/math.cc +++ b/tensorflow/compiler/xla/client/lib/math.cc @@ -1394,8 +1394,8 @@ XlaOp NextAfter(XlaOp from, XlaOp to) { } XlaOp Logistic(XlaOp x) { - auto half = xla::ScalarLike(x, 0.5); - return half + half * xla::Tanh(half * x); + auto one = xla::ScalarLike(x, 1); + return xla::Div(one, (one + xla::Exp(xla::Neg(x)))); } // Computes an approximation to the modified Bessel function of the first kind, diff --git a/tensorflow/python/kernel_tests/rnn_cell_test.py b/tensorflow/python/kernel_tests/rnn_cell_test.py index d29c533badf..9de14006de2 100644 --- a/tensorflow/python/kernel_tests/rnn_cell_test.py +++ b/tensorflow/python/kernel_tests/rnn_cell_test.py @@ -35,7 +35,6 @@ from tensorflow.python.framework import tensor_shape from 
tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import control_flow_v2_toggles from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops @@ -1016,8 +1015,7 @@ class LSTMTest(test.TestCase): }) comparison_fn = self.assertAllEqual - if (test_util.is_xla_enabled() and - control_flow_v2_toggles.control_flow_v2_enabled()): + if test_util.is_xla_enabled(): comparison_fn = self.assertAllClose if in_graph_mode: comparison_fn(outputs_static, outputs_dynamic) @@ -1107,8 +1105,7 @@ class LSTMTest(test.TestCase): }) comparison_fn = self.assertAllEqual - if (test_util.is_xla_enabled() and - control_flow_v2_toggles.control_flow_v2_enabled()): + if test_util.is_xla_enabled(): comparison_fn = self.assertAllClose if in_graph_mode: comparison_fn(outputs_static, outputs_dynamic) From a00daa2f37954ed7d1fae09dfad81b3168b76715 Mon Sep 17 00:00:00 2001 From: Yujing Zhang Date: Mon, 8 Jun 2020 11:48:41 -0700 Subject: [PATCH 064/178] Insert a PackOp between per-replica arg nodes and a dst node which is not assigned a replica device. The dst should be responsible for unpacking the packed tensor. PiperOrigin-RevId: 315320567 Change-Id: Ic7a94e33e8de3c9f98c735d72c0609486afc490e --- tensorflow/core/common_runtime/BUILD | 1 + .../replicate_per_replica_nodes.cc | 37 ++++++++++++++++--- .../replicate_per_replica_nodes_test.cc | 18 ++++++--- 3 files changed, 45 insertions(+), 11 deletions(-) diff --git a/tensorflow/core/common_runtime/BUILD b/tensorflow/core/common_runtime/BUILD index 016896b36f4..ec9fe0ef688 100644 --- a/tensorflow/core/common_runtime/BUILD +++ b/tensorflow/core/common_runtime/BUILD @@ -1361,6 +1361,7 @@ cc_library( hdrs = ["replicate_per_replica_nodes.h"], copts = tf_copts(), deps = [ + "//tensorflow/core:framework", "//tensorflow/core:graph", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", diff --git a/tensorflow/core/common_runtime/replicate_per_replica_nodes.cc b/tensorflow/core/common_runtime/replicate_per_replica_nodes.cc index fbae80aef55..610dc1b8835 100644 --- a/tensorflow/core/common_runtime/replicate_per_replica_nodes.cc +++ b/tensorflow/core/common_runtime/replicate_per_replica_nodes.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/replicate_per_replica_nodes.h" #include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/node_def_builder.h" namespace tensorflow { namespace { @@ -115,12 +116,36 @@ class ReplicateHelper { // This happens when the dst node runs on a host CPU and // captures a function with an arg node assigned to the same // composite device (e.g. ScanDataset). - // For this case, we only need to add an edge connecting the arg - // node in the outer function and the corresponding arg in the - // inner function, since the host CPU only needs one copy of the - // ResourceHandle. - graph->AddEdge(src_replicated_nodes.at(0), edge->src_output(), dst, - edge->dst_input()); + // For this case, we insert a PackOp between replicated nodes and the + // dst node. The dst node is responsible for unpacking the packed + // tensor. + // Add '/Packed' as a substring to the name of the new node, which + // could be helpful when debugging the graph. 
+ NodeDefBuilder pack_builder( + graph->NewName(absl::StrCat(edge->src()->name(), "/Packed")), + "Pack"); + const int num_replicas = src_replicated_nodes.size(); + pack_builder.Attr("N", num_replicas); + const DataType dtype = edge->src()->output_type(edge->src_output()); + pack_builder.Attr("T", dtype); + std::vector inputs; + inputs.reserve(src_replicated_nodes.size()); + for (Node* replicated_node : src_replicated_nodes) { + inputs.emplace_back(NodeDefBuilder::NodeOut{ + replicated_node->name(), edge->src_output(), dtype}); + } + pack_builder.Input(inputs); + NodeDef pack_def; + TF_RETURN_IF_ERROR(pack_builder.Finalize(&pack_def)); + Status status; + Node* pack_node = graph->AddNode(pack_def, &status); + TF_RETURN_IF_ERROR(status); + pack_node->set_assigned_device_name(dst->assigned_device_name()); + for (int i = 0; i < src_replicated_nodes.size(); ++i) { + graph->AddEdge(src_replicated_nodes[i], edge->src_output(), + pack_node, i); + } + graph->AddEdge(pack_node, /*x=*/0, dst, edge->dst_input()); } else { return errors::InvalidArgument( "Dst node should be assigned to an allowed device. Found an " diff --git a/tensorflow/core/common_runtime/replicate_per_replica_nodes_test.cc b/tensorflow/core/common_runtime/replicate_per_replica_nodes_test.cc index db05907710c..0bf2001a955 100644 --- a/tensorflow/core/common_runtime/replicate_per_replica_nodes_test.cc +++ b/tensorflow/core/common_runtime/replicate_per_replica_nodes_test.cc @@ -258,16 +258,24 @@ TEST(ReplicatePerReplicaNodesTest, NestedFunctions) { ReplicatePerReplicaNodesInFunctionGraph(composite_devices, &graph)); { - // _Arg(TPU:0) -> Func(CPU:0) -> _Retval(CPU:0) - EXPECT_EQ(graph.num_op_nodes(), 4); + // _Arg(TPU:0), _Arg(TPU:1) -> Pack(CPU:0) -> Func(CPU:0) -> _Retval(CPU:0) + EXPECT_EQ(graph.num_op_nodes(), 5); GraphHelper helper(graph); helper.CheckAssignedDevice("arg/R0", "TPU:0"); helper.CheckAssignedDevice("arg/R1", "TPU:1"); + helper.CheckAssignedDevice("arg/Packed", "CPU:0"); helper.CheckAssignedDevice("func", "CPU:0"); helper.CheckAssignedDevice("ret", "CPU:0"); - const EdgeSet& in_edges = helper.GetNodeByName("func")->in_edges(); - EXPECT_EQ(in_edges.size(), 1); - EXPECT_EQ(helper.GetNodeByName("arg/R0"), (*in_edges.begin())->src()); + const EdgeSet& packed_in_edges = + helper.GetNodeByName("arg/Packed")->in_edges(); + EXPECT_EQ(packed_in_edges.size(), 2); + auto it = packed_in_edges.begin(); + EXPECT_EQ(helper.GetNodeByName("arg/R0"), (*it++)->src()); + EXPECT_EQ(helper.GetNodeByName("arg/R1"), (*it)->src()); + const EdgeSet& func_in_edges = helper.GetNodeByName("func")->in_edges(); + EXPECT_EQ(func_in_edges.size(), 1); + EXPECT_EQ(helper.GetNodeByName("arg/Packed"), + (*func_in_edges.begin())->src()); } } From 2a85bf4a14cf02f7b9cc6258c750f5f0e9fb385c Mon Sep 17 00:00:00 2001 From: Jared Duke Date: Mon, 8 Jun 2020 11:51:01 -0700 Subject: [PATCH 065/178] Fix minimal logging build for macos PiperOrigin-RevId: 315321111 Change-Id: I205b82403e663bc415156cbe7e1d82e3b8866e93 --- tensorflow/core/platform/platform.h | 1 + tensorflow/lite/BUILD | 6 ++++++ tensorflow/lite/model_test.cc | 4 ---- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/platform/platform.h b/tensorflow/core/platform/platform.h index 46142bc54bf..a840d7b06e3 100644 --- a/tensorflow/core/platform/platform.h +++ b/tensorflow/core/platform/platform.h @@ -34,6 +34,7 @@ limitations under the License. 
#define PLATFORM_POSIX_IOS #define IS_MOBILE_PLATFORM #else +// If no platform specified, use: #define PLATFORM_POSIX #endif diff --git a/tensorflow/lite/BUILD b/tensorflow/lite/BUILD index 4eb89151f46..06a5fee9739 100644 --- a/tensorflow/lite/BUILD +++ b/tensorflow/lite/BUILD @@ -385,6 +385,7 @@ cc_test( features = ["-dynamic_link_test_srcs"], # see go/dynamic_link_test_srcs tags = [ "tflite_not_portable_ios", # TODO(b/117786830) + "tflite_smoke_test", ], deps = [ ":external_cpu_backend_context", @@ -453,6 +454,7 @@ cc_test( ], tags = [ "tflite_not_portable", + "tflite_smoke_test", ], deps = [ ":framework", @@ -501,6 +503,7 @@ cc_test( "no_windows", # No weak symbols with MSVC. "tflite_not_portable_android", "tflite_not_portable_ios", + "tflite_smoke_test", ], deps = [ ":framework", @@ -567,6 +570,9 @@ cc_library( "//tensorflow:ios": [ "minimal_logging_ios.cc", ], + "//tensorflow:macos": [ + "minimal_logging_default.cc", + ], "//conditions:default": [ "minimal_logging_default.cc", ], diff --git a/tensorflow/lite/model_test.cc b/tensorflow/lite/model_test.cc index b6269d6eaf4..ba96494225c 100644 --- a/tensorflow/lite/model_test.cc +++ b/tensorflow/lite/model_test.cc @@ -18,10 +18,6 @@ limitations under the License. #include #include #include -#include -#include -#include - #include #include From 50333e08239419b961f0f126e5432ab0c8a898dc Mon Sep 17 00:00:00 2001 From: Nat Jeffries Date: Mon, 8 Jun 2020 12:18:26 -0700 Subject: [PATCH 066/178] Move xtensa kernels to prepare for https://github.com/tensorflow/tensorflow/pull/39561. PiperOrigin-RevId: 315326758 Change-Id: Iea6cd08553f1f19abbf874c6b4c6233f847e1137 --- tensorflow/lite/micro/kernels/BUILD | 20 +++++++++---------- .../conv.cc | 2 +- .../depthwise_conv.cc | 2 +- .../fixedpoint_utils.h | 0 .../fully_connected.cc | 2 +- .../quantize.cc | 2 +- .../softmax.cc | 0 .../svdf.cc | 2 +- 8 files changed, 15 insertions(+), 15 deletions(-) rename tensorflow/lite/micro/kernels/{xtensa_hifimini => xtensa_hifimini_legacy}/conv.cc (99%) rename tensorflow/lite/micro/kernels/{xtensa_hifimini => xtensa_hifimini_legacy}/depthwise_conv.cc (99%) rename tensorflow/lite/micro/kernels/{xtensa_hifimini => xtensa_hifimini_legacy}/fixedpoint_utils.h (100%) rename tensorflow/lite/micro/kernels/{xtensa_hifimini => xtensa_hifimini_legacy}/fully_connected.cc (99%) rename tensorflow/lite/micro/kernels/{xtensa_hifimini => xtensa_hifimini_legacy}/quantize.cc (98%) rename tensorflow/lite/micro/kernels/{xtensa_hifimini => xtensa_hifimini_legacy}/softmax.cc (100%) rename tensorflow/lite/micro/kernels/{xtensa_hifimini => xtensa_hifimini_legacy}/svdf.cc (99%) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index 229b764a42d..5a429caba4e 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -11,7 +11,7 @@ load( licenses(["notice"]) # Apache 2.0 config_setting( - name = "xtensa_hifimini", + name = "xtensa_hifimini_legacy", define_values = {"tflm_build": "xtensa_hifimini"}, ) @@ -63,14 +63,14 @@ cc_library( "softmax.cc", "svdf.cc", ], - ":xtensa_hifimini": [ - "xtensa_hifimini/conv.cc", - "xtensa_hifimini/depthwise_conv.cc", - "xtensa_hifimini/fixedpoint_utils.h", - "xtensa_hifimini/fully_connected.cc", - "xtensa_hifimini/quantize.cc", - "xtensa_hifimini/softmax.cc", - "xtensa_hifimini/svdf.cc", + ":xtensa_hifimini_legacy": [ + "xtensa_hifimini_legacy/conv.cc", + "xtensa_hifimini_legacy/depthwise_conv.cc", + "xtensa_hifimini_legacy/fixedpoint_utils.h", + "xtensa_hifimini_legacy/fully_connected.cc", 
+ "xtensa_hifimini_legacy/quantize.cc", + "xtensa_hifimini_legacy/softmax.cc", + "xtensa_hifimini_legacy/svdf.cc", ], }), hdrs = ["micro_ops.h"], @@ -101,7 +101,7 @@ cc_library( "//tensorflow/lite/micro:micro_utils", ] + select({ "//conditions:default": [], - ":xtensa_hifimini": [ + ":xtensa_hifimini_legacy": [ #"//third_party/xtensa/cstub64s:hifi_mini", ], }), diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/conv.cc similarity index 99% rename from tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc rename to tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/conv.cc index 8895ccf52d7..eee4da97a58 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/conv.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/conv.cc @@ -24,7 +24,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" -#include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h" +#include "tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/fixedpoint_utils.h" namespace tflite { namespace ops { diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/depthwise_conv.cc similarity index 99% rename from tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc rename to tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/depthwise_conv.cc index cf7552c57b5..21ca078c82a 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/depthwise_conv.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/depthwise_conv.cc @@ -24,7 +24,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" -#include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h" +#include "tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/fixedpoint_utils.h" namespace tflite { namespace ops { diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h b/tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/fixedpoint_utils.h similarity index 100% rename from tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h rename to tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/fixedpoint_utils.h diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/fully_connected.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/fully_connected.cc similarity index 99% rename from tensorflow/lite/micro/kernels/xtensa_hifimini/fully_connected.cc rename to tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/fully_connected.cc index 39f07862753..cc9ff2aa0a7 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/fully_connected.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/fully_connected.cc @@ -24,7 +24,7 @@ limitations under the License. 
#include "tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h" +#include "tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/fixedpoint_utils.h" namespace tflite { namespace ops { diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/quantize.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/quantize.cc similarity index 98% rename from tensorflow/lite/micro/kernels/xtensa_hifimini/quantize.cc rename to tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/quantize.cc index 29b2544a625..cfd931ab73e 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/quantize.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/quantize.cc @@ -21,7 +21,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/quantization_util.h" #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h" +#include "tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/fixedpoint_utils.h" namespace tflite { namespace ops { diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/softmax.cc similarity index 100% rename from tensorflow/lite/micro/kernels/xtensa_hifimini/softmax.cc rename to tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/softmax.cc diff --git a/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc b/tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/svdf.cc similarity index 99% rename from tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc rename to tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/svdf.cc index 4f784d32b2e..02bb72976dd 100644 --- a/tensorflow/lite/micro/kernels/xtensa_hifimini/svdf.cc +++ b/tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/svdf.cc @@ -24,7 +24,7 @@ limitations under the License. #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/op_macros.h" #include "tensorflow/lite/micro/kernels/activation_utils.h" -#include "tensorflow/lite/micro/kernels/xtensa_hifimini/fixedpoint_utils.h" +#include "tensorflow/lite/micro/kernels/xtensa_hifimini_legacy/fixedpoint_utils.h" namespace tflite { namespace ops { From c665f60b4cd5a4a8c67dcf33297d8c72218104bc Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Mon, 8 Jun 2020 21:45:46 +0200 Subject: [PATCH 067/178] xrange() was removed from Python on 1/1/2020 Like #40241 --- tensorflow/python/training/tracking/benchmarks_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/training/tracking/benchmarks_test.py b/tensorflow/python/training/tracking/benchmarks_test.py index 666adf78c58..c2f2a6872a8 100644 --- a/tensorflow/python/training/tracking/benchmarks_test.py +++ b/tensorflow/python/training/tracking/benchmarks_test.py @@ -77,7 +77,7 @@ class SavingBenchmarks(test.Benchmark): def _run(self, func, num_iters, execution_mode=None): func() start = time.time() - for _ in xrange(num_iters): + for _ in range(num_iters): func() end = time.time() mean_us = (end - start) * 1e6 / num_iters From d91e83ceb7399407cd7b4f769d5d73b5be1560ae Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Mon, 8 Jun 2020 12:19:30 -0700 Subject: [PATCH 068/178] Moved Arguments to base class(GPUOperation). 
PiperOrigin-RevId: 315326958 Change-Id: Ib2d8c1810e109a267b4beed5f64e4e046f8a3abd --- tensorflow/lite/delegates/gpu/cl/kernels/BUILD | 4 +--- tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc | 2 ++ tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h | 2 ++ tensorflow/lite/delegates/gpu/cl/kernels/softmax.cc | 2 -- tensorflow/lite/delegates/gpu/cl/kernels/softmax.h | 2 -- tensorflow/lite/delegates/gpu/cl/kernels/transpose.cc | 3 --- tensorflow/lite/delegates/gpu/cl/kernels/transpose.h | 2 -- tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc | 2 -- tensorflow/lite/delegates/gpu/cl/kernels/winograd.h | 2 -- 9 files changed, 5 insertions(+), 16 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD index 5c0099fa0a9..8c99fc1c204 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/kernels/BUILD @@ -764,6 +764,7 @@ cc_library( ":tuning_parameters", ":util", ":work_group_picking", + "//tensorflow/lite/delegates/gpu/cl:arguments", "//tensorflow/lite/delegates/gpu/cl:cl_context", "//tensorflow/lite/delegates/gpu/cl:cl_device", "//tensorflow/lite/delegates/gpu/cl:precision", @@ -1123,7 +1124,6 @@ cc_library( ":gpu_operation", ":util", ":work_group_picking", - "//tensorflow/lite/delegates/gpu/cl:arguments", "//tensorflow/lite/delegates/gpu/cl:cl_kernel", "//tensorflow/lite/delegates/gpu/cl:precision", "//tensorflow/lite/delegates/gpu/cl:tensor", @@ -1255,7 +1255,6 @@ cc_library( ":gpu_operation", ":util", ":work_group_picking", - "//tensorflow/lite/delegates/gpu/cl:arguments", "//tensorflow/lite/delegates/gpu/common:operations", "//tensorflow/lite/delegates/gpu/common:types", "@com_google_absl//absl/strings", @@ -1350,7 +1349,6 @@ cc_library( ":gpu_operation", ":util", ":work_group_picking", - "//tensorflow/lite/delegates/gpu/cl:arguments", "//tensorflow/lite/delegates/gpu/cl:cl_device", "//tensorflow/lite/delegates/gpu/cl:cl_kernel", "//tensorflow/lite/delegates/gpu/cl:linear_storage", diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc index 3aa01981844..28a3d1cc42b 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.cc @@ -122,6 +122,7 @@ GPUOperation::GPUOperation(GPUOperation&& operation) : definition_(std::move(operation.definition_)), src_(std::move(operation.src_)), dst_(std::move(operation.dst_)), + args_(std::move(operation.args_)), linked_operations_(std::move(operation.linked_operations_)) {} GPUOperation& GPUOperation::operator=(GPUOperation&& operation) { @@ -129,6 +130,7 @@ GPUOperation& GPUOperation::operator=(GPUOperation&& operation) { definition_ = std::move(operation.definition_); src_ = std::move(operation.src_); dst_ = std::move(operation.dst_); + args_ = std::move(operation.args_); linked_operations_ = std::move(operation.linked_operations_); } return *this; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h index 71b1b8807a5..627ef65ae34 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h @@ -20,6 +20,7 @@ limitations under the License. 
#include #include +#include "tensorflow/lite/delegates/gpu/cl/arguments.h" #include "tensorflow/lite/delegates/gpu/cl/cl_context.h" #include "tensorflow/lite/delegates/gpu/cl/cl_device.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/tuning_parameters.h" @@ -114,6 +115,7 @@ class GPUOperation { OperationDef definition_; std::vector src_; std::vector dst_; + Arguments args_; std::vector linked_operations_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/softmax.cc b/tensorflow/lite/delegates/gpu/cl/kernels/softmax.cc index 44c0c883500..50215f372ef 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax.cc @@ -76,13 +76,11 @@ std::string GetSoftmaxKernelCode( Softmax::Softmax(Softmax&& kernel) : GPUOperation(std::move(kernel)), - args_(std::move(kernel.args_)), kernel_(std::move(kernel.kernel_)), work_group_size_(kernel.work_group_size_) {} Softmax& Softmax::operator=(Softmax&& kernel) { if (this != &kernel) { - args_ = std::move(kernel.args_); kernel_ = std::move(kernel.kernel_); std::swap(work_group_size_, kernel.work_group_size_); GPUOperation::operator=(std::move(kernel)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/softmax.h b/tensorflow/lite/delegates/gpu/cl/kernels/softmax.h index f9598542b11..703a40a4e89 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/softmax.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/softmax.h @@ -16,7 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_SOFTMAX_H_ #define TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_SOFTMAX_H_ -#include "tensorflow/lite/delegates/gpu/cl/arguments.h" #include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h" #include "tensorflow/lite/delegates/gpu/cl/precision.h" @@ -47,7 +46,6 @@ class Softmax : public GPUOperation { private: absl::Status BindArguments(); int3 GetGridSize() const; - Arguments args_; CLKernel kernel_; int3 work_group_size_ = int3(8, 4, 1); }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/transpose.cc b/tensorflow/lite/delegates/gpu/cl/kernels/transpose.cc index 6f1d49a1494..b5ff9d483ab 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/transpose.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/transpose.cc @@ -18,7 +18,6 @@ limitations under the License. #include #include "absl/strings/substitute.h" -#include "tensorflow/lite/delegates/gpu/cl/arguments.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/util.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h" @@ -116,14 +115,12 @@ std::string GetTransposeCode( Transpose::Transpose(Transpose&& operation) : GPUOperation(std::move(operation)), attr_(operation.attr_), - args_(std::move(operation.args_)), kernel_(std::move(operation.kernel_)), work_group_size_(operation.work_group_size_) {} Transpose& Transpose::operator=(Transpose&& operation) { if (this != &operation) { attr_ = operation.attr_; - args_ = std::move(operation.args_); kernel_ = std::move(operation.kernel_); std::swap(work_group_size_, operation.work_group_size_); GPUOperation::operator=(std::move(operation)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/transpose.h b/tensorflow/lite/delegates/gpu/cl/kernels/transpose.h index 13f06281012..61038b1e0ca 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/transpose.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/transpose.h @@ -16,7 +16,6 @@ limitations under the License. 
#ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_TRANSPOSE_H_ #define TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_TRANSPOSE_H_ -#include "tensorflow/lite/delegates/gpu/cl/arguments.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h" #include "tensorflow/lite/delegates/gpu/common/operations.h" #include "tensorflow/lite/delegates/gpu/common/types.h" @@ -44,7 +43,6 @@ class Transpose : public GPUOperation { int3 GetGridSize() const; TransposeAttributes attr_; - Arguments args_; CLKernel kernel_; int3 work_group_size_; }; diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc index 66687c40c6a..a3323c5db2c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.cc @@ -366,14 +366,12 @@ std::string GetWinograd36To4x4Code( Winograd4x4To36::Winograd4x4To36(Winograd4x4To36&& operation) : GPUOperation(std::move(operation)), padding_(operation.padding_), - args_(std::move(operation.args_)), kernel_(std::move(operation.kernel_)), work_group_size_(operation.work_group_size_) {} Winograd4x4To36& Winograd4x4To36::operator=(Winograd4x4To36&& operation) { if (this != &operation) { std::swap(padding_, operation.padding_); - args_ = std::move(operation.args_); kernel_ = std::move(operation.kernel_); std::swap(work_group_size_, operation.work_group_size_); GPUOperation::operator=(std::move(operation)); diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h index 02e3c268b28..ec8fe22ea11 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h +++ b/tensorflow/lite/delegates/gpu/cl/kernels/winograd.h @@ -16,7 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_WINOGRAD_H_ #define TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_WINOGRAD_H_ -#include "tensorflow/lite/delegates/gpu/cl/arguments.h" #include "tensorflow/lite/delegates/gpu/cl/cl_kernel.h" #include "tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h" #include "tensorflow/lite/delegates/gpu/cl/linear_storage.h" @@ -62,7 +61,6 @@ class Winograd4x4To36 : public GPUOperation { Padding2D padding_; - Arguments args_; CLKernel kernel_; int3 work_group_size_ = int3(128, 1, 1); }; From 73d8cfcf8baae5a0f1f132d1780e5df6d95d7f3e Mon Sep 17 00:00:00 2001 From: Kibeom Kim Date: Mon, 8 Jun 2020 12:31:46 -0700 Subject: [PATCH 069/178] Disable tfrt from Tensorflow TAP and add it as a separate target. PiperOrigin-RevId: 315329431 Change-Id: I7e17a6bb78c00a3030db3464863b0efe429a6c33 --- tensorflow/tensorflow.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 2730685a26f..95b51b2e9b9 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -2283,7 +2283,7 @@ def tf_py_test( kernels = kernels, main = main, shard_count = shard_count, - tags = tags, + tags = tags + ["tfrt"], visibility = [clean_dep("//tensorflow:internal")] + additional_visibility, deps = depset(deps + xla_test_true_list + ["//tensorflow/python:is_tfrt_test_true"]), From 05355c404aff08f8b9043f2837fa6059872fcf7e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 8 Jun 2020 12:44:45 -0700 Subject: [PATCH 070/178] [tf.data] Update the input time computation in the InterleaveMany node. 
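The one-line change below adjusts how the InterleaveMany node propagates input
time to its inputs: the parent's input time is now added to the node's
self-processing time before the sum is scaled by (num_inputs() - 1), instead of
scaling only the self-processing time. A small sketch with made-up numbers,
not taken from any real pipeline, to show how the two formulas differ:

  #include <cstdio>

  int main() {
    // Purely illustrative values.
    double old_input_time = 10.0;       // input time handed down by the parent
    double self_processing_time = 2.0;  // SelfProcessingTimeLocked()
    int num_inputs = 3;                 // num_inputs() of the node

    double before = old_input_time + self_processing_time * (num_inputs - 1);
    double after = (old_input_time + self_processing_time) * (num_inputs - 1);
    std::printf("before: %.1f, after: %.1f\n", before, after);  // 14.0 vs 24.0
    return 0;
  }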
PiperOrigin-RevId: 315331988 Change-Id: I105dd74bb87092f2d56781cdb8e28d2c39360100 --- tensorflow/core/framework/model.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/framework/model.cc b/tensorflow/core/framework/model.cc index a88950e4abc..223dbfef3c2 100644 --- a/tensorflow/core/framework/model.cc +++ b/tensorflow/core/framework/model.cc @@ -59,9 +59,8 @@ class InterleaveMany : public Node { (*input_times)[long_name()] = old_input_time; return; } - double new_input_time = - old_input_time + - SelfProcessingTimeLocked() * static_cast(num_inputs() - 1); + double new_input_time = (old_input_time + SelfProcessingTimeLocked()) * + static_cast(num_inputs() - 1); (*input_times)[long_name()] = new_input_time; } From 10ff91a6dc59c2498668c553ca7f1f2dd264271d Mon Sep 17 00:00:00 2001 From: Meghna Natraj Date: Mon, 8 Jun 2020 13:09:27 -0700 Subject: [PATCH 071/178] Update test case for modify model interface. The input and output float tensors are in the beginning and end of the model respectively. PiperOrigin-RevId: 315337239 Change-Id: I9f0efe0b3abfa82214fc9ac5ce35b4c340b7e24d --- .../optimize/modify_model_interface_test.cc | 390 +++++++++--------- 1 file changed, 196 insertions(+), 194 deletions(-) diff --git a/tensorflow/lite/tools/optimize/modify_model_interface_test.cc b/tensorflow/lite/tools/optimize/modify_model_interface_test.cc index 5a04f28f638..9e4808127bb 100644 --- a/tensorflow/lite/tools/optimize/modify_model_interface_test.cc +++ b/tensorflow/lite/tools/optimize/modify_model_interface_test.cc @@ -26,10 +26,8 @@ namespace tflite { namespace optimize { namespace { -using ::testing::ElementsAreArray; - -// Create a model with 1 quant, 1 FC, 1 dequant -std::unique_ptr CreateModelSingleInputOutput() { +// Create a quantized model with 1 quant, 1 FC, 1 dequant +std::unique_ptr CreateQuantizedModelSingleInputOutput() { auto model = absl::make_unique(); auto subgraph = absl::make_unique(); auto buffer = absl::make_unique(); @@ -54,15 +52,15 @@ std::unique_ptr CreateModelSingleInputOutput() { // Op. quant_op->opcode_index = 0; - quant_op->inputs = {2}; - quant_op->outputs = {0}; + quant_op->inputs = {0}; + quant_op->outputs = {1}; fc_op->opcode_index = 1; - fc_op->inputs = {0}; - fc_op->outputs = {1}; + fc_op->inputs = {1}; + fc_op->outputs = {2}; dequant_op->opcode_index = 2; - dequant_op->inputs = {1}; + dequant_op->inputs = {2}; dequant_op->outputs = {3}; model->subgraphs[0]->operators.push_back(std::move(quant_op)); @@ -73,32 +71,31 @@ std::unique_ptr CreateModelSingleInputOutput() { model->operator_codes.push_back(std::move(fc_op_code)); model->operator_codes.push_back(std::move(dequant_op_code)); - // Model input/otuput. - model->subgraphs[0]->inputs = {2}; + // Model input/output. + model->subgraphs[0]->inputs = {0}; model->subgraphs[0]->outputs = {3}; - // Tensors. Float tensors are at the end of the tensor list. 
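  // (Illustrative sketch, not part of the patch.) The rebuilt helper wires the
  // single input/output graph so that the float interface tensors sit at the
  // beginning and end of the tensor list:
  //   tensor 0 (float in) -> QUANTIZE -> tensor 1 (int8) -> FULLY_CONNECTED
  //     -> tensor 2 (int8) -> DEQUANTIZE -> tensor 3 (float out)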
- + // Tensors auto tensor_0 = absl::make_unique(); - tensor_0->quantization = absl::make_unique(); - tensor_0->quantization->scale.push_back(0.35); - tensor_0->quantization->zero_point.push_back(28); tensor_0->name = "tensor_0"; tensor_0->shape = {}; - tensor_0->type = TensorType_INT8; + tensor_0->type = TensorType_FLOAT32; auto tensor_1 = absl::make_unique(); tensor_1->quantization = absl::make_unique(); - tensor_1->quantization->scale.push_back(0.12); - tensor_1->quantization->zero_point.push_back(50); + tensor_1->quantization->scale.push_back(0.35); + tensor_1->quantization->zero_point.push_back(28); tensor_1->name = "tensor_1"; tensor_1->shape = {}; tensor_1->type = TensorType_INT8; auto tensor_2 = absl::make_unique(); + tensor_2->quantization = absl::make_unique(); + tensor_2->quantization->scale.push_back(0.12); + tensor_2->quantization->zero_point.push_back(50); tensor_2->name = "tensor_2"; tensor_2->shape = {}; - tensor_2->type = TensorType_FLOAT32; + tensor_2->type = TensorType_INT8; auto tensor_3 = absl::make_unique(); tensor_3->name = "tensor_3"; @@ -116,9 +113,8 @@ std::unique_ptr CreateModelSingleInputOutput() { return model; } -// Create a model with 2 quant, 1 FC, 2 dequant -// The model mimics the behavior of the quantize_model.cc. -std::unique_ptr CreateModelMultipleInputOutput() { +// Create a quantized model with 2 quant, 1 FC, 2 dequant +std::unique_ptr CreateQuantizedModelMultipleInputOutput() { auto model = absl::make_unique(); auto subgraph = absl::make_unique(); auto buffer = absl::make_unique(); @@ -145,21 +141,21 @@ std::unique_ptr CreateModelMultipleInputOutput() { // Op. quant_op_1->opcode_index = 0; - quant_op_1->inputs = {4}; - quant_op_1->outputs = {0}; + quant_op_1->inputs = {0}; + quant_op_1->outputs = {2}; quant_op_2->opcode_index = 0; - quant_op_2->inputs = {5}; - quant_op_2->outputs = {1}; + quant_op_2->inputs = {1}; + quant_op_2->outputs = {3}; fc_op->opcode_index = 1; - fc_op->inputs = {0, 1}; - fc_op->outputs = {2, 3}; + fc_op->inputs = {2, 3}; + fc_op->outputs = {4, 5}; dequant_op_1->opcode_index = 2; - dequant_op_1->inputs = {2}; + dequant_op_1->inputs = {4}; dequant_op_1->outputs = {6}; dequant_op_2->opcode_index = 2; - dequant_op_2->inputs = {3}; + dequant_op_2->inputs = {5}; dequant_op_2->outputs = {7}; model->subgraphs[0]->operators.push_back(std::move(quant_op_1)); @@ -172,30 +168,24 @@ std::unique_ptr CreateModelMultipleInputOutput() { model->operator_codes.push_back(std::move(fc_op_code)); model->operator_codes.push_back(std::move(dequant_op_code)); - // Model input/otuput. - model->subgraphs[0]->inputs = {4, 5}; + // Model input/output. 
+ model->subgraphs[0]->inputs = {0, 1}; model->subgraphs[0]->outputs = {6, 7}; // Tensors auto tensor_0 = absl::make_unique(); - tensor_0->quantization = absl::make_unique(); - tensor_0->quantization->scale.push_back(0.35); - tensor_0->quantization->zero_point.push_back(28); tensor_0->name = "tensor_0"; tensor_0->shape = {}; - tensor_0->type = TensorType_INT8; + tensor_0->type = TensorType_FLOAT32; auto tensor_1 = absl::make_unique(); - tensor_1->quantization = absl::make_unique(); - tensor_1->quantization->scale.push_back(0.12); - tensor_1->quantization->zero_point.push_back(50); tensor_1->name = "tensor_1"; tensor_1->shape = {}; - tensor_1->type = TensorType_INT8; + tensor_1->type = TensorType_FLOAT32; auto tensor_2 = absl::make_unique(); tensor_2->quantization = absl::make_unique(); - tensor_2->quantization->scale.push_back(0.45); + tensor_2->quantization->scale.push_back(0.35); tensor_2->quantization->zero_point.push_back(28); tensor_2->name = "tensor_2"; tensor_2->shape = {}; @@ -203,21 +193,27 @@ std::unique_ptr CreateModelMultipleInputOutput() { auto tensor_3 = absl::make_unique(); tensor_3->quantization = absl::make_unique(); - tensor_3->quantization->scale.push_back(0.22); + tensor_3->quantization->scale.push_back(0.12); tensor_3->quantization->zero_point.push_back(50); tensor_3->name = "tensor_3"; tensor_3->shape = {}; tensor_3->type = TensorType_INT8; auto tensor_4 = absl::make_unique(); + tensor_4->quantization = absl::make_unique(); + tensor_4->quantization->scale.push_back(0.45); + tensor_4->quantization->zero_point.push_back(28); tensor_4->name = "tensor_4"; tensor_4->shape = {}; - tensor_4->type = TensorType_FLOAT32; + tensor_4->type = TensorType_INT8; auto tensor_5 = absl::make_unique(); + tensor_5->quantization = absl::make_unique(); + tensor_5->quantization->scale.push_back(0.22); + tensor_5->quantization->zero_point.push_back(50); tensor_5->name = "tensor_5"; tensor_5->shape = {}; - tensor_5->type = TensorType_FLOAT32; + tensor_5->type = TensorType_INT8; auto tensor_6 = absl::make_unique(); tensor_6->name = "tensor_6"; @@ -266,7 +262,7 @@ std::unique_ptr CreateFloatModel() { model->subgraphs[0]->operators.push_back(std::move(fc_op)); model->operator_codes.push_back(std::move(fc_op_code)); - // Model input/otuput. + // Model input/output. model->subgraphs[0]->inputs = {0}; model->subgraphs[0]->outputs = {1}; @@ -291,12 +287,7 @@ std::unique_ptr CreateFloatModel() { } TEST(ModelInterface, Uint8SingleInputOutput) { - auto model = CreateModelSingleInputOutput(); - - // Ops. - EXPECT_EQ(model->subgraphs[0]->operators[0]->opcode_index, 0); - EXPECT_EQ(model->subgraphs[0]->operators[1]->opcode_index, 1); - EXPECT_EQ(model->subgraphs[0]->operators[2]->opcode_index, 2); + auto model = CreateQuantizedModelSingleInputOutput(); // Change model type. flatbuffers::FlatBufferBuilder builder; @@ -305,32 +296,35 @@ TEST(ModelInterface, Uint8SingleInputOutput) { kTfLiteOk); // Verify results. 
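  // Note on the expected values below (added here, not taken from the patch):
  // converting an int8 interface tensor to uint8 keeps the scale and shifts the
  // zero point by +128 (int8 covers [-128, 127], uint8 covers [0, 255]), which
  // is why zero point 28 is expected to become 156 and 50 to become 178.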
- EXPECT_EQ(model->operator_codes.size(), 3); EXPECT_EQ(model->subgraphs.size(), 1); - EXPECT_EQ(model->subgraphs[0]->operators.size(), 3); EXPECT_EQ(model->subgraphs[0]->tensors.size(), 4); - EXPECT_EQ(model->buffers.size(), 1); - - EXPECT_EQ(model->subgraphs[0]->tensors[2]->name, "tensor_2"); - EXPECT_EQ(model->subgraphs[0]->tensors[2]->type, TensorType_UINT8); - EXPECT_FLOAT_EQ(model->subgraphs[0]->tensors[2]->quantization->scale[0], - 0.35); - EXPECT_EQ(model->subgraphs[0]->tensors[2]->quantization->zero_point[0], 156); - - EXPECT_EQ(model->subgraphs[0]->tensors[3]->name, "tensor_3"); - EXPECT_EQ(model->subgraphs[0]->tensors[3]->type, TensorType_UINT8); - EXPECT_FLOAT_EQ(model->subgraphs[0]->tensors[3]->quantization->scale[0], - 0.12); - EXPECT_EQ(model->subgraphs[0]->tensors[3]->quantization->zero_point[0], 178); - - // Ops. + EXPECT_EQ(model->subgraphs[0]->inputs.size(), 1); + EXPECT_EQ(model->subgraphs[0]->inputs[0], 0); + EXPECT_EQ(model->subgraphs[0]->outputs.size(), 1); + EXPECT_EQ(model->subgraphs[0]->outputs[0], 3); + EXPECT_EQ(model->operator_codes.size(), 3); + EXPECT_EQ(model->subgraphs[0]->operators.size(), 3); EXPECT_EQ(model->subgraphs[0]->operators[0]->opcode_index, 0); EXPECT_EQ(model->subgraphs[0]->operators[1]->opcode_index, 1); EXPECT_EQ(model->subgraphs[0]->operators[2]->opcode_index, 0); + + auto input_quant_op = model->subgraphs[0]->operators[0].get(); + auto input = model->subgraphs[0]->tensors[input_quant_op->inputs[0]].get(); + EXPECT_EQ(input->name, "tensor_0"); + EXPECT_EQ(input->type, TensorType_UINT8); + EXPECT_FLOAT_EQ(input->quantization->scale[0], 0.35); + EXPECT_EQ(input->quantization->zero_point[0], 156); + + auto output_quant_op = model->subgraphs[0]->operators[2].get(); + auto output = model->subgraphs[0]->tensors[output_quant_op->outputs[0]].get(); + EXPECT_EQ(output->name, "tensor_3"); + EXPECT_EQ(output->type, TensorType_UINT8); + EXPECT_FLOAT_EQ(output->quantization->scale[0], 0.12); + EXPECT_EQ(output->quantization->zero_point[0], 178); } TEST(ModelInterface, Int8SingleInputOutput) { - auto model = CreateModelSingleInputOutput(); + auto model = CreateQuantizedModelSingleInputOutput(); // Change model type. flatbuffers::FlatBufferBuilder builder; @@ -339,20 +333,35 @@ TEST(ModelInterface, Int8SingleInputOutput) { kTfLiteOk); // Verify results. 
- EXPECT_EQ(model->operator_codes.size(), 3); EXPECT_EQ(model->subgraphs.size(), 1); - EXPECT_EQ(model->subgraphs[0]->operators.size(), 1); - EXPECT_EQ(model->subgraphs[0]->tensors.size(), 2); - EXPECT_EQ(model->buffers.size(), 1); - + // TODO (b/158254056): Remove unused inputs and outputs from tensor list + // EXPECT_EQ(model->subgraphs[0]->tensors.size(), 2); + EXPECT_EQ(model->subgraphs[0]->tensors.size(), 3); EXPECT_EQ(model->subgraphs[0]->inputs.size(), 1); - EXPECT_EQ(model->subgraphs[0]->inputs[0], 0); + EXPECT_EQ(model->subgraphs[0]->inputs[0], 1); EXPECT_EQ(model->subgraphs[0]->outputs.size(), 1); - EXPECT_EQ(model->subgraphs[0]->outputs[0], 1); + EXPECT_EQ(model->subgraphs[0]->outputs[0], 2); + EXPECT_EQ(model->operator_codes.size(), 3); + EXPECT_EQ(model->subgraphs[0]->operators.size(), 1); + EXPECT_EQ(model->subgraphs[0]->operators[0]->opcode_index, 1); + + auto fc_op = model->subgraphs[0]->operators[0].get(); + + auto input = model->subgraphs[0]->tensors[fc_op->inputs[0]].get(); + EXPECT_EQ(input->name, "tensor_1"); + EXPECT_EQ(input->type, TensorType_INT8); + EXPECT_FLOAT_EQ(input->quantization->scale[0], 0.35); + EXPECT_EQ(input->quantization->zero_point[0], 28); + + auto output = model->subgraphs[0]->tensors[fc_op->outputs[0]].get(); + EXPECT_EQ(output->name, "tensor_2"); + EXPECT_EQ(output->type, TensorType_INT8); + EXPECT_FLOAT_EQ(output->quantization->scale[0], 0.12); + EXPECT_EQ(output->quantization->zero_point[0], 50); } TEST(ModelInterface, MixedTypeSingleInputOutput) { - auto model = CreateModelSingleInputOutput(); + auto model = CreateQuantizedModelSingleInputOutput(); // Change model type. flatbuffers::FlatBufferBuilder builder; @@ -361,27 +370,34 @@ TEST(ModelInterface, MixedTypeSingleInputOutput) { kTfLiteOk); // Verify results. - EXPECT_EQ(model->operator_codes.size(), 3); EXPECT_EQ(model->subgraphs.size(), 1); - EXPECT_EQ(model->subgraphs[0]->operators.size(), 2); EXPECT_EQ(model->subgraphs[0]->tensors.size(), 3); - EXPECT_EQ(model->buffers.size(), 1); - EXPECT_EQ(model->subgraphs[0]->inputs.size(), 1); - EXPECT_EQ(model->subgraphs[0]->inputs[0], 2); + EXPECT_EQ(model->subgraphs[0]->inputs[0], 0); EXPECT_EQ(model->subgraphs[0]->outputs.size(), 1); - EXPECT_EQ(model->subgraphs[0]->outputs[0], 1); + EXPECT_EQ(model->subgraphs[0]->outputs[0], 2); + EXPECT_EQ(model->operator_codes.size(), 3); + EXPECT_EQ(model->subgraphs[0]->operators.size(), 2); + EXPECT_EQ(model->subgraphs[0]->operators[0]->opcode_index, 0); + EXPECT_EQ(model->subgraphs[0]->operators[1]->opcode_index, 1); + + auto quant_op = model->subgraphs[0]->operators[0].get(); + auto input = model->subgraphs[0]->tensors[quant_op->inputs[0]].get(); + EXPECT_EQ(input->name, "tensor_0"); + EXPECT_EQ(input->type, TensorType_UINT8); + EXPECT_FLOAT_EQ(input->quantization->scale[0], 0.35); + EXPECT_EQ(input->quantization->zero_point[0], 156); + + auto fc_op = model->subgraphs[0]->operators[1].get(); + auto output = model->subgraphs[0]->tensors[fc_op->outputs[0]].get(); + EXPECT_EQ(output->name, "tensor_2"); + EXPECT_EQ(output->type, TensorType_INT8); + EXPECT_FLOAT_EQ(output->quantization->scale[0], 0.12); + EXPECT_EQ(output->quantization->zero_point[0], 50); } TEST(ModelInterface, Uint8MutipleInputOutput) { - auto model = CreateModelMultipleInputOutput(); - - // Ops. 
- EXPECT_EQ(model->subgraphs[0]->operators[0]->opcode_index, 0); - EXPECT_EQ(model->subgraphs[0]->operators[1]->opcode_index, 0); - EXPECT_EQ(model->subgraphs[0]->operators[2]->opcode_index, 1); - EXPECT_EQ(model->subgraphs[0]->operators[3]->opcode_index, 2); - EXPECT_EQ(model->subgraphs[0]->operators[4]->opcode_index, 2); + auto model = CreateQuantizedModelMultipleInputOutput(); // Change model type. flatbuffers::FlatBufferBuilder builder; @@ -390,54 +406,55 @@ TEST(ModelInterface, Uint8MutipleInputOutput) { kTfLiteOk); // Verify results. - // Model. - EXPECT_EQ(model->operator_codes.size(), 3); EXPECT_EQ(model->subgraphs.size(), 1); - EXPECT_EQ(model->subgraphs[0]->operators.size(), 5); EXPECT_EQ(model->subgraphs[0]->tensors.size(), 8); EXPECT_EQ(model->subgraphs[0]->inputs.size(), 2); - EXPECT_EQ(model->subgraphs[0]->inputs[0], 4); - EXPECT_EQ(model->subgraphs[0]->inputs[1], 5); + EXPECT_EQ(model->subgraphs[0]->inputs[0], 0); + EXPECT_EQ(model->subgraphs[0]->inputs[1], 1); EXPECT_EQ(model->subgraphs[0]->outputs.size(), 2); EXPECT_EQ(model->subgraphs[0]->outputs[0], 6); EXPECT_EQ(model->subgraphs[0]->outputs[1], 7); - EXPECT_EQ(model->buffers.size(), 1); - - // Tensors, - EXPECT_EQ(model->subgraphs[0]->tensors[4]->name, "tensor_4"); - EXPECT_EQ(model->subgraphs[0]->tensors[4]->type, TensorType_UINT8); - EXPECT_FLOAT_EQ(model->subgraphs[0]->tensors[4]->quantization->scale[0], - 0.35); - EXPECT_EQ(model->subgraphs[0]->tensors[4]->quantization->zero_point[0], 156); - - EXPECT_EQ(model->subgraphs[0]->tensors[5]->name, "tensor_5"); - EXPECT_EQ(model->subgraphs[0]->tensors[5]->type, TensorType_UINT8); - EXPECT_FLOAT_EQ(model->subgraphs[0]->tensors[5]->quantization->scale[0], - 0.12); - EXPECT_EQ(model->subgraphs[0]->tensors[5]->quantization->zero_point[0], 178); - - EXPECT_EQ(model->subgraphs[0]->tensors[6]->name, "tensor_6"); - EXPECT_EQ(model->subgraphs[0]->tensors[6]->type, TensorType_UINT8); - EXPECT_FLOAT_EQ(model->subgraphs[0]->tensors[6]->quantization->scale[0], - 0.45); - EXPECT_EQ(model->subgraphs[0]->tensors[6]->quantization->zero_point[0], 156); - - EXPECT_EQ(model->subgraphs[0]->tensors[7]->name, "tensor_7"); - EXPECT_EQ(model->subgraphs[0]->tensors[7]->type, TensorType_UINT8); - EXPECT_FLOAT_EQ(model->subgraphs[0]->tensors[7]->quantization->scale[0], - 0.22); - EXPECT_EQ(model->subgraphs[0]->tensors[7]->quantization->zero_point[0], 178); - - // Ops. 
+ EXPECT_EQ(model->operator_codes.size(), 3); + EXPECT_EQ(model->subgraphs[0]->operators.size(), 5); EXPECT_EQ(model->subgraphs[0]->operators[0]->opcode_index, 0); EXPECT_EQ(model->subgraphs[0]->operators[1]->opcode_index, 0); EXPECT_EQ(model->subgraphs[0]->operators[2]->opcode_index, 1); EXPECT_EQ(model->subgraphs[0]->operators[3]->opcode_index, 0); EXPECT_EQ(model->subgraphs[0]->operators[4]->opcode_index, 0); + + auto input_quant_1 = model->subgraphs[0]->operators[0].get(); + auto input_1 = model->subgraphs[0]->tensors[input_quant_1->inputs[0]].get(); + EXPECT_EQ(input_1->name, "tensor_0"); + EXPECT_EQ(input_1->type, TensorType_UINT8); + EXPECT_FLOAT_EQ(input_1->quantization->scale[0], 0.35); + EXPECT_EQ(input_1->quantization->zero_point[0], 156); + + auto input_quant_2 = model->subgraphs[0]->operators[1].get(); + auto input_2 = model->subgraphs[0]->tensors[input_quant_2->inputs[0]].get(); + EXPECT_EQ(input_2->name, "tensor_1"); + EXPECT_EQ(input_2->type, TensorType_UINT8); + EXPECT_FLOAT_EQ(input_2->quantization->scale[0], 0.12); + EXPECT_EQ(input_2->quantization->zero_point[0], 178); + + auto output_quant_1 = model->subgraphs[0]->operators[3].get(); + auto output_1 = + model->subgraphs[0]->tensors[output_quant_1->outputs[0]].get(); + EXPECT_EQ(output_1->name, "tensor_6"); + EXPECT_EQ(output_1->type, TensorType_UINT8); + EXPECT_FLOAT_EQ(output_1->quantization->scale[0], 0.45); + EXPECT_EQ(output_1->quantization->zero_point[0], 156); + + auto output_quant_2 = model->subgraphs[0]->operators[4].get(); + auto output_2 = + model->subgraphs[0]->tensors[output_quant_2->outputs[0]].get(); + EXPECT_EQ(output_2->name, "tensor_7"); + EXPECT_EQ(output_2->type, TensorType_UINT8); + EXPECT_FLOAT_EQ(output_2->quantization->scale[0], 0.22); + EXPECT_EQ(output_2->quantization->zero_point[0], 178); } TEST(ModelInterface, Int8MutipleInputOutput) { - auto model = CreateModelMultipleInputOutput(); + auto model = CreateQuantizedModelMultipleInputOutput(); // Change model type. flatbuffers::FlatBufferBuilder builder; @@ -446,45 +463,45 @@ TEST(ModelInterface, Int8MutipleInputOutput) { kTfLiteOk); // Verify results. 
- EXPECT_EQ(model->operator_codes.size(), 3); EXPECT_EQ(model->subgraphs.size(), 1); - EXPECT_EQ(model->subgraphs[0]->operators.size(), 1); - EXPECT_EQ(model->subgraphs[0]->tensors.size(), 4); + // TODO (b/158254056): Remove unused inputs and outputs from tensor list + // EXPECT_EQ(model->subgraphs[0]->tensors.size(), 4); + EXPECT_EQ(model->subgraphs[0]->tensors.size(), 6); EXPECT_EQ(model->subgraphs[0]->inputs.size(), 2); - EXPECT_EQ(model->subgraphs[0]->inputs[0], 0); - EXPECT_EQ(model->subgraphs[0]->inputs[1], 1); + EXPECT_EQ(model->subgraphs[0]->inputs[0], 2); + EXPECT_EQ(model->subgraphs[0]->inputs[1], 3); EXPECT_EQ(model->subgraphs[0]->outputs.size(), 2); - EXPECT_EQ(model->subgraphs[0]->outputs[0], 2); - EXPECT_EQ(model->subgraphs[0]->outputs[1], 3); - EXPECT_EQ(model->buffers.size(), 1); - - // Tensors, - EXPECT_EQ(model->subgraphs[0]->tensors[0]->name, "tensor_0"); - EXPECT_EQ(model->subgraphs[0]->tensors[0]->type, TensorType_INT8); - EXPECT_FLOAT_EQ(model->subgraphs[0]->tensors[0]->quantization->scale[0], - 0.35); - EXPECT_EQ(model->subgraphs[0]->tensors[0]->quantization->zero_point[0], 28); - - EXPECT_EQ(model->subgraphs[0]->tensors[1]->name, "tensor_1"); - EXPECT_EQ(model->subgraphs[0]->tensors[1]->type, TensorType_INT8); - EXPECT_FLOAT_EQ(model->subgraphs[0]->tensors[1]->quantization->scale[0], - 0.12); - EXPECT_EQ(model->subgraphs[0]->tensors[1]->quantization->zero_point[0], 50); - - EXPECT_EQ(model->subgraphs[0]->tensors[2]->name, "tensor_2"); - EXPECT_EQ(model->subgraphs[0]->tensors[2]->type, TensorType_INT8); - EXPECT_FLOAT_EQ(model->subgraphs[0]->tensors[2]->quantization->scale[0], - 0.45); - EXPECT_EQ(model->subgraphs[0]->tensors[2]->quantization->zero_point[0], 28); - - EXPECT_EQ(model->subgraphs[0]->tensors[3]->name, "tensor_3"); - EXPECT_EQ(model->subgraphs[0]->tensors[3]->type, TensorType_INT8); - EXPECT_FLOAT_EQ(model->subgraphs[0]->tensors[3]->quantization->scale[0], - 0.22); - EXPECT_EQ(model->subgraphs[0]->tensors[3]->quantization->zero_point[0], 50); - - // Ops. 
+ EXPECT_EQ(model->subgraphs[0]->outputs[0], 4); + EXPECT_EQ(model->subgraphs[0]->outputs[1], 5); + EXPECT_EQ(model->operator_codes.size(), 3); + EXPECT_EQ(model->subgraphs[0]->operators.size(), 1); EXPECT_EQ(model->subgraphs[0]->operators[0]->opcode_index, 1); + + auto fc_op = model->subgraphs[0]->operators[0].get(); + + auto input_1 = model->subgraphs[0]->tensors[fc_op->inputs[0]].get(); + EXPECT_EQ(input_1->name, "tensor_2"); + EXPECT_EQ(input_1->type, TensorType_INT8); + EXPECT_FLOAT_EQ(input_1->quantization->scale[0], 0.35); + EXPECT_EQ(input_1->quantization->zero_point[0], 28); + + auto input_2 = model->subgraphs[0]->tensors[fc_op->inputs[1]].get(); + EXPECT_EQ(input_2->name, "tensor_3"); + EXPECT_EQ(input_2->type, TensorType_INT8); + EXPECT_FLOAT_EQ(input_2->quantization->scale[0], 0.12); + EXPECT_EQ(input_2->quantization->zero_point[0], 50); + + auto output_1 = model->subgraphs[0]->tensors[fc_op->outputs[0]].get(); + EXPECT_EQ(output_1->name, "tensor_4"); + EXPECT_EQ(output_1->type, TensorType_INT8); + EXPECT_FLOAT_EQ(output_1->quantization->scale[0], 0.45); + EXPECT_EQ(output_1->quantization->zero_point[0], 28); + + auto output_2 = model->subgraphs[0]->tensors[fc_op->outputs[1]].get(); + EXPECT_EQ(output_2->name, "tensor_5"); + EXPECT_EQ(output_2->type, TensorType_INT8); + EXPECT_FLOAT_EQ(output_2->quantization->scale[0], 0.22); + EXPECT_EQ(output_2->quantization->zero_point[0], 50); } TEST(ModelInterface, Float) { @@ -510,47 +527,32 @@ TEST(ModelInterface, Float) { model.reset(output_model->UnPack()); // Verify results. - EXPECT_EQ(model->operator_codes.size(), 3); EXPECT_EQ(model->subgraphs.size(), 1); - EXPECT_EQ(model->subgraphs[0]->operators.size(), 3); EXPECT_EQ(model->subgraphs[0]->tensors.size(), 4); - EXPECT_EQ(model->buffers.size(), 1); - - // Ops. + EXPECT_EQ(model->subgraphs[0]->inputs.size(), 1); + EXPECT_EQ(model->subgraphs[0]->inputs[0], 0); + EXPECT_EQ(model->subgraphs[0]->outputs.size(), 1); + EXPECT_EQ(model->subgraphs[0]->outputs[0], 1); + EXPECT_EQ(model->operator_codes.size(), 3); EXPECT_EQ(model->operator_codes[0]->builtin_code, BuiltinOperator_FULLY_CONNECTED); EXPECT_EQ(model->operator_codes[1]->builtin_code, BuiltinOperator_DEQUANTIZE); EXPECT_EQ(model->operator_codes[2]->builtin_code, BuiltinOperator_QUANTIZE); + EXPECT_EQ(model->subgraphs[0]->operators.size(), 3); - EXPECT_EQ(model->subgraphs[0]->operators[0]->opcode_index, 1); - EXPECT_EQ(model->subgraphs[0]->operators[1]->opcode_index, 0); - EXPECT_EQ(model->subgraphs[0]->operators[2]->opcode_index, 2); + auto dequantize_op = model->subgraphs[0]->operators[0].get(); + auto input = model->subgraphs[0]->tensors[dequantize_op->inputs[0]].get(); + EXPECT_EQ(input->name, "tensor_0_uint8"); + EXPECT_EQ(input->type, TensorType_UINT8); + EXPECT_FLOAT_EQ(input->quantization->scale[0], 0.4); + EXPECT_EQ(input->quantization->zero_point[0], 2); - EXPECT_THAT(model->subgraphs[0]->operators[0]->inputs, ElementsAreArray({2})); - EXPECT_THAT(model->subgraphs[0]->operators[0]->outputs, - ElementsAreArray({0})); - EXPECT_THAT(model->subgraphs[0]->operators[1]->inputs, ElementsAreArray({0})); - EXPECT_THAT(model->subgraphs[0]->operators[1]->outputs, - ElementsAreArray({1})); - EXPECT_THAT(model->subgraphs[0]->operators[2]->inputs, ElementsAreArray({1})); - EXPECT_THAT(model->subgraphs[0]->operators[2]->outputs, - ElementsAreArray({3})); - - // Tensors. 
- EXPECT_EQ(model->subgraphs[0]->tensors[0]->name, "tensor_0"); - EXPECT_EQ(model->subgraphs[0]->tensors[0]->type, TensorType_FLOAT32); - EXPECT_EQ(model->subgraphs[0]->tensors[1]->name, "tensor_1"); - EXPECT_EQ(model->subgraphs[0]->tensors[1]->type, TensorType_FLOAT32); - - EXPECT_EQ(model->subgraphs[0]->tensors[2]->name, "tensor_0_uint8"); - EXPECT_EQ(model->subgraphs[0]->tensors[2]->type, TensorType_UINT8); - EXPECT_FLOAT_EQ(model->subgraphs[0]->tensors[2]->quantization->scale[0], 0.4); - EXPECT_EQ(model->subgraphs[0]->tensors[2]->quantization->zero_point[0], 2); - - EXPECT_EQ(model->subgraphs[0]->tensors[3]->name, "tensor_1_uint8"); - EXPECT_EQ(model->subgraphs[0]->tensors[3]->type, TensorType_UINT8); - EXPECT_FLOAT_EQ(model->subgraphs[0]->tensors[3]->quantization->scale[0], 0.5); - EXPECT_EQ(model->subgraphs[0]->tensors[3]->quantization->zero_point[0], -5); + auto quantize_op = model->subgraphs[0]->operators[2].get(); + auto output = model->subgraphs[0]->tensors[quantize_op->outputs[0]].get(); + EXPECT_EQ(output->name, "tensor_1_uint8"); + EXPECT_EQ(output->type, TensorType_UINT8); + EXPECT_FLOAT_EQ(output->quantization->scale[0], 0.5); + EXPECT_EQ(output->quantization->zero_point[0], -5); } } // namespace From 854e0bfdb20475e8a2d787cc89f523da6de123e5 Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Mon, 8 Jun 2020 13:14:20 -0700 Subject: [PATCH 072/178] Support native python list for Hashing layer. PiperOrigin-RevId: 315338165 Change-Id: I9d7859f63ebf748b6745b7d2c151ba1c8945a3c1 --- .../layers/preprocessing/category_crossing.py | 4 +- .../preprocessing/category_crossing_test.py | 5 +++ .../keras/layers/preprocessing/hashing.py | 38 +++++++++++++++++-- .../layers/preprocessing/hashing_test.py | 21 ++++++++++ 4 files changed, 63 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/keras/layers/preprocessing/category_crossing.py b/tensorflow/python/keras/layers/preprocessing/category_crossing.py index fa0237595ac..594b9741946 100644 --- a/tensorflow/python/keras/layers/preprocessing/category_crossing.py +++ b/tensorflow/python/keras/layers/preprocessing/category_crossing.py @@ -142,8 +142,8 @@ class CategoryCrossing(Layer): def _preprocess_input(self, inp): if isinstance(inp, (list, tuple, np.ndarray)): inp = ops.convert_to_tensor(inp) - if inp.shape.rank == 1: - inp = array_ops.expand_dims(inp, axis=-1) + if inp.shape.rank == 1: + inp = array_ops.expand_dims(inp, axis=-1) return inp def call(self, inputs): diff --git a/tensorflow/python/keras/layers/preprocessing/category_crossing_test.py b/tensorflow/python/keras/layers/preprocessing/category_crossing_test.py index 83e78c4dd46..0f320196080 100644 --- a/tensorflow/python/keras/layers/preprocessing/category_crossing_test.py +++ b/tensorflow/python/keras/layers/preprocessing/category_crossing_test.py @@ -191,6 +191,11 @@ class CategoryCrossingTest(keras_parameterized.TestCase): output = layer([inputs_0, inputs_1]) self.assertAllEqual([[b'1_X_1'], [b'2_X_3']], output) + inputs_0 = np.asarray([1, 2]) + inputs_1 = np.asarray([1, 3]) + output = layer([inputs_0, inputs_1]) + self.assertAllEqual([[b'1_X_1'], [b'2_X_3']], output) + def test_crossing_dense_inputs_depth_int(self): layer = category_crossing.CategoryCrossing(depth=1) inputs_0 = constant_op.constant([['a'], ['b'], ['c']]) diff --git a/tensorflow/python/keras/layers/preprocessing/hashing.py b/tensorflow/python/keras/layers/preprocessing/hashing.py index f4a4ae0ccc8..faeeec63a86 100644 --- a/tensorflow/python/keras/layers/preprocessing/hashing.py +++ 
b/tensorflow/python/keras/layers/preprocessing/hashing.py @@ -19,11 +19,14 @@ from __future__ import division from __future__ import print_function import functools +import numpy as np from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_spec +from tensorflow.python.framework import tensor_util from tensorflow.python.keras.engine.base_layer import Layer from tensorflow.python.ops import gen_sparse_ops from tensorflow.python.ops import sparse_ops @@ -58,7 +61,7 @@ class Hashing(Layer): Example (FarmHash64): >>> layer = tf.keras.layers.experimental.preprocessing.Hashing(num_bins=3) - >>> inp = np.asarray([['A'], ['B'], ['C'], ['D'], ['E']]) + >>> inp = [['A'], ['B'], ['C'], ['D'], ['E']] >>> layer(inp) + Example (FarmHash64) with list of inputs: + >>> layer = tf.keras.layers.experimental.preprocessing.Hashing(num_bins=3) + >>> inp_1 = [['A'], ['B'], ['C'], ['D'], ['E']] + >>> inp_2 = np.asarray([[5], [4], [3], [2], [1]]) + >>> layer([inp_1, inp_2]) + + + Example (SipHash64): >>> layer = tf.keras.layers.experimental.preprocessing.Hashing(num_bins=3, ... salt=[133, 137]) - >>> inp = np.asarray([['A'], ['B'], ['C'], ['D'], ['E']]) + >>> inp = [['A'], ['B'], ['C'], ['D'], ['E']] >>> layer(inp) >> layer = tf.keras.layers.experimental.preprocessing.Hashing(num_bins=3, ... salt=133) - >>> inp = np.asarray([['A'], ['B'], ['C'], ['D'], ['E']]) + >>> inp = [['A'], ['B'], ['C'], ['D'], ['E']] >>> layer(inp) Date: Mon, 8 Jun 2020 13:37:21 -0700 Subject: [PATCH 073/178] Adds quantized support to TFL BatchMatmul - Hybrid asymmetric quantization PiperOrigin-RevId: 315342614 Change-Id: Ibd24e3667ab7a340118bdbe4956d5f5b2d882992 --- tensorflow/lite/kernels/batch_matmul.cc | 272 ++++++++++++++++-- tensorflow/lite/kernels/batch_matmul_test.cc | 180 ++++++++++++ tensorflow/lite/kernels/internal/BUILD | 1 + .../kernels/internal/optimized/batch_matmul.h | 161 +++++++++++ .../kernels/internal/reference/batch_matmul.h | 116 ++++++++ 5 files changed, 706 insertions(+), 24 deletions(-) diff --git a/tensorflow/lite/kernels/batch_matmul.cc b/tensorflow/lite/kernels/batch_matmul.cc index a7912654faa..9cbad101bab 100644 --- a/tensorflow/lite/kernels/batch_matmul.cc +++ b/tensorflow/lite/kernels/batch_matmul.cc @@ -19,6 +19,7 @@ limitations under the License. #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/common.h" +#include "tensorflow/lite/kernels/cpu_backend_context.h" #include "tensorflow/lite/kernels/internal/optimized/batch_matmul.h" #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h" #include "tensorflow/lite/kernels/internal/reference/reference_ops.h" @@ -35,6 +36,9 @@ static const int kInputLHSTensor = 0; static const int kInputRHSTensor = 1; static const int kOutputTensor = 0; +static const int kNumTempTensorsForAdjoints = 2; +static const int kNumTempTensorsForHybrid = 5; + // This file has two implementations of Transpose. enum KernelType { kReference, @@ -45,13 +49,14 @@ struct OpData { // The index of the temporary tensors where we store transposed LHS/RHS. 
int scratch_tensor_index; bool rhs_transposed; + bool compute_row_sums = false; }; struct OpContext { OpContext(TfLiteContext* context, TfLiteNode* node) { params = reinterpret_cast(node->builtin_data); - lhs = GetInput(context, node, 0); - rhs = GetInput(context, node, 1); + lhs = GetInput(context, node, kInputLHSTensor); + rhs = GetInput(context, node, kInputRHSTensor); output = GetOutput(context, node, 0); } TfLiteBatchMatMulParams* params; @@ -61,12 +66,14 @@ struct OpContext { }; void* Init(TfLiteContext* context, const char* buffer, size_t length) { - // Creates two temp tensors to store the transposed LHS and/or RHS if - // needed. auto* op_data = new OpData(); // If the RHS is constant, we only transpose once. op_data->rhs_transposed = false; - context->AddTensors(context, 2, &op_data->scratch_tensor_index); + // Creates the temp tensors to store the transposed LHS and/or RHS, and + // extra buffers for the quantized case. + context->AddTensors(context, + kNumTempTensorsForAdjoints + kNumTempTensorsForHybrid, + &op_data->scratch_tensor_index); return op_data; } @@ -105,15 +112,34 @@ TfLiteStatus InitializeTemporaries(TfLiteContext* context, TfLiteNode* node, OpContext* op_context) { // Create temporary tensors to hold transposed LHS/RHS. OpData* op_data = reinterpret_cast(node->user_data); + const TfLiteTensor* lhs = op_context->lhs; + const TfLiteTensor* rhs = op_context->rhs; TfLiteIntArrayFree(node->temporaries); - node->temporaries = TfLiteIntArrayCreate(2); - node->temporaries->data[0] = op_data->scratch_tensor_index; - node->temporaries->data[1] = op_data->scratch_tensor_index + 1; + // For "hybrid" quantization, we impose the constraint that the LHS + // is float (typically an activation from a prior layer) and the RHS + // is quantized int8. + bool is_hybrid = + (op_context->lhs->type == kTfLiteFloat32 && rhs->type == kTfLiteInt8); + if (is_hybrid) { + node->temporaries = TfLiteIntArrayCreate(kNumTempTensorsForAdjoints + + kNumTempTensorsForHybrid); + } else { + node->temporaries = TfLiteIntArrayCreate(kNumTempTensorsForAdjoints); + } + + const int lhs_rank = NumDimensions(lhs); + const int rhs_rank = NumDimensions(rhs); + const int batch_size = op_context->params->adj_x + ? lhs->dims->data[lhs_rank - 2] + : lhs->dims->data[lhs_rank - 1]; + const int num_units = op_context->params->adj_x + ? lhs->dims->data[lhs_rank - 1] + : lhs->dims->data[lhs_rank - 2]; + // Temp tensor for Transposed LHS; - if (op_context->params->adj_x) { + { + node->temporaries->data[0] = op_data->scratch_tensor_index; TfLiteTensor* scratch_buffer = GetTemporary(context, node, /*index=*/0); - const TfLiteTensor* lhs = op_context->lhs; - int lhs_rank = NumDimensions(lhs); TfLiteIntArray* scratch_buffer_size = TfLiteIntArrayCreate(lhs_rank); for (int i = 0; i < lhs_rank - 2; ++i) { scratch_buffer_size->data[i] = lhs->dims->data[i]; @@ -132,8 +158,8 @@ TfLiteStatus InitializeTemporaries(TfLiteContext* context, TfLiteNode* node, // transpose by default, so that the two inputs (LHS and RHS) are in a proper // layout for our fast matrix multiplication routines. If the transpose flag // is set by the caller, the data is already in the desired layout. 
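  // (Summary added for reference, not part of the patch.) With this change the
  // op can own up to seven temporaries, indexed from scratch_tensor_index:
  //   0: transposed LHS           1: transposed RHS
  //   2: quantized (int8) input   3: per-batch scaling factors
  //   4: int32 accumulator        5: per-batch input zero points
  //   6: row sums of the quantized weights
  // Indices 2-6 are only allocated on the hybrid (float LHS, int8 RHS) path.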
- const bool rhs_needs_temp = !(op_context->params->adj_y); - if (rhs_needs_temp) { + { + node->temporaries->data[1] = op_data->scratch_tensor_index + 1; TfLiteTensor* scratch_buffer = GetTemporary(context, node, /*index=*/1); const TfLiteTensor* rhs = op_context->rhs; int rhs_rank = NumDimensions(rhs); @@ -155,6 +181,84 @@ TfLiteStatus InitializeTemporaries(TfLiteContext* context, TfLiteNode* node, TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scratch_buffer, scratch_buffer_size)); } + + // If we have to perform on-the-fly quantization (with quantized weights and + // float inputs) first we need to quantize the inputs. Allocate temporary + // buffer to store the intermediate quantized values, the batch scaling + // factors, the accumulator buffer (optimized version), the input offsets, + // and the sums of the rows for each weights matrix. + // RHS = weights, LHS = inputs + if (is_hybrid) { + // Calculate the total number of LHS batches. + int num_batches = 1; + for (int i = 0; i < lhs_rank - 2; ++i) { + num_batches *= lhs->dims->data[i]; + } + int num_weights_matrices = 1; + for (int i = 0; i < rhs_rank - 2; ++i) { + num_weights_matrices *= rhs->dims->data[i]; + } + op_data->compute_row_sums = true; + node->temporaries->data[2] = op_data->scratch_tensor_index + 2; + TfLiteTensor* input_quantized = GetTemporary(context, node, /*index=*/2); + input_quantized->type = op_context->rhs->type; + input_quantized->allocation_type = kTfLiteArenaRw; + + TfLiteIntArray* input_quantized_size = + TfLiteIntArrayCopy(op_context->lhs->dims); + TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, input_quantized, + input_quantized_size)); + + node->temporaries->data[3] = op_data->scratch_tensor_index + 3; + TfLiteTensor* scaling_factors = GetTemporary(context, node, /*index=*/3); + scaling_factors->type = kTfLiteFloat32; + scaling_factors->allocation_type = kTfLiteArenaRw; + // Total size of scaling factors is batch size * number of total batches + int scaling_dims[1] = {num_batches * batch_size}; + if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) { + TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1); + scaling_factors_size->data[0] = batch_size; + TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors, + scaling_factors_size)); + } + + node->temporaries->data[4] = op_data->scratch_tensor_index + 4; + TfLiteTensor* accum_scratch = GetTemporary(context, node, /*index=*/4); + accum_scratch->type = kTfLiteInt32; + accum_scratch->allocation_type = kTfLiteArenaRw; + int accum_scratch_dims[2] = {num_units, batch_size}; + if (!TfLiteIntArrayEqualsArray(accum_scratch->dims, 2, + accum_scratch_dims)) { + TfLiteIntArray* accum_size = TfLiteIntArrayCreate(2); + accum_size->data[0] = num_units; + accum_size->data[1] = batch_size; + TF_LITE_ENSURE_OK( + context, context->ResizeTensor(context, accum_scratch, accum_size)); + } + + node->temporaries->data[5] = op_data->scratch_tensor_index + 5; + TfLiteTensor* input_offsets = GetTemporary(context, node, /*index=*/5); + input_offsets->type = kTfLiteInt32; + input_offsets->allocation_type = kTfLiteArenaRw; + if (!TfLiteIntArrayEqualsArray(input_offsets->dims, 1, scaling_dims)) { + TfLiteIntArray* input_offsets_size = TfLiteIntArrayCreate(1); + input_offsets_size->data[0] = num_batches * batch_size; + TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, input_offsets, + input_offsets_size)); + } + node->temporaries->data[6] = op_data->scratch_tensor_index + 6; + TfLiteTensor* row_sums = 
GetTemporary(context, node, /*index=*/6); + row_sums->type = kTfLiteInt32; + row_sums->allocation_type = kTfLiteArenaRwPersistent; + int row_sums_dims[1] = {num_weights_matrices * num_units}; + if (!TfLiteIntArrayEqualsArray(row_sums->dims, 1, row_sums_dims)) { + TfLiteIntArray* row_sums_size = TfLiteIntArrayCreate(1); + row_sums_size->data[0] = row_sums_dims[0]; + TF_LITE_ENSURE_OK( + context, context->ResizeTensor(context, row_sums, row_sums_size)); + } + } + return kTfLiteOk; } @@ -173,7 +277,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteTensor* output = GetOutput(context, node, kOutputTensor); TF_LITE_ENSURE_EQ(context, lhs_data->type, kTfLiteFloat32); - TF_LITE_ENSURE_EQ(context, rhs_data->type, kTfLiteFloat32); + TF_LITE_ENSURE(context, rhs_data->type == kTfLiteFloat32 || + rhs_data->type == kTfLiteInt8); // Support dimensions between 2 and 4, inclusive. TF_LITE_ENSURE(context, NumDimensions(lhs_data) >= 2); TF_LITE_ENSURE(context, NumDimensions(lhs_data) <= 4); @@ -212,8 +317,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { } template -void TransposeRowsColumns(const TfLiteTensor* tensor_in, const scalar* input, - TfLiteTensor* tensor_out, scalar* output) { +void TransposeRowsColumnsImpl(const TfLiteTensor* tensor_in, + const scalar* input, TfLiteTensor* tensor_out, + scalar* output) { RuntimeShape transposed_shape(GetTensorShape(tensor_in)); RuntimeShape shape(GetTensorShape(tensor_in)); TransposeParams params; @@ -230,6 +336,26 @@ void TransposeRowsColumns(const TfLiteTensor* tensor_in, const scalar* input, optimized_ops::Transpose(params, shape, input, transposed_shape, output); } +TfLiteStatus TransposeRowsColumns(TfLiteContext* context, + const TfLiteTensor* tensor_in, + TfLiteTensor* tensor_out) { + if (tensor_in->type == kTfLiteFloat32) { + TransposeRowsColumnsImpl(tensor_in, GetTensorData(tensor_in), + tensor_out, + GetTensorData(tensor_out)); + return kTfLiteOk; + } else if (tensor_in->type == kTfLiteInt8) { + TransposeRowsColumnsImpl( + tensor_in, GetTensorData(tensor_in), tensor_out, + GetTensorData(tensor_out)); + return kTfLiteOk; + } else { + TF_LITE_KERNEL_LOG(context, + "Can only transpose tensors with float and int8 type."); + return kTfLiteError; + } +} + RuntimeShape SwapRowColumnDims(const RuntimeShape& shape) { RuntimeShape swapped_shape(shape); const int32_t dims = shape.DimensionsCount(); @@ -237,6 +363,104 @@ RuntimeShape SwapRowColumnDims(const RuntimeShape& shape) { swapped_shape.SetDim(dims - 1, shape.Dims(dims - 2)); return swapped_shape; } + +template +TfLiteStatus EvalHybrid(TfLiteContext* context, TfLiteNode* node, OpData* data, + const RuntimeShape& input_shape, + const TfLiteTensor* input, + const RuntimeShape& filter_shape, + const TfLiteTensor* filter, + TfLiteTensor* input_quantized, + TfLiteTensor* scaling_factors, + TfLiteTensor* accum_scratch, TfLiteTensor* row_sums, + TfLiteTensor* input_offsets, TfLiteTensor* output) { + const int32_t num_input_dims = input_shape.DimensionsCount(); + + // Input row/cols have been swapped at this point, so dims are + // {input_size, num_batches} + const int input_size = input_shape.Dims(num_input_dims - 2); + const int batch_size = input_shape.Dims(num_input_dims - 1); + + int num_batches_to_quantize = batch_size; + for (int i = 0; i < input_shape.DimensionsCount() - 2; ++i) { + num_batches_to_quantize *= input_shape.Dims(i); + } + // Quantize input from float to uint8 + quantization params (scaling factor). 
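  // (Clarifying note, not part of the patch.) Despite the "uint8" wording in
  // the comment above, the buffer filled below is int8: each batch of the float
  // LHS is quantized asymmetrically into int8 values plus a per-batch scale and
  // zero point, and that scale is then folded together with the weights' scale
  // via scaling_factors_ptr[b] *= filter->params.scale.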
+ float* scaling_factors_ptr = GetTensorData(scaling_factors); + int32_t* input_offset_ptr = nullptr; + int32_t* row_sums_ptr = nullptr; + // Only asymmetric quantization is supported. + input_offset_ptr = GetTensorData(input_offsets); + row_sums_ptr = GetTensorData(row_sums); + int8_t* quant_data = GetTensorData(input_quantized); + const int8_t* filter_data = GetTensorData(filter); + const float* input_ptr = GetTensorData(input); + // Quantize each batch independently. + for (int b = 0; b < num_batches_to_quantize; ++b) { + const int offset = b * input_size; + tensor_utils::AsymmetricQuantizeFloats( + input_ptr + offset, input_size, quant_data + offset, + &scaling_factors_ptr[b], &input_offset_ptr[b]); + // Incorporate scaling of the filter. + scaling_factors_ptr[b] *= filter->params.scale; + } + + RuntimeShape output_shape = GetTensorShape(output); + int output_size = 1; + for (int i = 0; i < output_shape.DimensionsCount(); ++i) { + output_size *= output_shape.Dims(i); + } + std::fill_n(GetTensorData(output), output_size, 0.0f); + if (kernel_type == kGenericOptimized) { + optimized_ops::BatchMatMul( + filter_shape, filter_data, input_shape, quant_data, scaling_factors_ptr, + input_offset_ptr, row_sums_ptr, GetTensorShape(output), + GetTensorData(accum_scratch), GetTensorData(output), + &(data->compute_row_sums), CpuBackendContext::GetFromContext(context)); + } else { + reference_ops::BatchMatMul( + filter_shape, filter_data, input_shape, quant_data, scaling_factors_ptr, + input_offset_ptr, row_sums_ptr, GetTensorShape(output), + GetTensorData(output), &(data->compute_row_sums)); + } + + return kTfLiteOk; +} + +template +TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node, + OpData* data, const RuntimeShape& lhs_shape, + const TfLiteTensor* lhs, + const RuntimeShape& rhs_shape, + const TfLiteTensor* rhs, TfLiteTensor* output) { + if (lhs->type == kTfLiteFloat32) { + TfLiteTensor* input_quantized = GetTemporary(context, node, /*index=*/2); + TfLiteTensor* scaling_factors = GetTemporary(context, node, /*index=*/3); + TfLiteTensor* accum_scratch = GetTemporary(context, node, /*index=*/4); + TfLiteTensor* input_offsets = GetTemporary(context, node, /*index=*/5); + TfLiteTensor* row_sums = GetTemporary(context, node, /*index=*/6); + return EvalHybrid( + context, node, data, lhs_shape, lhs, rhs_shape, rhs, input_quantized, + scaling_factors, accum_scratch, row_sums, input_offsets, output); + } else { + TF_LITE_KERNEL_LOG(context, + "Currently only hybrid quantization is supported.\n"); + return kTfLiteError; + } + return kTfLiteOk; +} + +TfLiteTensor* GetRhs(TfLiteContext* context, TfLiteNode* node, + const TfLiteTensor* rhs) { + TfLiteTensor* transposed_rhs = GetTemporary(context, node, 1); + if (rhs->type == kTfLiteInt8) { + // Get the quantization params from the weights tensors. + transposed_rhs->params.scale = rhs->params.scale; + transposed_rhs->params.zero_point = rhs->params.zero_point; + } + return transposed_rhs; +} + // Perform a batch matrix multiply on // LHS <..., A, B> X RHS<..., B, C> // where the leading dimensions of LHS and RHS obey broadcasting rules @@ -261,29 +485,25 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { bool adj_y = op_context.params->adj_y; bool adj_x = op_context.params->adj_x; - const TfLiteTensor* rhs_tensor = adj_y ? rhs : GetTemporary(context, node, 1); + const TfLiteTensor* rhs_tensor = adj_y ? rhs : GetRhs(context, node, rhs); const TfLiteTensor* lhs_tensor = adj_x ? 
GetTemporary(context, node, 0) : lhs; if (!adj_y) { // TODO(b/154760341) Constant tensors should already be transposed, but // we transpose once if necessary for now. if (!(IsConstantTensor(rhs) && op_data->rhs_transposed)) { - TransposeRowsColumns( - rhs, GetTensorData(rhs), GetTemporary(context, node, 1), - GetTensorData(GetTemporary(context, node, 1))); + TransposeRowsColumns(context, rhs, GetTemporary(context, node, 1)); op_data->rhs_transposed = true; } } if (adj_x) { - TransposeRowsColumns( - lhs, GetTensorData(lhs), GetTemporary(context, node, 0), - GetTensorData(GetTemporary(context, node, 0))); + TransposeRowsColumns(context, lhs, GetTemporary(context, node, 0)); } RuntimeShape rhs_shape = adj_y ? orig_rhs_shape : SwapRowColumnDims(orig_rhs_shape); RuntimeShape lhs_shape = adj_x ? orig_lhs_shape : SwapRowColumnDims(orig_lhs_shape); - switch (lhs->type) { + switch (rhs->type) { case kTfLiteFloat32: // Note we pass RHS args first, LHS args second. See note above. if (kernel_type == kGenericOptimized) { @@ -299,6 +519,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { GetTensorData(output)); } break; + case kTfLiteInt8: + EvalQuantized(context, node, op_data, lhs_shape, lhs_tensor, + rhs_shape, rhs_tensor, output); + break; default: TF_LITE_KERNEL_LOG(context, "Currently BatchMatMul doesn't support type: %s", diff --git a/tensorflow/lite/kernels/batch_matmul_test.cc b/tensorflow/lite/kernels/batch_matmul_test.cc index eeb075c4fe1..aec031015c0 100644 --- a/tensorflow/lite/kernels/batch_matmul_test.cc +++ b/tensorflow/lite/kernels/batch_matmul_test.cc @@ -247,5 +247,185 @@ TEST(BatchMatMulOpModelTest, Float32Test_BroadcastFromRHS) { EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({3, 1, 4, 2})); } +// In the hybrid model the weights are quantized int8. But the input +// and output are expected to be in float precision. 
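+// The weights are symmetrically quantized ahead of time (SetSignedWeights
+// uses SignedSymmetricQuantizeAndPopulate), while the float inputs are
+// quantized asymmetrically on the fly inside the kernel.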
+class HybridAsymmetricBatchMatMulOpModel : public SingleOpModel { + public: + HybridAsymmetricBatchMatMulOpModel( + int units, int batches, const TensorData& lhs, const TensorData& rhs, + const TensorData& output = {TensorType_FLOAT32}, bool adj_x = false, + bool adj_y = false) + : units_(units), batches_(batches) { + int total_input_size = 1; + for (size_t i = 0; i < lhs.shape.size(); ++i) { + total_input_size *= lhs.shape[i]; + } + input_size_ = total_input_size / batches_; + + lhs_id_ = AddInput(lhs); + rhs_id_ = AddInput(rhs); + + output_id_ = AddOutput(output); + + SetBuiltinOp(BuiltinOperator_BATCH_MATMUL, + BuiltinOptions_BatchMatMulOptions, + CreateBatchMatMulOptions(builder_, adj_x, adj_y).Union()); + BuildInterpreter({GetShape(lhs_id_), GetShape(rhs_id_)}); + } + void SetWeights(const std::vector& data) { + SymmetricQuantizeAndPopulate(rhs_id_, data); + } + + void SetSignedWeights(std::initializer_list f) { + SignedSymmetricQuantizeAndPopulate(rhs_id_, f); + } + + void SetInput(const std::vector& f) { PopulateTensor(lhs_id_, f); } + std::vector GetOutput() { return ExtractVector(output_id_); } + std::vector GetOutputShape() { return GetTensorShape(output_id_); } + + int input_size() { return input_size_; } + int num_units() { return units_; } + int num_batches() { return batches_; } + + int lhs() const { return lhs_id_; } + int rhs() const { return rhs_id_; } + + protected: + int lhs_id_; + int rhs_id_; + int output_id_; + int units_; + int batches_; + int input_size_; +}; + +TEST(HybridAsymmetricBatchMatMulOpTest, SimpleTestQuantizedInt8) { + HybridAsymmetricBatchMatMulOpModel m( + /*units=*/3, /*batches=*/2, + /*lhs=*/{TensorType_FLOAT32, {2, 10}}, + /*rhs=*/{TensorType_INT8, {10, 3}, 0, 0, 10.0 / 127.0, 0}); + + m.SetSignedWeights({ + 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, + 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, + }); + + m.SetInput({ + 11, 12, 13, 14, 15, 16, 17, 18, -19, -20, // batch 1, 0 + 11, 12, 13, 14, 15, 16, 17, -18, 19, -20, // batch 1, 1 + }); + + m.Invoke(); + + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear( + { + 196, + 196, + 196, + 246, + 246, + 246, + }, + /*max_abs_error=*/0.64f))); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 3})); +} + +TEST(HybridAsymmetricBatchMatMulOpTest, QuantizedInt8BroadcastWeights) { + HybridAsymmetricBatchMatMulOpModel m( + /*units=*/3, /*batches=*/2, + /*lhs=*/{TensorType_FLOAT32, {2, 2, 10}}, + /*rhs=*/{TensorType_INT8, {10, 3}, 0, 0, 10.0 / 127.0, 0}); + + m.SetSignedWeights({ + 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, + 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, + }); + + m.SetInput({ + 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // batch 0, 0 + 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // batch 0, 1 + 11, 12, 13, 14, 15, 16, 17, 18, -19, -20, // batch 1, 0 + 11, 12, 13, 14, 15, 16, 17, -18, 19, -20, // batch 1, 1 + }); + + m.Invoke(); + + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear( + { + 24, 24, 24, // + 58, 58, 58, // + 196, 196, 196, // + 246, 246, 246, // + }, + /*max_abs_error=*/1.3f))); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 2, 3})); +} + +TEST(HybridAsymmetricBatchMatMulOpTest, QuantizedInt8BroadcastBigWeights) { + HybridAsymmetricBatchMatMulOpModel m( + /*units=*/9, /*batches=*/2, + /*lhs=*/{TensorType_FLOAT32, {2, 2, 10}}, + /*rhs=*/{TensorType_INT8, {10, 9}, 0, 0, 10.0 / 127.0, 0}); + + m.SetSignedWeights({ + 1, 1, 1, 17, 17, 17, 26, 26, 26, 2, 2, 2, 18, 18, 18, 27, 27, 27, + 3, 3, 3, 19, 19, 19, 28, 28, 28, 4, 4, 4, 20, 20, 20, 29, 29, 29, + 5, 5, 5, 21, 21, 
21, 30, 30, 30, 6, 6, 6, 22, 22, 22, 31, 31, 31, + 7, 7, 7, 23, 23, 23, 32, 32, 32, 8, 8, 8, 24, 24, 24, 33, 33, 33, + 9, 9, 9, 25, 25, 25, 34, 34, 34, 10, 10, 10, 26, 26, 26, 35, 35, 35, + }); + + m.SetInput({ + 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // batch 0, 0 + 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // batch 0, 1 + 11, 12, 13, 14, 15, 16, 17, 18, -19, -20, // batch 1, 0 + 11, 12, 13, 14, 15, 16, 17, -18, 19, -20, // batch 1, 1 + }); + + m.Invoke(); + + EXPECT_THAT(m.GetOutput(), + ElementsAreArray(ArrayFloatNear( + { + 23, 23, 23, 295, 295, 295, 449, 449, 449, // + 60, 60, 60, 364, 364, 364, 533, 533, 533, // + 195, 195, 195, 1429, 1429, 1429, 2124, 2124, 2124, // + 250, 250, 250, 1512, 1512, 1512, 2213, 2213, 2213 // + }, + /*max_abs_error=*/1.3f))); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 2, 9})); +} + +TEST(HybridAsymmetricBatchMatMulOpTest, QuantizedInt8BroadcastInputs) { + HybridAsymmetricBatchMatMulOpModel m( + /*units=*/3, /*batches=*/2, + /*lhs=*/{TensorType_FLOAT32, {2, 10}}, + /*rhs=*/{TensorType_INT8, {2, 10, 3}, 0, 0, 10.0 / 127.0, 0}); + + m.SetSignedWeights({ + 1, -3, 1, 2, -2, 2, 3, -1, 3, 4, 0, 4, 5, 1, 5, 6, 2, 6, 7, 3, + 7, 8, 4, 8, 9, 5, 9, 10, 6, 10, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, + 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, + }); + + m.SetInput({ + 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // batch 0, 0 + 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // batch 0, 1 + }); + + m.Invoke(); + + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear( + { + 24, -45, 24, // + 58, -18, 58, // + 24, 24, 24, // + 58, 58, 58, // + }, + /*max_abs_error=*/0.64f))); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 2, 3})); +} + } // namespace } // namespace tflite diff --git a/tensorflow/lite/kernels/internal/BUILD b/tensorflow/lite/kernels/internal/BUILD index 4bbf6704622..a02a5bf3981 100644 --- a/tensorflow/lite/kernels/internal/BUILD +++ b/tensorflow/lite/kernels/internal/BUILD @@ -505,6 +505,7 @@ cc_library( ":common", ":compatibility", ":cppmath", + ":portable_tensor_utils", ":quantization_util", ":strided_slice_logic", ":tensor", diff --git a/tensorflow/lite/kernels/internal/optimized/batch_matmul.h b/tensorflow/lite/kernels/internal/optimized/batch_matmul.h index cd1241ed225..24b5012304f 100644 --- a/tensorflow/lite/kernels/internal/optimized/batch_matmul.h +++ b/tensorflow/lite/kernels/internal/optimized/batch_matmul.h @@ -19,6 +19,7 @@ limitations under the License. 
#include "tensorflow/lite/kernels/cpu_backend_gemm.h" #include "tensorflow/lite/kernels/cpu_backend_gemm_params.h" #include "tensorflow/lite/kernels/internal/common.h" +#include "tensorflow/lite/kernels/internal/tensor_utils.h" #include "tensorflow/lite/kernels/internal/types.h" namespace tflite { @@ -111,6 +112,166 @@ inline void BatchMatMul(const RuntimeShape& lhs_shape, const float* lhs_data, } } +inline void BatchMatMul(const RuntimeShape& lhs_shape, const int8_t* lhs_data, + const RuntimeShape& rhs_shape, const int8_t* rhs_data, + const float* scaling_factors, + const int32_t* input_offset, int32_t* row_sums, + const RuntimeShape& output_shape, + int32_t* accum_scratch, float* output_data, + bool* compute_row_sums, CpuBackendContext* context) { + using ::tflite::cpu_backend_gemm::Gemm; + using ::tflite::cpu_backend_gemm::GemmParams; + using ::tflite::cpu_backend_gemm::MatrixParams; + + const RuntimeShape extended_lhs_shape = + RuntimeShape::ExtendedShape(5, lhs_shape); + const RuntimeShape extended_rhs_shape = + RuntimeShape::ExtendedShape(5, rhs_shape); + + // Determine which dimension is the broadcast dimension. + auto broadcast_dim = [](int lhs_dim, int rhs_dim) { + if (lhs_dim == rhs_dim) return lhs_dim; + if (lhs_dim == 1) return rhs_dim; + TFLITE_DCHECK_EQ(rhs_dim, 1); + return lhs_dim; + }; + + // Compute the "extent" for iterating on this dimension. + // If we are broadcasting, then don't advance (i.e return 0). + auto extent = [](const RuntimeShape& shape, int x) { + if (shape.Dims(x) == 1) { + return 0; + } + int prod = 1; + for (int i = x + 1; i < shape.DimensionsCount(); ++i) { + prod *= shape.Dims(i); + } + return prod; + }; + + const int batch_dim0 = + broadcast_dim(extended_lhs_shape.Dims(0), extended_rhs_shape.Dims(0)); + const int batch_dim1 = + broadcast_dim(extended_lhs_shape.Dims(1), extended_rhs_shape.Dims(1)); + const int batch_dim2 = + broadcast_dim(extended_lhs_shape.Dims(2), extended_rhs_shape.Dims(2)); + + const int lhs_ext0 = extent(extended_lhs_shape, 0); + const int lhs_ext1 = extent(extended_lhs_shape, 1); + const int lhs_ext2 = extent(extended_lhs_shape, 2); + const int rhs_ext0 = extent(extended_rhs_shape, 0); + const int rhs_ext1 = extent(extended_rhs_shape, 1); + const int rhs_ext2 = extent(extended_rhs_shape, 2); + + // Set params for each matrix multiply. + const int lhs_rows = extended_lhs_shape.Dims(3); + const int rhs_cols = extended_rhs_shape.Dims(4); + const int accum_depth = extended_lhs_shape.Dims(4); + + const int ioff_ext0 = rhs_ext0 == 0 ? 0 : rhs_cols; + const int ioff_ext1 = rhs_ext1 == 0 ? 0 : rhs_cols; + const int ioff_ext2 = rhs_ext2 == 0 ? 0 : rhs_cols; + const int woff_ext0 = lhs_ext0 == 0 ? 0 : lhs_rows; + const int woff_ext1 = lhs_ext1 == 0 ? 0 : lhs_rows; + const int woff_ext2 = lhs_ext2 == 0 ? 
0 : lhs_rows; + + if (!compute_row_sums || *compute_row_sums) { + int num_weights_matrices = 1; + for (int i = 1; i < extended_lhs_shape.DimensionsCount() - 2; ++i) { + num_weights_matrices *= extended_lhs_shape.Dims(i); + } + memset(row_sums, 0, sizeof(int32_t) * lhs_rows * num_weights_matrices); + for (int j = 0; j < num_weights_matrices; ++j) { + tensor_utils::ReductionSumVector(lhs_data + j * lhs_rows * accum_depth, + row_sums + j * lhs_rows, lhs_rows, + accum_depth); + } + if (compute_row_sums) { + *compute_row_sums = false; + } + } + + MatrixParams lhs_params; + lhs_params.order = cpu_backend_gemm::Order::kRowMajor; + lhs_params.rows = lhs_rows; + lhs_params.cols = accum_depth; + + MatrixParams rhs_params; + rhs_params.order = cpu_backend_gemm::Order::kColMajor; + rhs_params.rows = accum_depth; + rhs_params.cols = rhs_cols; + + MatrixParams dst_params; + dst_params.order = cpu_backend_gemm::Order::kColMajor; + dst_params.rows = lhs_rows; + dst_params.cols = rhs_cols; + + for (int b0 = 0; b0 < batch_dim0; ++b0) { + const int8_t* lhs_ptr0 = lhs_data + (b0 * lhs_ext0); + const int8_t* rhs_ptr0 = rhs_data + (b0 * rhs_ext0); + const int32_t* ioff_ptr0 = input_offset + (b0 * ioff_ext0); + const float* scale_ptr0 = scaling_factors + (b0 * ioff_ext0); + const int32_t* woff_ptr0 = row_sums + (b0 * woff_ext0); + for (int b1 = 0; b1 < batch_dim1; ++b1) { + const int8_t* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1; + const int8_t* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1; + const int32_t* ioff_ptr1 = ioff_ptr0 + (b1 * ioff_ext1); + const float* scale_ptr1 = scale_ptr0 + (b1 * ioff_ext1); + const int32_t* woff_ptr1 = woff_ptr0 + (b1 * woff_ext1); + for (int b2 = 0; b2 < batch_dim2; ++b2) { + const int8_t* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2; + const int8_t* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2; + const int32_t* ioff_ptr2 = ioff_ptr1 + (b2 * ioff_ext2); + const float* scale_ptr2 = scale_ptr1 + (b2 * ioff_ext2); + const int32_t* woff_ptr2 = woff_ptr1 + (b2 * woff_ext2); + float* out_ptr = output_data + ((b0 * batch_dim1 * batch_dim2) + + b1 * batch_dim2 + b2) * + lhs_rows * rhs_cols; + GemmParams gemm_params; + cpu_backend_gemm::Gemm(lhs_params, lhs_ptr2, rhs_params, rhs_ptr2, + dst_params, accum_scratch, gemm_params, context); + for (int j = 0; j < rhs_cols; ++j) { + const float batch_scaling_factor = scale_ptr2[j]; + const float batch_offset = static_cast(ioff_ptr2[j]); + int i = 0; +#ifdef USE_NEON + const float32x4_t scaling_factor0 = vdupq_n_f32(batch_scaling_factor); + const float32x4_t scaling_factor1 = vdupq_n_f32(batch_scaling_factor); + const int32x4_t input_offset0 = vdupq_n_s32(-batch_offset); + const int32x4_t input_offset1 = vdupq_n_s32(-batch_offset); + for (; i < lhs_rows - 8; i += 8) { + // Load the row sums; + const int32x4_t row_sum0 = vld1q_s32(woff_ptr2 + i); + const int32x4_t row_sum1 = vld1q_s32(woff_ptr2 + i + 4); + // Load the accumulated values. 
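+            // The loads below feed the asymmetric-quantization correction,
+            //   acc -= row_sum * input_offset,
+            // followed by the rescale back to float,
+            //   out += batch_scaling_factor * acc,
+            // mirroring the scalar tail loop after this block.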
+ int idx = lhs_rows * j + i; + const int32x4_t scratch_val0 = vld1q_s32(accum_scratch + idx); + const int32x4_t scratch_val1 = vld1q_s32(accum_scratch + idx + 4); + const int32x4_t dotprod0 = + vmlaq_s32(scratch_val0, row_sum0, input_offset0); + const int32x4_t dotprod1 = + vmlaq_s32(scratch_val1, row_sum1, input_offset1); + const float32x4_t float_val0 = vcvtq_f32_s32(dotprod0); + const float32x4_t float_val1 = vcvtq_f32_s32(dotprod1); + const float32x4_t result0 = vmlaq_f32(vld1q_f32(out_ptr + idx), + float_val0, scaling_factor0); + const float32x4_t result1 = vmlaq_f32(vld1q_f32(out_ptr + idx + 4), + float_val1, scaling_factor1); + vst1q_f32(out_ptr + idx, result0); + vst1q_f32(out_ptr + idx + 4, result1); + } +#endif // USE_NEON + for (; i < lhs_rows; ++i) { + int idx = lhs_rows * j + i; + accum_scratch[idx] -= woff_ptr2[i] * batch_offset; + out_ptr[idx] += batch_scaling_factor * accum_scratch[idx]; + } + } + } + } + } +} + } // namespace optimized_ops } // namespace tflite diff --git a/tensorflow/lite/kernels/internal/reference/batch_matmul.h b/tensorflow/lite/kernels/internal/reference/batch_matmul.h index 2a6b6d6f0f5..c8d6d6a0e29 100644 --- a/tensorflow/lite/kernels/internal/reference/batch_matmul.h +++ b/tensorflow/lite/kernels/internal/reference/batch_matmul.h @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/common.h" +#include "tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h" #include "tensorflow/lite/kernels/internal/types.h" namespace tflite { @@ -98,6 +99,121 @@ inline void BatchMatMul(const RuntimeShape& lhs_shape, const float* lhs_data, } } +inline void BatchMatMul(const RuntimeShape& lhs_shape, const int8_t* lhs_data, + const RuntimeShape& rhs_shape, const int8_t* rhs_data, + const float* scaling_factors, + const int32_t* input_offset, int32_t* row_sums, + const RuntimeShape& output_shape, float* output_data, + bool* compute_row_sums) { + const RuntimeShape extended_lhs_shape = + RuntimeShape::ExtendedShape(5, lhs_shape); + const RuntimeShape extended_rhs_shape = + RuntimeShape::ExtendedShape(5, rhs_shape); + + // Determine which dimension is the broadcast dimension. + auto broadcast_dim = [](int lhs_dim, int rhs_dim) { + if (lhs_dim == rhs_dim) return lhs_dim; + if (lhs_dim == 1) return rhs_dim; + TFLITE_DCHECK_EQ(rhs_dim, 1); + return lhs_dim; + }; + + // Compute the "extent" for iterating on this dimension. + // If we are broadcasting, then don't advance (i.e return 0). + auto extent = [](const RuntimeShape& shape, int x) { + if (shape.Dims(x) == 1) { + return 0; + } + int prod = 1; + for (int i = x + 1; i < shape.DimensionsCount(); ++i) { + prod *= shape.Dims(i); + } + return prod; + }; + + const int batch_dim0 = + broadcast_dim(extended_lhs_shape.Dims(0), extended_rhs_shape.Dims(0)); + const int batch_dim1 = + broadcast_dim(extended_lhs_shape.Dims(1), extended_rhs_shape.Dims(1)); + const int batch_dim2 = + broadcast_dim(extended_lhs_shape.Dims(2), extended_rhs_shape.Dims(2)); + + const int lhs_ext0 = extent(extended_lhs_shape, 0); + const int lhs_ext1 = extent(extended_lhs_shape, 1); + const int lhs_ext2 = extent(extended_lhs_shape, 2); + const int rhs_ext0 = extent(extended_rhs_shape, 0); + const int rhs_ext1 = extent(extended_rhs_shape, 1); + const int rhs_ext2 = extent(extended_rhs_shape, 2); + + // Set params for each matrix multiply. 
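+  // With both shapes extended to rank 5, dims 0-2 are (possibly broadcast)
+  // batch dimensions and dims 3-4 hold the individual matrices.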
+ const int lhs_rows = extended_lhs_shape.Dims(3); + const int rhs_cols = extended_rhs_shape.Dims(4); + const int accum_depth = extended_lhs_shape.Dims(4); + + const int ioff_ext0 = rhs_ext0 == 0 ? 0 : rhs_cols; + const int ioff_ext1 = rhs_ext1 == 0 ? 0 : rhs_cols; + const int ioff_ext2 = rhs_ext2 == 0 ? 0 : rhs_cols; + const int woff_ext0 = lhs_ext0 == 0 ? 0 : lhs_rows; + const int woff_ext1 = lhs_ext1 == 0 ? 0 : lhs_rows; + const int woff_ext2 = lhs_ext2 == 0 ? 0 : lhs_rows; + + if (!compute_row_sums || *compute_row_sums) { + int num_weights_matrices = 1; + for (int i = 1; i < extended_lhs_shape.DimensionsCount() - 2; ++i) { + num_weights_matrices *= extended_lhs_shape.Dims(i); + } + memset(row_sums, 0, sizeof(int32_t) * lhs_rows * num_weights_matrices); + for (int j = 0; j < num_weights_matrices; ++j) { + tensor_utils::PortableReductionSumVector( + lhs_data + j * lhs_rows * accum_depth, row_sums + j * lhs_rows, + lhs_rows, accum_depth); + } + if (compute_row_sums) { + *compute_row_sums = false; + } + } + + for (int b0 = 0; b0 < batch_dim0; ++b0) { + const int8_t* lhs_ptr0 = lhs_data + (b0 * lhs_ext0); + const int8_t* rhs_ptr0 = rhs_data + (b0 * rhs_ext0); + const int32_t* ioff_ptr0 = input_offset + (b0 * ioff_ext0); + const float* scale_ptr0 = scaling_factors + (b0 * ioff_ext0); + const int32_t* woff_ptr0 = row_sums + (b0 * woff_ext0); + for (int b1 = 0; b1 < batch_dim1; ++b1) { + const int8_t* lhs_ptr1 = lhs_ptr0 + b1 * lhs_ext1; + const int8_t* rhs_ptr1 = rhs_ptr0 + b1 * rhs_ext1; + const int32_t* ioff_ptr1 = ioff_ptr0 + (b1 * ioff_ext1); + const float* scale_ptr1 = scale_ptr0 + (b1 * ioff_ext1); + const int32_t* woff_ptr1 = woff_ptr0 + (b1 * woff_ext1); + for (int b2 = 0; b2 < batch_dim2; ++b2) { + const int8_t* lhs_ptr2 = lhs_ptr1 + b2 * lhs_ext2; + const int8_t* rhs_ptr2 = rhs_ptr1 + b2 * rhs_ext2; + const int32_t* ioff_ptr2 = ioff_ptr1 + (b2 * ioff_ext2); + const float* scale_ptr2 = scale_ptr1 + (b2 * ioff_ext2); + const int32_t* woff_ptr2 = woff_ptr1 + (b2 * woff_ext2); + float* out_ptr = output_data + ((b0 * batch_dim1 * batch_dim2) + + b1 * batch_dim2 + b2) * + lhs_rows * rhs_cols; + for (int j = 0; j < rhs_cols; ++j) { + const float batch_scaling_factor = scale_ptr2[j]; + const float batch_offset = static_cast(ioff_ptr2[j]); + for (int i = 0; i < lhs_rows; ++i) { + int32_t total = 0; + for (int k = 0; k < accum_depth; ++k) { + total += + lhs_ptr2[accum_depth * i + k] * rhs_ptr2[j * accum_depth + k]; + } + int32_t row_sum = woff_ptr2[i]; + total -= row_sum * batch_offset; + int idx = lhs_rows * j + i; + out_ptr[idx] += batch_scaling_factor * total; + } + } + } + } + } +} + } // namespace reference_ops } // namespace tflite From 03714f32b084c8f870b6c9a79bace34f537bd71c Mon Sep 17 00:00:00 2001 From: Andrew Audibert Date: Mon, 8 Jun 2020 13:49:56 -0700 Subject: [PATCH 074/178] [tf.data service] Rename GetElement TraceMe to GetDataServiceElement This makes it clearer what the TraceMe means when it shows up in xprof traces. 
PiperOrigin-RevId: 315344974 Change-Id: I2c1159c47b54444a593ca68aa2c42b5e8f040cab --- .../core/kernels/data/experimental/data_service_dataset_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc b/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc index a106bcb0a7c..ee8f72bc663 100644 --- a/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc @@ -448,7 +448,7 @@ class DataServiceDatasetOp::Dataset : public DatasetBase { TF_LOCKS_EXCLUDED(mu_) { VLOG(3) << "Getting an element for task id " << task->task_id; tensorflow::profiler::TraceMe activity( - "GetElement", tensorflow::profiler::TraceMeLevel::kInfo); + "GetDataServiceElement", tensorflow::profiler::TraceMeLevel::kInfo); CompressedElement compressed; bool end_of_sequence; for (int num_retries = 0;; ++num_retries) { From da37e9874b3eea0c3dfee3d9b3ead313c17e6f02 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Mon, 8 Jun 2020 14:08:48 -0700 Subject: [PATCH 075/178] Enable tests for var PiperOrigin-RevId: 315349295 Change-Id: I3ea745072fd7e0b4950f83daa539ede607460876 --- tensorflow/python/ops/numpy_ops/__init__.py | 2 + .../python/ops/numpy_ops/np_array_ops.py | 77 +++++++++++-------- 2 files changed, 45 insertions(+), 34 deletions(-) diff --git a/tensorflow/python/ops/numpy_ops/__init__.py b/tensorflow/python/ops/numpy_ops/__init__.py index 383206a83fd..8b979db829d 100644 --- a/tensorflow/python/ops/numpy_ops/__init__.py +++ b/tensorflow/python/ops/numpy_ops/__init__.py @@ -42,3 +42,5 @@ max = amax min = amin round = around # pylint: enable=redefined-builtin,undefined-variable + +from tensorflow.python.ops.array_ops import newaxis diff --git a/tensorflow/python/ops/numpy_ops/np_array_ops.py b/tensorflow/python/ops/numpy_ops/np_array_ops.py index a7b03fbb1ee..944aed1c5a1 100644 --- a/tensorflow/python/ops/numpy_ops/np_array_ops.py +++ b/tensorflow/python/ops/numpy_ops/np_array_ops.py @@ -19,7 +19,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import functools import math import numpy as np import six @@ -774,47 +773,57 @@ def amin(a, axis=None, keepdims=None): preserve_bool=True) -# TODO(wangpeng): Remove this workaround once b/157232284 is fixed -def _reduce_variance_complex(input_tensor, axis, keepdims): - f = functools.partial(math_ops.reduce_variance, axis=axis, keepdims=keepdims) - return f(math_ops.real(input_tensor)) + f(math_ops.imag(input_tensor)) +def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=None): # pylint: disable=missing-docstring + if dtype: + working_dtype = np_utils.result_type(a, dtype) + else: + working_dtype = None + if out is not None: + raise ValueError('Setting out is not supported.') + if ddof != 0: + # TF reduce_variance doesn't support ddof, so calculate it using raw ops. 
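+    # This follows the NumPy definition
+    #   var = sum(|x - mean(x)|**2, axis) / (n - ddof),
+    # where n is the number of elements being reduced.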
+ def reduce_fn(input_tensor, axis, keepdims): + means = math_ops.reduce_mean(input_tensor, axis=axis, keepdims=True) + centered = input_tensor - means + if input_tensor.dtype in (dtypes.complex64, dtypes.complex128): + centered = math_ops.cast( + math_ops.real(centered * math_ops.conj(centered)), + input_tensor.dtype) + else: + centered = math_ops.square(centered) + squared_deviations = math_ops.reduce_sum( + centered, axis=axis, keepdims=keepdims) + if axis is None: + n = array_ops.size(input_tensor) + else: + if axis < 0: + axis += array_ops.rank(input_tensor) + n = math_ops.reduce_prod( + array_ops.gather(array_ops.shape(input_tensor), axis)) + n = math_ops.cast(n - ddof, input_tensor.dtype) -# TODO(wangpeng): Remove this workaround once b/157232284 is fixed -def _reduce_std_complex(input_tensor, axis, keepdims): - y = _reduce_variance_complex( - input_tensor=input_tensor, axis=axis, keepdims=keepdims) - return math_ops.sqrt(y) + return math_ops.cast(math_ops.divide(squared_deviations, n), dtype) + else: + reduce_fn = math_ops.reduce_variance - -@np_utils.np_doc(np.var) -def var(a, axis=None, keepdims=None): # pylint: disable=missing-function-docstring - - def f(input_tensor, axis, keepdims): - if input_tensor.dtype in (dtypes.complex64, dtypes.complex128): - # A workaround for b/157232284 - fn = _reduce_variance_complex - else: - fn = math_ops.reduce_variance - return fn(input_tensor=input_tensor, axis=axis, keepdims=keepdims) - - return _reduce( - f, a, axis=axis, dtype=None, keepdims=keepdims, promote_int=_TO_FLOAT) + result = _reduce( + reduce_fn, + a, + axis=axis, + dtype=working_dtype, + keepdims=keepdims, + promote_int=_TO_FLOAT).data + if dtype: + result = math_ops.cast(result, dtype) + return np_utils.tensor_to_ndarray(result) @np_utils.np_doc(np.std) def std(a, axis=None, keepdims=None): # pylint: disable=missing-function-docstring - - def f(input_tensor, axis, keepdims): - if input_tensor.dtype in (dtypes.complex64, dtypes.complex128): - # A workaround for b/157232284 - fn = _reduce_std_complex - else: - fn = math_ops.reduce_std - return fn(input_tensor=input_tensor, axis=axis, keepdims=keepdims) - return _reduce( - f, a, axis=axis, dtype=None, keepdims=keepdims, promote_int=_TO_FLOAT) + math_ops.reduce_std, a, axis=axis, dtype=None, keepdims=keepdims, + promote_int=_TO_FLOAT) @np_utils.np_doc(np.ravel) From 04a5c1d9b435301c7d2afc124a84ea76fd28b651 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 8 Jun 2020 14:10:26 -0700 Subject: [PATCH 076/178] Avoid converting tensors with per-channel quantization to UINT8 in NNAPI delegate. PiperOrigin-RevId: 315349649 Change-Id: I47627d190351e43c09d567936d14ef96992a4a3b --- .../lite/delegates/nnapi/nnapi_delegate.cc | 3 +- .../delegates/nnapi/nnapi_delegate_test.cc | 109 ++++++++++++++++++ 2 files changed, 111 insertions(+), 1 deletion(-) diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc index 2ca4cf35ba4..a3a3f9fda4d 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc @@ -1180,7 +1180,8 @@ class NNAPIOpBuilder { "setting new operand per channel quantization params", nnapi_errno_); } if (tensor->allocation_type == kTfLiteMmapRo) { - if (IsQuantized(tensor_type) && need_int8_conversion) { + if (IsQuantized(tensor_type) && need_int8_conversion && + nn_type != ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) { // We need to to add a tensor and convert the weights into uint8. 
// Currently this is only needed for fully_connected. The new_tensor is // needed for lifetime management for the converted weights. diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc index 4caf5448b99..f8d368839c2 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc @@ -16,6 +16,8 @@ limitations under the License. #include +#include + #include #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/interpreter.h" @@ -845,6 +847,113 @@ TEST(ConvolutionOpTest, SimpleTestQuantizedWithDilation) { ElementsAreArray({5, 5, 5, 5, 5, 5, 5, 5, 5})); } +class PerChannelQuantizedConvolutionWithConstantFilterOpModel + : public SingleOpModelWithNNAPI { + public: + PerChannelQuantizedConvolutionWithConstantFilterOpModel( + const TensorData& input, const TensorData& filter, + std::initializer_list filter_data, + std::initializer_list bias_data, const TensorData& output, + int stride_width = 2, int stride_height = 2, + enum Padding padding = Padding_VALID, + enum ActivationFunctionType activation = ActivationFunctionType_NONE, + int dilation_width_factor = 1, int dilation_height_factor = 1) + : input_type_(input.type), filter_type_(filter.type) { + CHECK(filter.per_channel_quantization); + input_ = AddInput(input); + filter_ = AddConstInput(filter, filter_data); + + const int bias_size = GetShape(filter_)[0]; + const int num_channels = filter.per_channel_quantization_scales.size(); + const std::vector bias_offsets(num_channels, 0); + std::vector bias_scales(num_channels); + for (int i = 0; i < num_channels; i++) { + bias_scales[i] = input.scale * filter.per_channel_quantization_scales[i]; + } + const TensorData bias{TensorType_INT32, + {bias_size}, + /*min=*/0, + /*max=*/0, + /*scale=*/0, + /*zero_point=*/0, + /*per_channel_quantization=*/true, + /*per_channel_quantization_scales=*/bias_scales, + /*per_channel_quantization_offsets=*/bias_offsets, + /*channel_index==*/0}; + bias_ = AddConstInput(bias, bias_data); + + output_ = AddOutput(output); + + SetBuiltinOp(BuiltinOperator_CONV_2D, BuiltinOptions_Conv2DOptions, + CreateConv2DOptions( + builder_, padding, stride_width, stride_height, activation, + dilation_width_factor, dilation_height_factor) + .Union()); + + BuildInterpreter({GetShape(input_), GetShape(filter_), GetShape(bias_)}); + } + + void SetInput(std::initializer_list data) { + QuantizeAndPopulate(input_, data); + } + + std::vector GetOutput() { return ExtractVector(output_); } + + protected: + int input_; + int filter_; + int bias_; + int output_; + + const TensorType input_type_; + const TensorType filter_type_; +}; + +TEST(ConvolutionOpTest, SimplePerChannelTest) { + PerChannelQuantizedConvolutionWithConstantFilterOpModel m( + {TensorType_INT8, {1, 2, 3, 2}, -63.5, 64, 0.5, -1}, + {TensorType_INT8, + // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel] + {2, 2, 2, 2}, + /*min=*/0, + /*max=*/0, + /*scale=*/0, + /*zero_point=*/0, + /*per_channel_quantization=*/true, + /*per_channel_quantization_scales=*/{1, 2}, + /*per_channel_quantization_offsets=*/{0, 0}, + /*channel_index=*/0}, + /*filter_data=*/ + { + // [2 * 2 * 2 * 2] as [output_channel, y, x, input_channel] + 1, 2, // out channel = 0, y = 0, x = 0 + 3, 4, // out channel = 0, y = 0, x = 1 + 3, 4, // out channel = 0, y = 1, x = 0 + 5, 6, // out channel = 0, y = 1, x = 1 + 4, 4, // out channel = 1, y = 0, x = 0 + 3, 3, // out channel = 1, y = 0, x = 1 + 2, 2, // out channel = 
1, y = 1, x = 0 + 1, 1, // out channel = 1, y = 1, x = 1 + }, + /*bias_data=*/{6, -2}, {TensorType_INT8, {}, -63.5, 64, 0.5, -1}, + /*stride_width=*/1, /*stride_height=*/1); + m.SetInput({ + // [1 * 2 * 3 * 2] as [batch, y, x, input_channel] + 3, 2, // batch = 0, y = 0, x = 0 + 1, -1, // batch = 0, y = 0, x = 1 + -2, -3, // batch = 0, y = 0, x = 2 + 4, 3, // batch = 0, y = 1, x = 0 + 2, -2, // batch = 0, y = 1, x = 1 + -3, -4, // batch = 0, y = 1, x = 2 + }); + + // Invoke and verify output. + // output has dimension [1 * 1 * 2 * 2] as [batch, y, x, output_channel] + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + testing::Pointwise(QuantizedNear(), {61, 127, -115, -93})); +} + class DepthwiseConvolutionOpModel : public SingleOpModelWithNNAPI { public: DepthwiseConvolutionOpModel(const TensorData& input, const TensorData& filter, From 1acf61f0281ff2803c64c2e04fb95a2f2b78e5ab Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Mon, 8 Jun 2020 14:35:55 -0700 Subject: [PATCH 077/178] Prefix OpFusion and ConvertReadonlyReferenceVariablesToResourceVariablesPass pass flag with 'tf'. (NFC) Other passes have the 'tf' prefix for their flags, so this is to have flags be more consistent with one another, under the tensorflow directory. PiperOrigin-RevId: 315355169 Change-Id: I42258041c5b455a140f5e338f146b305a16f7a83 --- tensorflow/compiler/mlir/tensorflow/tests/op_fusion.mlir | 2 +- .../mlir/tensorflow/tests/readonly_references_to_resources.mlir | 2 +- tensorflow/compiler/mlir/tensorflow/transforms/op_fusion.cc | 2 +- .../tensorflow/transforms/readonly_references_to_resources.cc | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/tests/op_fusion.mlir b/tensorflow/compiler/mlir/tensorflow/tests/op_fusion.mlir index 4688d2ee712..77ce19da8a4 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/op_fusion.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/op_fusion.mlir @@ -1,4 +1,4 @@ -// RUN: tf-opt %s -op-fusion | FileCheck %s --dump-input-on-failure +// RUN: tf-opt %s -tf-op-fusion | FileCheck %s --dump-input-on-failure //===----------------------------------------------------------------------===// // Conv2D + BiasAdd + fusions. diff --git a/tensorflow/compiler/mlir/tensorflow/tests/readonly_references_to_resources.mlir b/tensorflow/compiler/mlir/tensorflow/tests/readonly_references_to_resources.mlir index 2970e31c3c9..fe3234bcc4e 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/readonly_references_to_resources.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/readonly_references_to_resources.mlir @@ -1,4 +1,4 @@ -// RUN: tf-opt -verify-diagnostics -readonly-references-to-resources -split-input-file %s | FileCheck %s --dump-input=fail +// RUN: tf-opt -verify-diagnostics -tf-readonly-references-to-resources -split-input-file %s | FileCheck %s --dump-input=fail // Test case: Basic converting. 
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/op_fusion.cc b/tensorflow/compiler/mlir/tensorflow/transforms/op_fusion.cc
index 202783d1cc8..0349a5b26ba 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/op_fusion.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/op_fusion.cc
@@ -166,7 +166,7 @@ std::unique_ptr> CreateOpFusionPass() {
 }
 
 static PassRegistration pass(
-    "op-fusion",
+    "tf-op-fusion",
     "Replaces commonly occurring subgraphs with optimized fused kernels");
 
 }  // namespace TF
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/readonly_references_to_resources.cc b/tensorflow/compiler/mlir/tensorflow/transforms/readonly_references_to_resources.cc
index a80b84ddeda..5fc35361bca 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/readonly_references_to_resources.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/readonly_references_to_resources.cc
@@ -171,7 +171,7 @@ CreateConvertReadonlyReferenceVariablesToResourceVariablesPass() {
 
 static PassRegistration<
     ConvertReadonlyReferenceVariablesToResourceVariablesPass>
-    pass("readonly-references-to-resources",
+    pass("tf-readonly-references-to-resources",
         "Convert readonly reference variables to resource variables.");
 
 }  // namespace TF

From d7da550fc7b1f9ab3394c927c5078c22f75b8eca Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Mon, 8 Jun 2020 14:36:42 -0700
Subject: [PATCH 078/178] Make eager/monitoring_test.py less flaky.

MonitoredTimer measures wall time, so we're susceptible to the process being
interrupted. To measure time intervals of different lengths, wrap them inside
each other, that way guaranteeing that the inner one is shorter than the
outer one.

PiperOrigin-RevId: 315355335
Change-Id: I6eb136f221f4b96917ad8f6708f4a9bad18841c9
---
 tensorflow/python/eager/monitoring_test.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/eager/monitoring_test.py b/tensorflow/python/eager/monitoring_test.py
index 7cb8c0c2cd1..de9d3bcb8f4 100644
--- a/tensorflow/python/eager/monitoring_test.py
+++ b/tensorflow/python/eager/monitoring_test.py
@@ -104,10 +104,10 @@ class MonitoringTest(test_util.TensorFlowTestCase):
 
   def test_context_manager(self):
     counter = monitoring.Counter('test/ctxmgr', 'test context manager', 'slot')
-    with monitoring.MonitoredTimer(counter.get_cell('short')):
-      time.sleep(0.001)
     with monitoring.MonitoredTimer(counter.get_cell('long')):
-      time.sleep(0.02)
+      time.sleep(0.01)
+      with monitoring.MonitoredTimer(counter.get_cell('short')):
+        time.sleep(0.01)
     self.assertGreater(
         counter.get_cell('long').value(),
         counter.get_cell('short').value())

From de901d9be98bf300883b30d1b43364fbc036a735 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Mon, 8 Jun 2020 14:39:56 -0700
Subject: [PATCH 079/178] Consolidate error and warning message population.

1. Add a diagnostic message which contains error, warning and info for
   diagnosing profiling anomalies. Replace individual error and warning
   fields with the diagnostic message.
2. Add unified PopulateStepDiagnostics, PopulateOverviewDiagnostics
   utilities in diagnostics.cc to be used by all tool converters.
3. Add unified GenerateDiagnosticDatatable in TfStatsProcessor for all
   child tools. Add a similar generate_diagnostics_table for python
   converters.
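For example, the input-pipeline converter below now fills the shared message
with a single call, PopulateStepDiagnostics(op_stats,
result.mutable_diagnostics()), instead of appending ad-hoc warning strings to
the result.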
PiperOrigin-RevId: 315356051 Change-Id: Ic1f1d6d43b3fec850ccb04c3184f5ba7bbd694af --- tensorflow/core/profiler/convert/BUILD | 4 +-- .../op_stats_to_input_pipeline_analysis.cc | 15 ++------- .../convert/op_stats_to_overview_page.cc | 31 ++----------------- .../convert/op_stats_to_overview_page.h | 5 --- tensorflow/core/profiler/protobuf/BUILD | 15 ++++++++- .../core/profiler/protobuf/diagnostics.proto | 11 +++++++ .../profiler/protobuf/input_pipeline.proto | 7 +++-- .../profiler/protobuf/overview_page.proto | 7 +++-- tensorflow/core/profiler/utils/BUILD | 10 ++++-- .../utils/{errors.cc => diagnostics.cc} | 25 ++++++++++++++- .../utils/{errors.h => diagnostics.h} | 6 ++++ 11 files changed, 76 insertions(+), 60 deletions(-) create mode 100644 tensorflow/core/profiler/protobuf/diagnostics.proto rename tensorflow/core/profiler/utils/{errors.cc => diagnostics.cc} (63%) rename tensorflow/core/profiler/utils/{errors.h => diagnostics.h} (82%) diff --git a/tensorflow/core/profiler/convert/BUILD b/tensorflow/core/profiler/convert/BUILD index ff752be0308..5a2fd86f04d 100644 --- a/tensorflow/core/profiler/convert/BUILD +++ b/tensorflow/core/profiler/convert/BUILD @@ -100,7 +100,7 @@ cc_library( "//tensorflow/core/profiler/protobuf:overview_page_proto_cc", "//tensorflow/core/profiler/protobuf:steps_db_proto_cc", "//tensorflow/core/profiler/protobuf:tf_function_proto_cc", - "//tensorflow/core/profiler/utils:errors", + "//tensorflow/core/profiler/utils:diagnostics", "//tensorflow/core/profiler/utils:html_utils", "//tensorflow/core/profiler/utils:math_utils", "//tensorflow/core/profiler/utils:op_metrics_db_utils", @@ -125,7 +125,7 @@ cc_library( "//tensorflow/core/profiler/protobuf:op_metrics_proto_cc", "//tensorflow/core/profiler/protobuf:op_stats_proto_cc", "//tensorflow/core/profiler/protobuf:steps_db_proto_cc", - "//tensorflow/core/profiler/utils:errors", + "//tensorflow/core/profiler/utils:diagnostics", "//tensorflow/core/profiler/utils:event_span", "//tensorflow/core/profiler/utils:html_utils", "//tensorflow/core/profiler/utils:math_utils", diff --git a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc index ad0665f4f63..0e04fc2b7ba 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc +++ b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc @@ -36,7 +36,7 @@ limitations under the License. 
#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h" #include "tensorflow/core/profiler/protobuf/op_stats.pb.h" #include "tensorflow/core/profiler/protobuf/steps_db.pb.h" -#include "tensorflow/core/profiler/utils/errors.h" +#include "tensorflow/core/profiler/utils/diagnostics.h" #include "tensorflow/core/profiler/utils/event_span.h" #include "tensorflow/core/profiler/utils/html_utils.h" #include "tensorflow/core/profiler/utils/math_utils.h" @@ -552,23 +552,12 @@ StepSummary ComputeStepTimeSummaryInMs( return GetStepSummaryForSampleStats(total_step_stats_in_ms); } -void AddErrorMessages(const OpStats& op_stats, - InputPipelineAnalysisResult* result) { - if (op_stats.step_db().use_incomplete_step()) { - *result->add_error_messages() = - absl::StrCat("WARNING: ", kErrorIncompleteStep); - } else if (op_stats.step_db().step_sequence().empty()) { - *result->add_error_messages() = - absl::StrCat("WARNING: ", kErrorNoStepMarker); - } -} - InputPipelineAnalysisResult ConvertOpStatsToInputPipelineAnalysis( const OpStats& op_stats, const HardwareType& hardware_type) { InputPipelineAnalysisResult result = ComputeGenericInputPipelineAnalysisResult( op_stats.step_db().step_sequence()); - AddErrorMessages(op_stats, &result); + PopulateStepDiagnostics(op_stats, result.mutable_diagnostics()); result.set_hardware_type(HardwareType_Name(hardware_type)); GenerateHostResult(op_stats.host_op_metrics_db(), &result); diff --git a/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc b/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc index 62f37c50155..96bbcc24fff 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc +++ b/tensorflow/core/profiler/convert/op_stats_to_overview_page.cc @@ -30,7 +30,7 @@ limitations under the License. 
#include "tensorflow/core/profiler/protobuf/overview_page.pb.h" #include "tensorflow/core/profiler/protobuf/steps_db.pb.h" #include "tensorflow/core/profiler/protobuf/tf_function.pb.h" -#include "tensorflow/core/profiler/utils/errors.h" +#include "tensorflow/core/profiler/utils/diagnostics.h" #include "tensorflow/core/profiler/utils/html_utils.h" #include "tensorflow/core/profiler/utils/math_utils.h" #include "tensorflow/core/profiler/utils/op_metrics_db_utils.h" @@ -175,7 +175,6 @@ OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats) { op->set_flop_rate( SafeDivide(metrics->flops(), PicosToNanos(metrics->time_ps()))); } - SetRemarks(op_stats, &analysis); uint64 total_device_compute_ps = op_stats.device_op_metrics_db().precision_stats().compute_16bit_ps() + op_stats.device_op_metrics_db().precision_stats().compute_32bit_ps(); @@ -297,35 +296,9 @@ OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats, bottleneck.input_classification(), bottleneck.input_statement(), "", hardware_type, TfFunctionRecommendationHtml(op_stats.tf_function_db()), overview_page.mutable_recommendation()); - SetOverviewPageErrorMessage(op_stats, &overview_page); + PopulateOverviewDiagnostics(op_stats, overview_page.mutable_diagnostics()); return overview_page; } -void SetRemarks(const OpStats& op_stats, OverviewPageAnalysis* analysis) { - if (op_stats.step_db().use_incomplete_step()) { - analysis->set_remark_text(absl::StrCat("WARNING: ", kErrorIncompleteStep)); - analysis->set_remark_color("red"); - } else if (op_stats.step_db().step_sequence().empty()) { - analysis->set_remark_text(absl::StrCat("WARNING: ", kErrorNoStepMarker)); - analysis->set_remark_color("red"); - } else { - analysis->set_remark_text(""); - analysis->set_remark_color("black"); - } -} - -void SetOverviewPageErrorMessage(const OpStats& op_stats, - OverviewPage* overview_page) { - *overview_page->mutable_errors() = op_stats.errors(); - absl::c_sort(*overview_page->mutable_errors()); - if (overview_page->errors().empty()) { - // Shows run-environment error only if there is no other existing error. - if (op_stats.run_environment().device_type() != "CPU" && - op_stats.run_environment().device_core_count() <= 0) { - *overview_page->add_errors() = std::string(kNoDeviceTraceCollected); - } - } -} - } // namespace profiler } // namespace tensorflow diff --git a/tensorflow/core/profiler/convert/op_stats_to_overview_page.h b/tensorflow/core/profiler/convert/op_stats_to_overview_page.h index d4d75c03454..098185b8feb 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_overview_page.h +++ b/tensorflow/core/profiler/convert/op_stats_to_overview_page.h @@ -48,17 +48,12 @@ OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats); OverviewPageRunEnvironment ComputeRunEnvironment( const RunEnvironment& run_environment); -void SetOverviewPageErrorMessage(const OpStats& op_stats, - OverviewPage* overview_page); - OverviewPage ConvertOpStatsToOverviewPage(const OpStats& op_stats, HardwareType hardware_type); // Returns a html which provides tf-function related recommendation. 
std::string TfFunctionRecommendationHtml(const TfFunctionDb& tf_function_db); -void SetRemarks(const OpStats& op_stats, OverviewPageAnalysis* analysis); - } // namespace profiler } // namespace tensorflow diff --git a/tensorflow/core/profiler/protobuf/BUILD b/tensorflow/core/profiler/protobuf/BUILD index b102fe2ec25..cd84aeb6259 100644 --- a/tensorflow/core/profiler/protobuf/BUILD +++ b/tensorflow/core/profiler/protobuf/BUILD @@ -26,10 +26,20 @@ exports_files( visibility = ["//tensorflow/core:__pkg__"], ) +tf_proto_library( + name = "diagnostics_proto", + srcs = ["diagnostics.proto"], + cc_api_version = 2, + visibility = [ + ":friends", + ], +) + tf_proto_library( name = "input_pipeline_proto", srcs = ["input_pipeline.proto"], cc_api_version = 2, + protodeps = [":diagnostics_proto"], visibility = [ ":friends", ], @@ -39,7 +49,10 @@ tf_proto_library( name = "overview_page_proto", srcs = ["overview_page.proto"], cc_api_version = 2, - protodeps = [":input_pipeline_proto"], + protodeps = [ + ":diagnostics_proto", + ":input_pipeline_proto", + ], visibility = [ ":friends", ], diff --git a/tensorflow/core/profiler/protobuf/diagnostics.proto b/tensorflow/core/profiler/protobuf/diagnostics.proto new file mode 100644 index 00000000000..def2d4e49d4 --- /dev/null +++ b/tensorflow/core/profiler/protobuf/diagnostics.proto @@ -0,0 +1,11 @@ +// This proto describes the diagnostics for debugging profiling issues of +// the TensorFlow profiler. +syntax = "proto3"; + +package tensorflow.profiler; + +message Diagnostics { + repeated string info = 1; + repeated string warnings = 2; + repeated string errors = 3; +} diff --git a/tensorflow/core/profiler/protobuf/input_pipeline.proto b/tensorflow/core/profiler/protobuf/input_pipeline.proto index cc60b88a913..abd3ff78323 100644 --- a/tensorflow/core/profiler/protobuf/input_pipeline.proto +++ b/tensorflow/core/profiler/protobuf/input_pipeline.proto @@ -3,6 +3,7 @@ syntax = "proto3"; package tensorflow.profiler; import "google/protobuf/any.proto"; +import "tensorflow/core/profiler/protobuf/diagnostics.proto"; // Generic hardware bottleneck. message BottleneckAnalysis { @@ -151,7 +152,7 @@ message InputPipelineAnalysisResult { // Breakdown of the step time. Can be unpacked into a // GenericStepTimeBreakdown. google.protobuf.Any step_time_breakdown = 8; - // Error messages. - repeated string error_messages = 10; - reserved 1; + // Error and warning messages for diagnosing profiling issues. + Diagnostics diagnostics = 12; + reserved 1, 10; } diff --git a/tensorflow/core/profiler/protobuf/overview_page.proto b/tensorflow/core/profiler/protobuf/overview_page.proto index 1590076d55f..cbef05d4d9f 100644 --- a/tensorflow/core/profiler/protobuf/overview_page.proto +++ b/tensorflow/core/profiler/protobuf/overview_page.proto @@ -3,6 +3,7 @@ syntax = "proto3"; package tensorflow.profiler; import "google/protobuf/any.proto"; +import "tensorflow/core/profiler/protobuf/diagnostics.proto"; import "tensorflow/core/profiler/protobuf/input_pipeline.proto"; // Overview result for a TensorFlow Op. @@ -168,7 +169,7 @@ message OverviewPage { OverviewPageAnalysis analysis = 3; // The recommendation made to the user. OverviewPageRecommendation recommendation = 4; - // Errors. - repeated string errors = 7; - reserved 1, 5; + // Error and warning messages for diagnosing profiling issues. 
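+  // Filled in by PopulateOverviewDiagnostics() when the overview page is
+  // built.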
+ Diagnostics diagnostics = 8; + reserved 1, 5, 7; } diff --git a/tensorflow/core/profiler/utils/BUILD b/tensorflow/core/profiler/utils/BUILD index 279600cc1f8..dcbde4774ed 100644 --- a/tensorflow/core/profiler/utils/BUILD +++ b/tensorflow/core/profiler/utils/BUILD @@ -13,10 +13,14 @@ package_group( ) cc_library( - name = "errors", - srcs = ["errors.cc"], - hdrs = ["errors.h"], + name = "diagnostics", + srcs = ["diagnostics.cc"], + hdrs = ["diagnostics.h"], deps = [ + "//tensorflow/core/profiler/protobuf:diagnostics_proto_cc", + "//tensorflow/core/profiler/protobuf:op_stats_proto_cc", + "//tensorflow/core/profiler/protobuf:steps_db_proto_cc", + "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/strings", ], ) diff --git a/tensorflow/core/profiler/utils/errors.cc b/tensorflow/core/profiler/utils/diagnostics.cc similarity index 63% rename from tensorflow/core/profiler/utils/errors.cc rename to tensorflow/core/profiler/utils/diagnostics.cc index 1851c624e5c..dc89531b867 100644 --- a/tensorflow/core/profiler/utils/errors.cc +++ b/tensorflow/core/profiler/utils/diagnostics.cc @@ -13,9 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/profiler/utils/errors.h" +#include "tensorflow/core/profiler/utils/diagnostics.h" +#include "absl/algorithm/container.h" #include "absl/strings/string_view.h" +#include "tensorflow/core/profiler/protobuf/steps_db.pb.h" namespace tensorflow { namespace profiler { @@ -38,5 +40,26 @@ const absl::string_view kNoDeviceTraceCollected = "run on the device when sampling was turned on. You could try the sampling" " again later."; +void PopulateStepDiagnostics(const OpStats& op_stats, Diagnostics* diag) { + if (op_stats.step_db().use_incomplete_step()) { + *diag->add_warnings() = std::string(kErrorIncompleteStep); + } else if (op_stats.step_db().step_sequence().empty()) { + *diag->add_warnings() = std::string(kErrorNoStepMarker); + } +} + +void PopulateOverviewDiagnostics(const OpStats& op_stats, Diagnostics* diag) { + *diag->mutable_errors() = op_stats.errors(); + absl::c_sort(*diag->mutable_errors()); + if (diag->errors().empty()) { + // Shows run-environment error only if there is no other existing error. + if (op_stats.run_environment().device_type() != "CPU" && + op_stats.run_environment().device_core_count() <= 0) { + *diag->add_errors() = std::string(kNoDeviceTraceCollected); + } + } + PopulateStepDiagnostics(op_stats, diag); +} + } // namespace profiler } // namespace tensorflow diff --git a/tensorflow/core/profiler/utils/errors.h b/tensorflow/core/profiler/utils/diagnostics.h similarity index 82% rename from tensorflow/core/profiler/utils/errors.h rename to tensorflow/core/profiler/utils/diagnostics.h index 2dcb60e6899..7b62a6ad433 100644 --- a/tensorflow/core/profiler/utils/errors.h +++ b/tensorflow/core/profiler/utils/diagnostics.h @@ -17,6 +17,8 @@ limitations under the License. 
#define TENSORFLOW_CORE_PROFILER_UTILS_ERRORS_H_ #include "absl/strings/string_view.h" +#include "tensorflow/core/profiler/protobuf/diagnostics.pb.h" +#include "tensorflow/core/profiler/protobuf/op_stats.pb.h" namespace tensorflow { namespace profiler { @@ -30,6 +32,10 @@ ABSL_CONST_INIT extern const absl::string_view kErrorNoStepMarker; ABSL_CONST_INIT extern const absl::string_view kNoDeviceTraceCollected; +void PopulateStepDiagnostics(const OpStats& op_stats, Diagnostics* diag); + +void PopulateOverviewDiagnostics(const OpStats& op_stats, Diagnostics* diag); + } // namespace profiler } // namespace tensorflow From 7152155517fbda482b4bffe66ddf56fd06b6aa04 Mon Sep 17 00:00:00 2001 From: Reed Wanderman-Milne Date: Mon, 8 Jun 2020 14:40:01 -0700 Subject: [PATCH 080/178] Always increment iterations in LossScaleOptimizer. Now self.iterations is incremented in LossScaleOptimizer.apply_gradients even when gradients are not applied to the variables due to NaNs. Before, self.iterations was not incremented because I considered self.iterations to represent the number of times gradients were applied to variables. But this caused confusion because apply_gradients() did not always increment self.iterations, so now it is incremented unconditionally. PiperOrigin-RevId: 315356072 Change-Id: I4e8620d1ef84af55eb7a1f70b20671a7d7af38b9 --- .../experimental/loss_scale_optimizer.py | 8 ++++++- .../experimental/loss_scale_optimizer_test.py | 24 +++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py b/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py index d6a786aa4e4..72d9d0390fd 100644 --- a/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py +++ b/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer.py @@ -395,13 +395,19 @@ class LossScaleOptimizer(_DelegatingTrackableMixin, optimizer_v2.OptimizerV2): self._apply_gradients, args=(grads, wrapped_vars, name, experimental_aggregate_gradients)) + def do_not_apply_fn(): + # Normally self._optimizer.iterations is incremented in + # self._optimizer.apply_gradients(). Since that is not called in this + # branch, we increment it here instead. + return self._optimizer.iterations.assign_add(1, read_value=False) + # Note: We must call this cond() in a cross-replica context. # DistributionStrategy does not support having a cond in a replica context # with a branch that calls `merge_call`, and self._optimizer.apply_gradients # calls `merge_call`. 
maybe_apply_op = smart_cond.smart_cond(should_apply_grads, apply_fn,
-                                           control_flow_ops.no_op)
+                                           do_not_apply_fn)
     return control_flow_ops.group(maybe_apply_op, loss_scale_update_op)
 
   def _apply_gradients(self, grads, wrapped_vars, name,
diff --git a/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer_test.py b/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer_test.py
index 20252ff3885..992ed17f0c6 100644
--- a/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer_test.py
+++ b/tensorflow/python/keras/mixed_precision/experimental/loss_scale_optimizer_test.py
@@ -285,6 +285,30 @@ class LossScaleOptimizerTest(test.TestCase, parameterized.TestCase):
     self.assertEqual(lso.iterations, 7)
     self.assertEqual(opt.iterations, 7)
 
+  @parameterized.named_parameters(*TESTCASES)
+  def testIterationsIncremented(self, strategy_fn):
+    with strategy_fn().scope() as strategy:
+      # Test iterations is incremented in opt.minimize.
+      opt = gradient_descent.SGD(1.0)
+      opt = loss_scale_optimizer.LossScaleOptimizer(opt, loss_scale='dynamic')
+      var = variables.Variable([5.0])
+      loss = lambda: var * 2.0 / strategy.num_replicas_in_sync
+      run_fn = lambda: opt.minimize(loss, [var])
+      run_op = strategy.experimental_run(run_fn)
+      self.evaluate(variables.global_variables_initializer())
+      self._run_if_in_graph_mode(run_op)
+      self.assertEqual(self.evaluate(var), 3.0)  # Grad is 2, so var is 5 - 2
+      self.assertEqual(self.evaluate(opt.iterations), 1)
+
+      # Test iterations is incremented in opt.minimize even if gradients aren't
+      # applied to variables due to NaN gradients.
+      loss = lambda: var * float('NaN')
+      run_fn = lambda: opt.minimize(loss, [var])
+      run_op = strategy.experimental_run(run_fn)
+      self._run_if_in_graph_mode(run_op)
+      self.assertEqual(self.evaluate(var), 3.0)
+      self.assertEqual(self.evaluate(opt.iterations), 2)
+
   def testWeightMethods(self):
     with self.test_session():
       var = variables.Variable([1.0])

From 27d684112b1b3337eebdbe5496f1fc5217403940 Mon Sep 17 00:00:00 2001
From: David Majnemer
Date: Mon, 8 Jun 2020 15:12:18 -0700
Subject: [PATCH 081/178] [XLA] Softplus should be monotonic

While we are here, add NumPy support for sorting bfloat16 values to make
it easier to write the test.
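The change below replaces the earlier lowering, max(x, 0) + log1p(exp(-abs(x))),
with a three-way select keyed on a machine-epsilon threshold. A rough,
illustrative NumPy model of the new behavior (a sketch only, not the XLA
implementation; the helper name is made up here):

  import numpy as np

  def softplus_sketch(x, dtype=np.float32):
    x = np.asarray(x, dtype=dtype)
    eps = np.finfo(dtype).eps
    threshold = np.log(eps) + 2.0  # a negative value close to log(eps)
    with np.errstate(over='ignore'):
      exp_x = np.exp(x)
    # Large x: exp(x) may overflow, but softplus(x) == x to machine precision.
    # Very negative x: softplus(x) == exp(x) to machine precision.
    return np.where(x > -threshold, x,
                    np.where(x < threshold, exp_x, np.log1p(exp_x)))

The new test feeds a pair of inputs near 0.693 and asserts that the outputs
are both close to the NumPy reference and sorted, which is where the old
max/log1p form could lose monotonicity.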
PiperOrigin-RevId: 315363425 Change-Id: I3830835549ca02754da8c657e3722f9f0462a12a --- tensorflow/compiler/tests/unary_ops_test.py | 28 ++++++++-- .../compiler/tf2xla/kernels/unary_ops.cc | 29 ++++++---- tensorflow/compiler/xla/python/bfloat16.cc | 24 +++++++++ .../compiler/xla/python/bfloat16_test.py | 6 +++ tensorflow/python/lib/core/bfloat16.cc | 24 +++++++++ tensorflow/python/lib/core/bfloat16_test.py | 54 +++++++++++-------- 6 files changed, 130 insertions(+), 35 deletions(-) diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py index f2ec6be43cb..567e75a9a17 100644 --- a/tensorflow/compiler/tests/unary_ops_test.py +++ b/tensorflow/compiler/tests/unary_ops_test.py @@ -85,6 +85,11 @@ class UnaryOpsTest(xla_test.XLATestCase): for i in xrange(len(result)): self.assertAllClose(result[i], expected[i], rtol, atol) + def AssertCloseAndSorted(self, result, expected, rtol, atol): + """Tests that result and expeted are both close and sorted.""" + self.assertAllClose(result, expected, rtol, atol) + self.assertAllEqual(np.sort(result), result) + @test_util.disable_mlir_bridge( "MlirHloBuilder::Iota missing required for xla::Diag") def testAllTypeOps(self): @@ -1122,17 +1127,27 @@ class UnaryOpsTest(xla_test.XLATestCase): [[[12, 13, 14, 15, 28, 29, 30, 31]]]]], dtype=dtype)) - def _assertSoftplusMatchesExpected(self, features, dtype): + def _assertSoftplusMatchesExpected(self, + features, + dtype, + equality_test=None, + rtol=1e-6, + atol=9.1e-6): features = np.array(features, dtype=dtype) zero = np.asarray(0).astype(dtype) expected = np.logaddexp(zero, features).astype(dtype) self._assertOpOutputMatchesExpected( - nn_ops.softplus, features, expected=expected, rtol=1e-6, atol=9.1e-6) + nn_ops.softplus, + features, + expected=expected, + equality_test=equality_test, + rtol=rtol, + atol=atol) @test_util.disable_mlir_bridge( "bf16 type not supported in CreateDenseElementsAttrFromLiteral") def testSoftplus(self): - for dtype in self.float_types: + for dtype in self.float_types & {dtypes.float32, dtypes.float64}: self._assertSoftplusMatchesExpected([[-2, 0, 8]], dtype) self._assertSoftplusMatchesExpected( [[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]], dtype) @@ -1148,6 +1163,13 @@ class UnaryOpsTest(xla_test.XLATestCase): -log_eps + ten ], dtype) + self._assertSoftplusMatchesExpected( + [0.69302183, 0.69324386], + dtype, + equality_test=self.AssertCloseAndSorted, + rtol=9e-5, + atol=9e-5) + if __name__ == "__main__": googletest.main() diff --git a/tensorflow/compiler/tf2xla/kernels/unary_ops.cc b/tensorflow/compiler/tf2xla/kernels/unary_ops.cc index 83a894e91fe..405c5e787da 100644 --- a/tensorflow/compiler/tf2xla/kernels/unary_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/unary_ops.cc @@ -89,16 +89,25 @@ XLAJIT_MAKE_UNARY(Sign, xla::Select(xla::Ne(x, x), xla::ZerosLike(x), xla::Sign(x))); XLAJIT_MAKE_UNARY(Sinh, xla::Sinh(x)); -// softplus(x) = log(1 + exp(x)) -// -// This is not numerically stable when x is large, it can easily overflow. 
-// However, we can compute it as LogSumExp(x, 0): -// max(x, 0) + log(exp(x - max(x, 0)) + exp(0 - max(x, 0))) -// -// This is equivalent to: -// max(x, 0) + log1p(exp(-abs(x))) -XLAJIT_MAKE_UNARY(Softplus, xla::Max(x, xla::ScalarLike(x, 0.0)) + - xla::Log1p(xla::Exp(-xla::Abs(x)))); +static xla::XlaOp Softplus(xla::XlaBuilder* b, xla::XlaOp features) { + return b->ReportErrorOrReturn([&]() -> xla::StatusOr { + TF_ASSIGN_OR_RETURN(auto shape, b->GetShape(features)); + xla::XlaOp threshold = + Log(xla::Epsilon(b, shape.element_type())) + ScalarLike(features, 2.0); + // Value above which exp(x) may overflow, but softplus(x) == x + // is within machine epsilon. + xla::XlaOp too_large = Gt(features, -threshold); + // Value below which exp(x) may underflow, but softplus(x) == exp(x) + // is within machine epsilon. + xla::XlaOp too_small = Lt(features, threshold); + xla::XlaOp features_exp = Exp(features); + xla::XlaOp output = + Select(too_large, features, + Select(too_small, features_exp, Log1p(features_exp))); + return output; + }); +} +XLAJIT_MAKE_UNARY(Softplus, Softplus(b, x)); // softsign(x) = x / (abs(x) + 1) XLAJIT_MAKE_UNARY(Softsign, x / (xla::Abs(x) + xla::ScalarLike(x, 1.0))); diff --git a/tensorflow/compiler/xla/python/bfloat16.cc b/tensorflow/compiler/xla/python/bfloat16.cc index 9e38769168d..0b98d0c5f9b 100644 --- a/tensorflow/compiler/xla/python/bfloat16.cc +++ b/tensorflow/compiler/xla/python/bfloat16.cc @@ -441,6 +441,29 @@ void ByteSwap16(void* value) { std::swap(p[0], p[1]); } +int NPyBfloat16_Compare(const void* a, const void* b, void* arr) { + bfloat16 x; + memcpy(&x, a, sizeof(bfloat16)); + + bfloat16 y; + memcpy(&y, b, sizeof(bfloat16)); + + if (x < y) { + return -1; + } + if (y < x) { + return 1; + } + // NaNs sort to the end. + if (!std::isnan(x) && std::isnan(y)) { + return -1; + } + if (std::isnan(x) && !std::isnan(y)) { + return 1; + } + return 0; +} + void NPyBfloat16_CopySwapN(void* dstv, npy_intp dstride, void* srcv, npy_intp sstride, npy_intp n, int swap, void* arr) { char* dst = reinterpret_cast(dstv); @@ -1280,6 +1303,7 @@ bool Initialize() { PyArray_InitArrFuncs(&NPyBfloat16_ArrFuncs); NPyBfloat16_ArrFuncs.getitem = NPyBfloat16_GetItem; NPyBfloat16_ArrFuncs.setitem = NPyBfloat16_SetItem; + NPyBfloat16_ArrFuncs.compare = NPyBfloat16_Compare; NPyBfloat16_ArrFuncs.copyswapn = NPyBfloat16_CopySwapN; NPyBfloat16_ArrFuncs.copyswap = NPyBfloat16_CopySwap; NPyBfloat16_ArrFuncs.nonzero = NPyBfloat16_NonZero; diff --git a/tensorflow/compiler/xla/python/bfloat16_test.py b/tensorflow/compiler/xla/python/bfloat16_test.py index 4c4f8c28d3f..60b56bf810d 100644 --- a/tensorflow/compiler/xla/python/bfloat16_test.py +++ b/tensorflow/compiler/xla/python/bfloat16_test.py @@ -219,6 +219,12 @@ class Bfloat16Test(parameterized.TestCase): numpy_assert_allclose( a, b, rtol=0.1, atol=0.1, equal_nan=True, err_msg="", verbose=True) + def testSort(self): + values_to_sort = np.float32(FLOAT_VALUES) + sorted_f32 = np.sort(values_to_sort) + sorted_bf16 = np.sort(values_to_sort.astype(bfloat16)) + np.testing.assert_equal(sorted_f32, np.float32(sorted_bf16)) + BinaryOp = collections.namedtuple("BinaryOp", ["op"]) diff --git a/tensorflow/python/lib/core/bfloat16.cc b/tensorflow/python/lib/core/bfloat16.cc index d165c47910b..feb01f11a1a 100644 --- a/tensorflow/python/lib/core/bfloat16.cc +++ b/tensorflow/python/lib/core/bfloat16.cc @@ -412,6 +412,29 @@ void ByteSwap16(void* value) { std::swap(p[0], p[1]); } +int NPyBfloat16_Compare(const void* a, const void* b, void* arr) { + bfloat16 x; + 
memcpy(&x, a, sizeof(bfloat16)); + + bfloat16 y; + memcpy(&y, b, sizeof(bfloat16)); + + if (x < y) { + return -1; + } + if (y < x) { + return 1; + } + // NaNs sort to the end. + if (!std::isnan(x) && std::isnan(y)) { + return -1; + } + if (std::isnan(x) && !std::isnan(y)) { + return 1; + } + return 0; +} + void NPyBfloat16_CopySwapN(void* dstv, npy_intp dstride, void* srcv, npy_intp sstride, npy_intp n, int swap, void* arr) { char* dst = reinterpret_cast(dstv); @@ -561,6 +584,7 @@ bool Initialize() { PyArray_InitArrFuncs(&NPyBfloat16_ArrFuncs); NPyBfloat16_ArrFuncs.getitem = NPyBfloat16_GetItem; NPyBfloat16_ArrFuncs.setitem = NPyBfloat16_SetItem; + NPyBfloat16_ArrFuncs.compare = NPyBfloat16_Compare; NPyBfloat16_ArrFuncs.copyswapn = NPyBfloat16_CopySwapN; NPyBfloat16_ArrFuncs.copyswap = NPyBfloat16_CopySwap; NPyBfloat16_ArrFuncs.nonzero = NPyBfloat16_NonZero; diff --git a/tensorflow/python/lib/core/bfloat16_test.py b/tensorflow/python/lib/core/bfloat16_test.py index 32453ae2296..f19029911bf 100644 --- a/tensorflow/python/lib/core/bfloat16_test.py +++ b/tensorflow/python/lib/core/bfloat16_test.py @@ -32,15 +32,19 @@ from tensorflow.python.platform import test bfloat16 = _pywrap_bfloat16.TF_bfloat16_type() -class Bfloat16Test(test.TestCase): +def float_values(): + """Returns values that should round trip exactly to float and back.""" + epsilon = float.fromhex("1.0p-7") + return [ + 0.0, 1.0, -1, 0.5, -0.5, epsilon, 1.0 + epsilon, 1.0 - epsilon, + -1.0 - epsilon, -1.0 + epsilon, 3.5, 42.0, 255.0, 256.0, + float("inf"), + float("-inf"), + float("nan") + ] - def float_values(self): - """Returns values that should round trip exactly to float and back.""" - epsilon = float.fromhex("1.0p-7") - return [ - 0.0, 1.0, -1, 0.5, -0.5, epsilon, 1.0 + epsilon, 1.0 - epsilon, - -1.0 - epsilon, -1.0 + epsilon, 3.5, 42.0, 255.0, 256.0, - float("inf"), float("-inf"), float("nan")] + +class Bfloat16Test(test.TestCase): def _assertFloatIdentical(self, v, w): if math.isnan(v): @@ -49,7 +53,7 @@ class Bfloat16Test(test.TestCase): self.assertEqual(v, w) def testRoundTripToFloat(self): - for v in self.float_values(): + for v in float_values(): self._assertFloatIdentical(v, float(bfloat16(v))) def testRoundTripToInt(self): @@ -82,7 +86,7 @@ class Bfloat16Test(test.TestCase): # Tests for Python operations def testNegate(self): - for v in self.float_values(): + for v in float_values(): self._assertFloatIdentical(-v, float(-bfloat16(v))) def testAdd(self): @@ -132,33 +136,33 @@ class Bfloat16Test(test.TestCase): self.assertTrue(math.isnan(float(bfloat16(3.5) / bfloat16(float("nan"))))) def testLess(self): - for v in self.float_values(): - for w in self.float_values(): + for v in float_values(): + for w in float_values(): self.assertEqual(v < w, bfloat16(v) < bfloat16(w)) def testLessEqual(self): - for v in self.float_values(): - for w in self.float_values(): + for v in float_values(): + for w in float_values(): self.assertEqual(v <= w, bfloat16(v) <= bfloat16(w)) def testGreater(self): - for v in self.float_values(): - for w in self.float_values(): + for v in float_values(): + for w in float_values(): self.assertEqual(v > w, bfloat16(v) > bfloat16(w)) def testGreaterEqual(self): - for v in self.float_values(): - for w in self.float_values(): + for v in float_values(): + for w in float_values(): self.assertEqual(v >= w, bfloat16(v) >= bfloat16(w)) def testEqual(self): - for v in self.float_values(): - for w in self.float_values(): + for v in float_values(): + for w in float_values(): self.assertEqual(v == w, bfloat16(v) 
== bfloat16(w)) def testNotEqual(self): - for v in self.float_values(): - for w in self.float_values(): + for v in float_values(): + for w in float_values(): self.assertEqual(v != w, bfloat16(v) != bfloat16(w)) def testNan(self): @@ -259,6 +263,12 @@ class Bfloat16NumPyTest(test.TestCase): np.arange(-16384., 16384., 64., dtype=np.float32).astype(bfloat16), np.arange(-16384., 16384., 64., dtype=bfloat16)) + def testSort(self): + values_to_sort = np.float32(float_values()) + sorted_f32 = np.sort(values_to_sort) + sorted_bf16 = np.sort(values_to_sort.astype(bfloat16)) + self.assertAllEqual(sorted_f32, np.float32(sorted_bf16)) + if __name__ == "__main__": test.main() From 112154f2c88ff4d60161f1c6e14f4f428bc77636 Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Mon, 8 Jun 2020 15:47:37 -0700 Subject: [PATCH 082/178] Reduce the usage of keras generic util for Feature column. PiperOrigin-RevId: 315369798 Change-Id: Iaa5638d30e324525fda26a88bf07d04e8f40196a --- tensorflow/python/feature_column/serialization.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/python/feature_column/serialization.py b/tensorflow/python/feature_column/serialization.py index 970227c4e75..530fe54c876 100644 --- a/tensorflow/python/feature_column/serialization.py +++ b/tensorflow/python/feature_column/serialization.py @@ -84,8 +84,7 @@ def serialize_feature_column(fc): if isinstance(fc, six.string_types): return fc elif isinstance(fc, fc_lib.FeatureColumn): - return generic_utils.serialize_keras_class_and_config( - fc.__class__.__name__, fc.get_config()) # pylint: disable=protected-access + return {'class_name': fc.__class__.__name__, 'config': fc.get_config()} else: raise ValueError('Instance: {} is not a FeatureColumn'.format(fc)) From 477c1312ba0f0c314e04bb2bbdb65e44675443fd Mon Sep 17 00:00:00 2001 From: Advait Jain Date: Mon, 8 Jun 2020 16:01:48 -0700 Subject: [PATCH 083/178] Move AddBuiltin and AddCustom out of the interface. To get the desired code size reduction from the newer API for adding builtin operators, we need the AddBuiltin function to be removed by the linker. However, linkers currently have limited support for removing unused functions that are virtual. This change moves the AddBuiltin function out of the interface (and AddCustom as well for consistency). https://stackoverflow.com/q/17433791 https://reviews.llvm.org/D63932 http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.kui0101a/armlink_bhcgeaid.htm PiperOrigin-RevId: 315372426 Change-Id: I0dd1fc8a62dbe60076c7c847b43294ac273d0bad --- .../lite/micro/micro_interpreter_test.cc | 12 -- .../lite/micro/micro_mutable_op_resolver.h | 147 +++++++++++------- tensorflow/lite/micro/micro_op_resolver.h | 21 --- 3 files changed, 91 insertions(+), 89 deletions(-) diff --git a/tensorflow/lite/micro/micro_interpreter_test.cc b/tensorflow/lite/micro/micro_interpreter_test.cc index bd4e536218d..ce08e44435f 100644 --- a/tensorflow/lite/micro/micro_interpreter_test.cc +++ b/tensorflow/lite/micro/micro_interpreter_test.cc @@ -172,18 +172,6 @@ class MockOpResolver : public MicroOpResolver { // ParseOpData. return ParseOpData; } - - TfLiteStatus AddBuiltin(tflite::BuiltinOperator op, - TfLiteRegistration* registration) override { - // This function is currently not used in the tests. - return kTfLiteError; - } - - TfLiteStatus AddCustom(const char* name, - TfLiteRegistration* registration) override { - // This function is currently not used in the tests. 
- return kTfLiteError; - } }; } // namespace diff --git a/tensorflow/lite/micro/micro_mutable_op_resolver.h b/tensorflow/lite/micro/micro_mutable_op_resolver.h index 34768ae3cb8..bedceb71e4c 100644 --- a/tensorflow/lite/micro/micro_mutable_op_resolver.h +++ b/tensorflow/lite/micro/micro_mutable_op_resolver.h @@ -68,63 +68,13 @@ class MicroMutableOpResolver : public MicroOpResolver { return nullptr; } - // The Add* functions below add the various Builtin operators to the - // MicroMutableOpResolver object. + // Registers a Custom Operator with the MicroOpResolver. // - // This API is currently experimental (and only supported for a small subset - // of operators). It will soon be preferred over the AddBuiltin override of - // the MicroOpResolver interface for the following reason: - // * If all calls to AddBuiltin for an application use this API, the code - // size will be smaller by 5-8K (compared to the using the AddBuiltin - // override). - - TfLiteStatus AddDequantize() { - // TODO(b/149408647): Replace ParseOpData with the operator specific parse - // function once cl/313453102 lands. - return AddBuiltin(BuiltinOperator_DEQUANTIZE, - *tflite::ops::micro::Register_DEQUANTIZE(), ParseOpData); - } - - TfLiteStatus AddFullyConnected() { - // TODO(b/149408647): Replace ParseOpData with the operator specific parse - // function once cl/313453102 lands. - return AddBuiltin(BuiltinOperator_FULLY_CONNECTED, - *tflite::ops::micro::Register_FULLY_CONNECTED(), - ParseOpData); - } - - TfLiteStatus AddQuantize() { - // TODO(b/149408647): Replace ParseOpData with the operator specific parse - // function once cl/313453102 lands. - return AddBuiltin(BuiltinOperator_QUANTIZE, - *tflite::ops::micro::Register_QUANTIZE(), ParseOpData); - } - - TfLiteStatus AddSoftmax() { - // TODO(b/149408647): Replace ParseOpData with the operator specific parse - // function once cl/313453102 lands. - return AddBuiltin(BuiltinOperator_SOFTMAX, - *tflite::ops::micro::Register_SOFTMAX(), ParseOpData); - } - - TfLiteStatus AddSvdf() { - // TODO(b/149408647): Replace ParseOpData with the operator specific parse - // function once cl/313453102 lands. - return AddBuiltin(BuiltinOperator_SVDF, - *tflite::ops::micro::Register_SVDF(), ParseOpData); - } - - TfLiteStatus AddBuiltin(tflite::BuiltinOperator op, - TfLiteRegistration* registration) override { - TFLITE_DCHECK(registration != nullptr); - // For code that is not switched over to the new selective registration of - // the parse function, we pass in ParseOpData. This allows for backwards - // compatibility. - return AddBuiltin(op, *registration, ParseOpData); - } - - TfLiteStatus AddCustom(const char* name, - TfLiteRegistration* registration) override { + // Only the first call for a given name will be successful. i.e. if this + // function is called again for a previously added Custom Operator, the + // MicroOpResolver will be unchanged and this function will return + // kTfLiteError. + TfLiteStatus AddCustom(const char* name, TfLiteRegistration* registration) { if (registrations_len_ >= tOpCount) { if (error_reporter_) { TF_LITE_REPORT_ERROR( @@ -154,6 +104,91 @@ class MicroMutableOpResolver : public MicroOpResolver { return kTfLiteOk; } + // Registers a Builtin Operator with the MicroOpResolver. + // + // Only the first call for a given BuiltinOperator enum will be successful. + // i.e. if this function is called again for a previously added + // BuiltinOperator, the MicroOpResolver will be unchanged and this function + // will return kTfLiteError. 
+ // + // TODO(b/149408647): remove this API once the BuiltinOperator specific Add + // functions are fully implemented. + TfLiteStatus AddBuiltin(tflite::BuiltinOperator op, + TfLiteRegistration* registration) { + TFLITE_DCHECK(registration != nullptr); + // For code that is not switched over to the new selective registration of + // the parse function, we pass in ParseOpData. This allows for backwards + // compatibility. + return AddBuiltin(op, *registration, ParseOpData); + } + + // The Add* functions below add the various Builtin operators to the + // MicroMutableOpResolver object. + // + // This API is currently experimental (and only supported for a small subset + // of operators). It will soon be preferred over the AddBuiltin function for + // the following reason: + // * If all calls to AddBuiltin for an application use this API, the code + // size will be smaller by 5-8K (compared to the using the AddBuiltin + // override). + + TfLiteStatus AddConv2D() { + // TODO(b/149408647): Replace ParseOpData with the operator specific parse + // function once cl/313453102 lands. + return AddBuiltin(BuiltinOperator_CONV_2D, + *tflite::ops::micro::Register_CONV_2D(), ParseOpData); + } + + TfLiteStatus AddDequantize() { + // TODO(b/149408647): Replace ParseOpData with the operator specific parse + // function once cl/313453102 lands. + return AddBuiltin(BuiltinOperator_DEQUANTIZE, + *tflite::ops::micro::Register_DEQUANTIZE(), ParseOpData); + } + + TfLiteStatus AddFullyConnected() { + // TODO(b/149408647): Replace ParseOpData with the operator specific parse + // function once cl/313453102 lands. + return AddBuiltin(BuiltinOperator_FULLY_CONNECTED, + *tflite::ops::micro::Register_FULLY_CONNECTED(), + ParseOpData); + } + + TfLiteStatus AddLogistic() { + // TODO(b/149408647): Replace ParseOpData with the operator specific parse + // function once cl/313453102 lands. + return AddBuiltin(BuiltinOperator_LOGISTIC, + *tflite::ops::micro::Register_LOGISTIC(), ParseOpData); + } + + TfLiteStatus AddQuantize() { + // TODO(b/149408647): Replace ParseOpData with the operator specific parse + // function once cl/313453102 lands. + return AddBuiltin(BuiltinOperator_QUANTIZE, + *tflite::ops::micro::Register_QUANTIZE(), ParseOpData); + } + + TfLiteStatus AddReshape() { + // TODO(b/149408647): Replace ParseOpData with the operator specific parse + // function once cl/313453102 lands. + return AddBuiltin(BuiltinOperator_RESHAPE, + *tflite::ops::micro::Register_RESHAPE(), ParseOpData); + } + + TfLiteStatus AddSoftmax() { + // TODO(b/149408647): Replace ParseOpData with the operator specific parse + // function once cl/313453102 lands. + return AddBuiltin(BuiltinOperator_SOFTMAX, + *tflite::ops::micro::Register_SOFTMAX(), ParseOpData); + } + + TfLiteStatus AddSvdf() { + // TODO(b/149408647): Replace ParseOpData with the operator specific parse + // function once cl/313453102 lands. + return AddBuiltin(BuiltinOperator_SVDF, + *tflite::ops::micro::Register_SVDF(), ParseOpData); + } + unsigned int GetRegistrationLength() { return registrations_len_; } private: diff --git a/tensorflow/lite/micro/micro_op_resolver.h b/tensorflow/lite/micro/micro_op_resolver.h index 0f5528d7b70..9b2b70cb910 100644 --- a/tensorflow/lite/micro/micro_op_resolver.h +++ b/tensorflow/lite/micro/micro_op_resolver.h @@ -44,27 +44,6 @@ class MicroOpResolver : public OpResolver { BuiltinDataAllocator* allocator, void** builtin_data); - // Registers a Builtin Operator with the MicroOpResolver. 
- // - // Only the first call for a given BuiltinOperator enum will be successful. - // i.e. if this function is called again for a previously added - // BuiltinOperator, the MicroOpResolver will be unchanged and this function - // will return kTfLiteError. - // - // TODO(b/149408647): remove this API once the templated AddBuiltin API in - // MicroMutableOpResolver is properly implemented. - virtual TfLiteStatus AddBuiltin(tflite::BuiltinOperator op, - TfLiteRegistration* registration) = 0; - - // Registers a Custom Operator with the MicroOpResolver. - // - // Only the first call for a given name will be successful. i.e. if this - // function is called again for a previously added Custom Operator, the - // MicroOpResolver will be unchanged and this function will return - // kTfLiteError. - virtual TfLiteStatus AddCustom(const char* name, - TfLiteRegistration* registration) = 0; - // Returns the Op registration struct corresponding to the enum code from the // flatbuffer schema. Returns nullptr if the op is not found or if op == // BuiltinOperator_CUSTOM. From d8fd396ec1c48900d42340f27e18a45c31e7530f Mon Sep 17 00:00:00 2001 From: Yanhui Liang Date: Mon, 8 Jun 2020 16:03:34 -0700 Subject: [PATCH 084/178] Add keras benchmarks to "keras/benchmark" dir. PiperOrigin-RevId: 315372798 Change-Id: I550a46348cb640cfc5980cf384f375f537b5ba15 --- tensorflow/python/keras/benchmark/BUILD | 33 ++++ .../benchmark/keras_cpu_benchmark_test.py | 159 ++++++++++++++++++ 2 files changed, 192 insertions(+) create mode 100644 tensorflow/python/keras/benchmark/BUILD create mode 100644 tensorflow/python/keras/benchmark/keras_cpu_benchmark_test.py diff --git a/tensorflow/python/keras/benchmark/BUILD b/tensorflow/python/keras/benchmark/BUILD new file mode 100644 index 00000000000..f9dd2d073c5 --- /dev/null +++ b/tensorflow/python/keras/benchmark/BUILD @@ -0,0 +1,33 @@ +# Description: +# Implementation of Keras benchmarks. + +package( + default_visibility = ["//visibility:public"], + licenses = ["notice"], # Apache 2.0 +) + +exports_files(["LICENSE"]) + +# To run CPU benchmarks: +# bazel run -c opt benchmarks_test -- --benchmarks=. + +# To run GPU benchmarks: +# bazel run --config=cuda -c opt --copt="-mavx" benchmarks_test -- \ +# --benchmarks=. + +# To run a subset of benchmarks using --benchmarks flag. +# --benchmarks: the list of benchmarks to run. The specified value is interpreted +# as a regular expression and any benchmark whose name contains a partial match +# to the regular expression is executed. +# e.g. --benchmarks=".*lstm*." will run all lstm layer related benchmarks. + +py_test( + name = "keras_cpu_benchmark_test", + size = "large", + srcs = ["keras_cpu_benchmark_test.py"], + python_version = "PY3", + deps = [ + "//tensorflow/python/keras", + "//third_party/py/numpy", + ], +) diff --git a/tensorflow/python/keras/benchmark/keras_cpu_benchmark_test.py b/tensorflow/python/keras/benchmark/keras_cpu_benchmark_test.py new file mode 100644 index 00000000000..43e2470cf6b --- /dev/null +++ b/tensorflow/python/keras/benchmark/keras_cpu_benchmark_test.py @@ -0,0 +1,159 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Benchmark tests for CPU performance of Keras models."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import timeit
+
+import numpy as np
+import six
+
+from tensorflow.python import keras
+from tensorflow.python.platform import benchmark
+from tensorflow.python.platform import test
+
+_NUM_EPOCHS = 4
+
+# Dataset for benchmark
+_MLP_X = np.random.random((5000, 784))
+_MLP_Y = np.random.random((5000, 10))
+
+_CONVNET_X = np.random.random((5000, 28, 28, 1))
+_CONVNET_Y = np.random.random((5000, 10))
+
+_LSTM_X = np.random.randint(0, 1999, size=(2500, 100))
+_LSTM_Y = np.random.random((2500, 1))
+
+
+class TimerCallback(keras.callbacks.Callback):
+
+  def __init__(self):
+    self.times = []
+    self.timer = timeit.default_timer
+    self.startup_time = timeit.default_timer()
+    self.recorded_startup = False
+
+  def on_epoch_begin(self, e, logs):
+    self.epoch_start_time = self.timer()
+
+  def on_batch_end(self, e, logs):
+    if not self.recorded_startup:
+      self.startup_time = self.timer() - self.startup_time
+      self.recorded_startup = True
+
+  def on_epoch_end(self, e, logs):
+    self.times.append(self.timer() - self.epoch_start_time)
+
+
+class KerasModelCPUBenchmark(
+    six.with_metaclass(benchmark.ParameterizedBenchmark, test.Benchmark)):
+
+  # Set parameters for parameterized benchmark.
+ _benchmark_parameters = [ + ('bs_32', 32, 3), ('bs_64', 64, 2), ('bs_128', 128, 2), + ('bs_256', 256, 1), ('bs_512', 512, 1)] + + def _measure_performance(self, model_fn, x, y, batch_size=32, + run_iters=4): + build_time_list, compile_time_list, startup_time_list = [], [], [] + avg_epoch_time_list, wall_time_list, exp_per_sec_list = [], [], [] + total_num_examples = y.shape[0] * _NUM_EPOCHS + + for _ in range(run_iters): + timer = timeit.default_timer + t0 = timer() + model = model_fn() + build_time = timer() - t0 + + t1 = timer() + model.compile('rmsprop', 'binary_crossentropy') + compile_time = timer() - t1 + + cbk = TimerCallback() + t2 = timer() + model.fit(x, y, epochs=_NUM_EPOCHS, batch_size=batch_size, + callbacks=[cbk], verbose=0) + end_time = timer() + + build_time_list.append(build_time) + compile_time_list.append(compile_time) + startup_time_list.append(cbk.startup_time) + avg_epoch_time_list.append(np.mean(cbk.times[1:])) + wall_time_list.append(end_time - t0) + exp_per_sec_list.append(total_num_examples / (end_time - t2)) + + results = {'build_time': np.mean(build_time_list), + 'compile_time': np.mean(compile_time_list), + 'startup_time': np.mean(startup_time_list), + 'avg_epoch_time': np.mean(avg_epoch_time_list), + 'wall_time': np.mean(wall_time_list), + 'exp_per_sec': np.mean(exp_per_sec_list)} + + self.report_benchmark( + iters=_NUM_EPOCHS, + wall_time=results['wall_time'], + extras=results) + + def _mnist_mlp(self): + model = keras.Sequential() + model.add(keras.layers.Dense(512, activation='relu', input_shape=(784,))) + model.add(keras.layers.Dropout(0.2)) + model.add(keras.layers.Dense(512, activation='relu')) + model.add(keras.layers.Dropout(0.2)) + model.add(keras.layers.Dense(10, activation='softmax')) + + return model + + def _mnist_convnet(self): + model = keras.Sequential() + model.add( + keras.layers.Conv2D( + 32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1))) + model.add(keras.layers.Conv2D(64, (3, 3), activation='relu')) + model.add(keras.layers.MaxPooling2D(pool_size=(2, 2))) + model.add(keras.layers.Dropout(0.25)) + model.add(keras.layers.Flatten()) + model.add(keras.layers.Dense(128, activation='relu')) + model.add(keras.layers.Dropout(0.5)) + model.add(keras.layers.Dense(10, activation='softmax')) + + return model + + def _imdb_lstm(self): + model = keras.Sequential() + model.add(keras.layers.Embedding(20000, 128)) + model.add(keras.layers.LSTM(128, dropout=0.2, recurrent_dropout=0.2)) + model.add(keras.layers.Dense(1, activation='sigmoid')) + + return model + + def benchmark_mnist_mlp(self, batch_size, run_iters): + self._measure_performance(self._mnist_mlp, _MLP_X, _MLP_Y, + batch_size=batch_size, run_iters=run_iters) + + def benchmark_mnist_convnet(self, batch_size, run_iters): + self._measure_performance(self._mnist_convnet, _CONVNET_X, _CONVNET_Y, + batch_size=batch_size, run_iters=run_iters) + + def benchmark_imdb_lstm(self, batch_size, run_iters): + self._measure_performance(self._imdb_lstm, _LSTM_X, _LSTM_Y, + batch_size=batch_size, run_iters=run_iters) + + +if __name__ == '__main__': + test.main() From bb08729daa2fa049393c1392f6e98408a7305946 Mon Sep 17 00:00:00 2001 From: Advait Jain Date: Mon, 8 Jun 2020 16:31:03 -0700 Subject: [PATCH 085/178] Switch to the operator specific parse functions. 
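As an illustration (not part of this change), an application registering only the operators its model needs might look roughly like the sketch below; the resolver capacity and the chosen ops are hypothetical:

```cpp
// Hypothetical registration sketch; with the per-operator Add functions, only
// the parse logic for these ops has to be linked into the binary.
#include "tensorflow/lite/micro/micro_mutable_op_resolver.h"

tflite::MicroMutableOpResolver<4> op_resolver;  // Capacity for 4 operators.

void RegisterSelectedOps() {
  op_resolver.AddFullyConnected();
  op_resolver.AddSoftmax();
  op_resolver.AddQuantize();
  op_resolver.AddDequantize();
}
```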
PiperOrigin-RevId: 315377827 Change-Id: I7363e3b77a3b6de97ca1bbb90cbaaf30af7bfb6e --- .../lite/micro/micro_mutable_op_resolver.h | 21 ++++++------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/tensorflow/lite/micro/micro_mutable_op_resolver.h b/tensorflow/lite/micro/micro_mutable_op_resolver.h index bedceb71e4c..298d2661d9d 100644 --- a/tensorflow/lite/micro/micro_mutable_op_resolver.h +++ b/tensorflow/lite/micro/micro_mutable_op_resolver.h @@ -140,18 +140,15 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddDequantize() { - // TODO(b/149408647): Replace ParseOpData with the operator specific parse - // function once cl/313453102 lands. return AddBuiltin(BuiltinOperator_DEQUANTIZE, - *tflite::ops::micro::Register_DEQUANTIZE(), ParseOpData); + *tflite::ops::micro::Register_DEQUANTIZE(), + ParseDequantize); } TfLiteStatus AddFullyConnected() { - // TODO(b/149408647): Replace ParseOpData with the operator specific parse - // function once cl/313453102 lands. return AddBuiltin(BuiltinOperator_FULLY_CONNECTED, *tflite::ops::micro::Register_FULLY_CONNECTED(), - ParseOpData); + ParseFullyConnected); } TfLiteStatus AddLogistic() { @@ -162,10 +159,8 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddQuantize() { - // TODO(b/149408647): Replace ParseOpData with the operator specific parse - // function once cl/313453102 lands. return AddBuiltin(BuiltinOperator_QUANTIZE, - *tflite::ops::micro::Register_QUANTIZE(), ParseOpData); + *tflite::ops::micro::Register_QUANTIZE(), ParseQuantize); } TfLiteStatus AddReshape() { @@ -176,17 +171,13 @@ class MicroMutableOpResolver : public MicroOpResolver { } TfLiteStatus AddSoftmax() { - // TODO(b/149408647): Replace ParseOpData with the operator specific parse - // function once cl/313453102 lands. return AddBuiltin(BuiltinOperator_SOFTMAX, - *tflite::ops::micro::Register_SOFTMAX(), ParseOpData); + *tflite::ops::micro::Register_SOFTMAX(), ParseSoftmax); } TfLiteStatus AddSvdf() { - // TODO(b/149408647): Replace ParseOpData with the operator specific parse - // function once cl/313453102 lands. return AddBuiltin(BuiltinOperator_SVDF, - *tflite::ops::micro::Register_SVDF(), ParseOpData); + *tflite::ops::micro::Register_SVDF(), ParseSvdf); } unsigned int GetRegistrationLength() { return registrations_len_; } From 56c7861736f5322cc0e10433f62b7522926273f7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 8 Jun 2020 16:32:25 -0700 Subject: [PATCH 086/178] Add more docs on using TFLite through C++ PiperOrigin-RevId: 315378099 Change-Id: I1ab60a8a2bbed768c2714444f48f173b006fe540 --- tensorflow/lite/g3doc/guide/android.md | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/g3doc/guide/android.md b/tensorflow/lite/g3doc/guide/android.md index b22ea13d722..ba1bc46e8b4 100644 --- a/tensorflow/lite/g3doc/guide/android.md +++ b/tensorflow/lite/g3doc/guide/android.md @@ -262,9 +262,24 @@ Note that the `0.1.100` version here is purely for the sake of testing/development. With the local AAR installed, you can use the standard [TensorFlow Lite Java inference APIs](../guide/inference.md) in your app code. -##### Build C++ libraries +## Build Android app using C++ -If you want to use TFLite through C++ libraries, you can build the shared +There are two ways to use TFLite through C++ if you build your app with the NDK: + +### Use TFLite C API + +This is the *recommended* approach. 
Download the
+[TensorFlow Lite AAR hosted at JCenter](https://bintray.com/google/tensorflow/tensorflow-lite),
+rename it to `tensorflow-lite-*.zip`, and unzip it. You must include the three
+header files in `headers/tensorflow/lite/c/` folder and the relevant
+`libtensorflowlite_jni.so` dynamic library in `jni/` folder in your NDK project.
+
+The `c_api.h` header file contains basic documentation about using the TFLite C
+API.
+
+### Use TFLite C++ API
+
+If you want to use TFLite through the C++ API, you can build the C++ shared
 libraries:
 
 32bit armeabi-v7a:
@@ -278,3 +293,9 @@ bazel build -c opt --config=android_arm //tensorflow/lite:libtensorflowlite.so
 ```sh
 bazel build -c opt --config=android_arm64 //tensorflow/lite:libtensorflowlite.so
 ```
+
+Currently, there is no straightforward way to extract all header files needed,
+so you must include all header files in `tensorflow/lite/` from the TensorFlow
+repository. Additionally, you will need header files from
+[FlatBuffers](https://github.com/google/flatbuffers) and
+[Abseil](https://github.com/abseil/abseil-cpp).

From b546504c58de7e118e6805d351dc0e2e5d05e596 Mon Sep 17 00:00:00 2001
From: Mihai Maruseac
Date: Mon, 8 Jun 2020 16:34:08 -0700
Subject: [PATCH 087/178] Move identity_fuzz to new location. Add linkstatic=1 to fuzz target.

PiperOrigin-RevId: 315378448
Change-Id: I1df43a32fa81f2c9ff34aa2defbdb431831244ca
---
 tensorflow/core/kernels/fuzzing/BUILD | 2 --
 tensorflow/security/fuzzing/BUILD | 14 ++++++++++++++
 .../kernels => security}/fuzzing/identity_fuzz.cc | 0
 tensorflow/security/fuzzing/tf_fuzzing.bzl | 1 +
 4 files changed, 15 insertions(+), 2 deletions(-)
 rename tensorflow/{core/kernels => security}/fuzzing/identity_fuzz.cc (100%)

diff --git a/tensorflow/core/kernels/fuzzing/BUILD b/tensorflow/core/kernels/fuzzing/BUILD
index a27049a0bec..4133462cad5 100644
--- a/tensorflow/core/kernels/fuzzing/BUILD
+++ b/tensorflow/core/kernels/fuzzing/BUILD
@@ -17,8 +17,6 @@ cc_library(
     ],
 )
 
-tf_ops_fuzz_target_lib("identity")
-
 tf_ops_fuzz_target_lib("string_to_number")
 
 tf_oss_fuzz_corpus("string_to_number")
diff --git a/tensorflow/security/fuzzing/BUILD b/tensorflow/security/fuzzing/BUILD
index 887e1a23cdf..a2a68ed898f 100644
--- a/tensorflow/security/fuzzing/BUILD
+++ b/tensorflow/security/fuzzing/BUILD
@@ -15,3 +15,17 @@ tf_fuzz_target(
     name = "demo_fuzz",
     srcs = ["demo_fuzz.cc"],
 )
+
+# A trivial fuzzer with no pre-specified corpus.
+# TODO(mihaimaruseac): Move fuzz_session and the op fuzzers to a subdirectory
+tf_fuzz_target(
+    name = "identity_fuzz",
+    srcs = ["identity_fuzz.cc"],
+    deps = [
+        "//tensorflow/cc:cc_ops",
+        "//tensorflow/core/kernels/fuzzing:fuzz_session",
+        # Needed only to transitively link dependencies
+        "//tensorflow/cc:scope",
+        "//tensorflow/core:core_cpu",
+    ],
+)
diff --git a/tensorflow/core/kernels/fuzzing/identity_fuzz.cc b/tensorflow/security/fuzzing/identity_fuzz.cc
similarity index 100%
rename from tensorflow/core/kernels/fuzzing/identity_fuzz.cc
rename to tensorflow/security/fuzzing/identity_fuzz.cc
diff --git a/tensorflow/security/fuzzing/tf_fuzzing.bzl b/tensorflow/security/fuzzing/tf_fuzzing.bzl
index 63a8cbac704..b76c22b016a 100644
--- a/tensorflow/security/fuzzing/tf_fuzzing.bzl
+++ b/tensorflow/security/fuzzing/tf_fuzzing.bzl
@@ -76,5 +76,6 @@ def tf_fuzz_target(
         deps = deps,  # TODO(mihaimaruseac): fuzzing lib?
         data = data,  # TODO(mihaimaruseac): dict, corpus, parsers??
         tags = tags,  # TODO(mihaimaruseac): fuzzing tags?
+ linkstatic = 1, **kwargs ) From 939772a64eb30fe7ccd632547d98f1511de87637 Mon Sep 17 00:00:00 2001 From: Pavithra Vijay Date: Mon, 8 Jun 2020 16:50:13 -0700 Subject: [PATCH 088/178] Fix connectivity metadata missing issue on functional model. PiperOrigin-RevId: 315381292 Change-Id: I3a7e61a0afbda0ae1984a4152dd297e350b29775 --- tensorflow/python/keras/engine/functional.py | 3 +++ tensorflow/python/keras/engine/functional_test.py | 13 +++++++++++++ 2 files changed, 16 insertions(+) diff --git a/tensorflow/python/keras/engine/functional.py b/tensorflow/python/keras/engine/functional.py index 741cc831f02..0ef4840b651 100644 --- a/tensorflow/python/keras/engine/functional.py +++ b/tensorflow/python/keras/engine/functional.py @@ -544,6 +544,7 @@ class Functional(training_lib.Model): t_rank = t_shape.rank ref_shape = ref_input.shape ref_rank = ref_shape.rank + keras_history = getattr(tensor, '_keras_history', None) if t_rank is not None and ref_rank is not None: # Should squeeze last dimension. # True if tensor is (BATCH, ..., 1) and reference is (BATCH, ...). @@ -553,6 +554,8 @@ class Functional(training_lib.Model): # True if tensor is (BATCH, ...) and reference is (BATCH, ..., 1). elif (t_rank == ref_rank - 1 and ref_shape[-1] == 1): tensor = array_ops.expand_dims_v2(tensor, axis=-1) + if keras_history is not None: # Restore keras history. + tensor._keras_history = keras_history # Add shape hints to Tensors that may have None shape dims but have shapes # defined by the `keras.Input` (not applicable in eager mode). diff --git a/tensorflow/python/keras/engine/functional_test.py b/tensorflow/python/keras/engine/functional_test.py index b877e81af15..68b40caad9b 100644 --- a/tensorflow/python/keras/engine/functional_test.py +++ b/tensorflow/python/keras/engine/functional_test.py @@ -2068,5 +2068,18 @@ class CacheCorrectnessTest(keras_parameterized.TestCase): # `None` value passed during construction is overridden. self.assertAllEqual(network(x, training=False), x * 0.0) + def test_keras_history_propagation_(self): + for input_shape in [(1,), (1, 1)]: + sub_in = input_layer_lib.Input((1,)) + relu_layer = layers.ReLU() + sub_out = relu_layer(sub_in) + submodel = functional.Functional(sub_in, sub_out) + self.assertLen(relu_layer._inbound_nodes, 1) + + inp = input_layer_lib.Input(input_shape) + submodel(inp) + self.assertLen(relu_layer._inbound_nodes, 2) + + if __name__ == '__main__': test.main() From 4f2d979e1c833f96d9b9ae7bfa3d5d2e6d81b61a Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Mon, 8 Jun 2020 17:16:38 -0700 Subject: [PATCH 089/178] Routine dead code clean-up. 
PiperOrigin-RevId: 315385914 Change-Id: I5c3be3a2b221d65197a375dbee99731ddc130438 --- .../saved_model/integration_tests/integration_scripts.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tensorflow/examples/saved_model/integration_tests/integration_scripts.py b/tensorflow/examples/saved_model/integration_tests/integration_scripts.py index b4e37fba5bc..6f1ccfa2f05 100644 --- a/tensorflow/examples/saved_model/integration_tests/integration_scripts.py +++ b/tensorflow/examples/saved_model/integration_tests/integration_scripts.py @@ -34,7 +34,6 @@ import subprocess import sys from absl import app -from absl import flags as absl_flags import tensorflow.compat.v2 as tf from tensorflow.python.platform import tf_logging as logging @@ -54,14 +53,6 @@ class TestCase(tf.test.TestCase): for flag_key, flag_value in flags.items(): command_parts.append("--%s=%s" % (flag_key, flag_value)) - # TODO(b/143247229): Remove forwarding this flag once the BUILD rule - # `distribute_py_test()` stops setting it. - deepsea_flag_name = "register_deepsea_platform" - deepsea_flag_value = getattr(absl_flags.FLAGS, deepsea_flag_name, None) - if deepsea_flag_value is not None: - command_parts.append("--%s=%s" % (deepsea_flag_name, - str(deepsea_flag_value).lower())) - env = dict(TF2_BEHAVIOR="enabled", SCRIPT_NAME=script_name) logging.info("Running %s with added environment variables %s" % (command_parts, env)) From 2edb0fe27f646219f78ee787c84cb1755e3a39e4 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Mon, 8 Jun 2020 17:33:39 -0700 Subject: [PATCH 090/178] [XLA:SPMD] Limit halo exchange to be smaller than replication. PiperOrigin-RevId: 315388562 Change-Id: Ieb83f2765d493065fd855988f08c083101bb3693 --- .../xla/service/spmd/spmd_partitioner_util.cc | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc index df7597628af..3354a9c3233 100644 --- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc +++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner_util.cc @@ -413,6 +413,15 @@ absl::optional ExchangeHalo( std::vector concat_pieces; int64 max_left_halo_size = left_halo_size_function.MaxInRange(1, shard_count); + int64 max_right_halo_size = + right_halo_size_function.MaxInRange(0, shard_count - 1); + if (max_left_halo_size + max_right_halo_size + input_shard_size >= + input_shard_size * shard_count && + (max_left_halo_size > input_shard_size || + max_right_halo_size > input_shard_size)) { + return absl::nullopt; + } + // Left halo. for (int64 i = CeilOfRatio(max_left_halo_size, input_shard_size) - 1; i >= 0; --i) { std::vector> source_target_pairs; @@ -447,8 +456,6 @@ absl::optional ExchangeHalo( concat_pieces.push_back(hlo); // Right halo. - int64 max_right_halo_size = - right_halo_size_function.MaxInRange(0, shard_count - 1); for (int64 i = 0; i < CeilOfRatio(max_right_halo_size, input_shard_size); ++i) { std::vector> source_target_pairs; From 2cceeea2649e3ede9a5be78748dfd11a4837a9ad Mon Sep 17 00:00:00 2001 From: Jose Baiocchi Date: Mon, 8 Jun 2020 18:09:41 -0700 Subject: [PATCH 091/178] Use switch statements in lambdas passed to XEventVisitor::ForEachStat. Also add utility to parse tensor shapes. 
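As an illustration (not part of this change), the new shape-parsing helper splits the traced "(shape1;shape2;...)" string into its per-tensor pieces; the shape values below are made up:

```cpp
// Hypothetical usage of the new helper added to tf_op_utils.
#include <vector>

#include "absl/strings/string_view.h"
#include "tensorflow/core/profiler/utils/tf_op_utils.h"

std::vector<absl::string_view> shapes =
    tensorflow::profiler::ParseTensorShapes("([16,32,32,3];[3,3,32,64])");
// shapes now holds {"[16,32,32,3]", "[3,3,32,64]"}.
```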
PiperOrigin-RevId: 315393466 Change-Id: I23a33867f132a3a30617315e79911780143e815e --- .../convert/xplane_to_kernel_stats_db.cc | 29 ++++--- .../convert/xplane_to_memory_profile.cc | 81 +++++++++++-------- .../profiler/convert/xplane_to_step_events.cc | 36 ++++++--- .../convert/xplane_to_tf_functions.cc | 15 ++-- tensorflow/core/profiler/utils/cost_utils.cc | 25 +++--- tensorflow/core/profiler/utils/tf_op_utils.cc | 8 ++ tensorflow/core/profiler/utils/tf_op_utils.h | 6 ++ 7 files changed, 125 insertions(+), 75 deletions(-) diff --git a/tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.cc b/tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.cc index 023d6a73d77..4d42d51cf6c 100644 --- a/tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.cc +++ b/tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.cc @@ -49,18 +49,23 @@ KernelStatsDb ConvertDeviceTraceXPlaneToKernelStatsDb( absl::string_view equation; event.ForEachStat([&](const tensorflow::profiler::XStatVisitor& stat) { - if (stat.Type() == StatType::kLevel0) { - tf_op_fullname = stat.StrOrRefValue(); - } else if (stat.Type() == StatType::kKernelDetails) { - kernel.set_name(event.Name().data(), event.Name().size()); - bool using_tensor_cores = IsKernelUsingTensorCore(event.Name()); - kernel.set_is_kernel_using_tensor_core(using_tensor_cores); - kernel.set_total_duration_ns(event.DurationNs()); - kernel.set_min_duration_ns(event.DurationNs()); - kernel.set_max_duration_ns(event.DurationNs()); - ParseKernelLaunchParams(stat.StrOrRefValue(), &kernel); - } else if (stat.Type() == StatType::kEquation) { - equation = stat.StrOrRefValue(); + if (!stat.Type().has_value()) return; + switch (stat.Type().value()) { + case StatType::kLevel0: + tf_op_fullname = stat.StrOrRefValue(); + break; + case StatType::kKernelDetails: + kernel.set_name(event.Name().data(), event.Name().size()); + kernel.set_is_kernel_using_tensor_core( + IsKernelUsingTensorCore(event.Name())); + kernel.set_total_duration_ns(event.DurationNs()); + kernel.set_min_duration_ns(event.DurationNs()); + kernel.set_max_duration_ns(event.DurationNs()); + ParseKernelLaunchParams(stat.StrOrRefValue(), &kernel); + break; + case StatType::kEquation: + equation = stat.StrOrRefValue(); + break; } }); diff --git a/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc b/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc index 6dfc3478b31..a0353d371d6 100644 --- a/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc +++ b/tensorflow/core/profiler/convert/xplane_to_memory_profile.cc @@ -146,38 +146,55 @@ MemoryProfile GenerateMemoryProfile(const XPlane* host_trace) { ActivityMetadata metadata; std::string memory_id; event.ForEachStat([&](const XStatVisitor& stat) { - if (stat.Type() == StatType::kIndexOnHost || - stat.Type() == StatType::kDeviceOrdinal) { - memory_id = absl::StrFormat("%d", stat.IntValue()); - } else if (stat.Type() == StatType::kAllocatorName) { - memory_id = stat.ToString(); - } else if (stat.Type() == StatType::kBytesReserved) { - stats.bytes_reserved = stat.IntValue(); - } else if (stat.Type() == StatType::kBytesAllocated) { - stats.bytes_allocated = stat.IntValue(); - } else if (stat.Type() == StatType::kBytesAvailable) { - stats.bytes_available = stat.IntValue(); - } else if (stat.Type() == StatType::kFragmentation) { - stats.fragmentation = stat.DoubleValue(); - } else if (stat.Type() == StatType::kPeakBytesInUse) { - stats.peak_bytes_in_use = stat.IntValue(); - } else if (stat.Type() == StatType::kRequestedBytes) { - 
metadata.requested_bytes = stat.IntValue(); - } else if (stat.Type() == StatType::kAllocationBytes) { - metadata.allocation_bytes = stat.IntValue(); - } else if (stat.Type() == StatType::kAddress) { - metadata.address = stat.IntValue(); - } else if (stat.Type() == StatType::kTfOp) { - metadata.tf_op_name = stat.StrOrRefValue(); - } else if (stat.Type() == StatType::kStepId) { - metadata.step_id = stat.IntValue(); - if (metadata.step_id != 0) (*step_count)[metadata.step_id]++; - } else if (stat.Type() == StatType::kRegionType) { - metadata.region_type = stat.StrOrRefValue(); - } else if (stat.Type() == StatType::kDataType) { - metadata.data_type = stat.IntValue(); - } else if (stat.Type() == StatType::kTensorShapes) { - metadata.tensor_shape = stat.StrOrRefValue(); + if (!stat.Type().has_value()) return; + switch (stat.Type().value()) { + case StatType::kIndexOnHost: + case StatType::kDeviceOrdinal: + memory_id = absl::StrFormat("%d", stat.IntValue()); + break; + case StatType::kAllocatorName: + memory_id = std::string(stat.StrOrRefValue()); + break; + case StatType::kBytesReserved: + stats.bytes_reserved = stat.IntValue(); + break; + case StatType::kBytesAllocated: + stats.bytes_allocated = stat.IntValue(); + break; + case StatType::kBytesAvailable: + stats.bytes_available = stat.IntValue(); + break; + case StatType::kFragmentation: + stats.fragmentation = stat.DoubleValue(); + break; + case StatType::kPeakBytesInUse: + stats.peak_bytes_in_use = stat.IntValue(); + break; + case StatType::kRequestedBytes: + metadata.requested_bytes = stat.IntValue(); + break; + case StatType::kAllocationBytes: + metadata.allocation_bytes = stat.IntValue(); + break; + case StatType::kAddress: + metadata.address = stat.IntValue(); + break; + case StatType::kTfOp: + metadata.tf_op_name = stat.StrOrRefValue(); + break; + case StatType::kStepId: + metadata.step_id = stat.IntValue(); + if (metadata.step_id != 0) (*step_count)[metadata.step_id]++; + break; + case StatType::kRegionType: + metadata.region_type = stat.StrOrRefValue(); + break; + case StatType::kDataType: + metadata.data_type = stat.IntValue(); + break; + case StatType::kTensorShapes: + metadata.tensor_shape = stat.StrOrRefValue(); + break; } }); diff --git a/tensorflow/core/profiler/convert/xplane_to_step_events.cc b/tensorflow/core/profiler/convert/xplane_to_step_events.cc index bfe0ac86ef4..00da02c8116 100644 --- a/tensorflow/core/profiler/convert/xplane_to_step_events.cc +++ b/tensorflow/core/profiler/convert/xplane_to_step_events.cc @@ -61,12 +61,17 @@ StepEvents ConvertHostThreadsXLineToStepEvents( int64 group_id = -1; absl::string_view step_name; event.ForEachStat([&](const XStatVisitor& stat) { - if (stat.Type() == StatType::kCorrelationId) { - correlation_id = stat.IntValue(); - } else if (stat.Type() == StatType::kGroupId) { - group_id = stat.IntValue(); - } else if (stat.Type() == StatType::kStepName) { - step_name = stat.StrOrRefValue(); + if (!stat.Type().has_value()) return; + switch (stat.Type().value()) { + case StatType::kCorrelationId: + correlation_id = stat.IntValue(); + break; + case StatType::kGroupId: + group_id = stat.IntValue(); + break; + case StatType::kStepName: + step_name = stat.StrOrRefValue(); + break; } }); if (group_id < 0) return; @@ -126,14 +131,19 @@ StepEvents ConvertDeviceTraceXLineToStepEvents(const XLineVisitor& line) { line.ForEachEvent([&](const XEventVisitor& event) { int64 correlation_id = -1; int64 group_id = -1; - absl::string_view tensor_shapes = ""; + absl::string_view tensor_shapes; 
event.ForEachStat([&](const XStatVisitor& stat) { - if (stat.Type() == StatType::kCorrelationId) { - correlation_id = stat.IntValue(); - } else if (stat.Type() == StatType::kGroupId) { - group_id = stat.IntValue(); - } else if (stat.Type() == StatType::kTensorShapes) { - tensor_shapes = stat.StrOrRefValue(); + if (!stat.Type().has_value()) return; + switch (stat.Type().value()) { + case StatType::kCorrelationId: + correlation_id = stat.IntValue(); + break; + case StatType::kGroupId: + group_id = stat.IntValue(); + break; + case StatType::kTensorShapes: + tensor_shapes = stat.StrOrRefValue(); + break; } }); diff --git a/tensorflow/core/profiler/convert/xplane_to_tf_functions.cc b/tensorflow/core/profiler/convert/xplane_to_tf_functions.cc index b25cdc4d219..3f3506bc8bf 100644 --- a/tensorflow/core/profiler/convert/xplane_to_tf_functions.cc +++ b/tensorflow/core/profiler/convert/xplane_to_tf_functions.cc @@ -162,13 +162,18 @@ class TfFunctionExecutions { explicit TfFunctionExecutions(const XLineVisitor& line) { // Creates points_ and activations_ from line. line.ForEachEvent([&](const XEventVisitor& event) { - std::string mode = ""; + absl::string_view mode; int64 tracing_count = 0; event.ForEachStat([&mode, &tracing_count](const XStatVisitor& stat) { - if (stat.Type() == StatType::kTfFunctionCall) - mode = std::string(stat.StrOrRefValue()); - if (stat.Type() == StatType::kTfFunctionTracingCount) - tracing_count = stat.IntValue(); + if (!stat.Type().has_value()) return; + switch (stat.Type().value()) { + case StatType::kTfFunctionCall: + mode = stat.StrOrRefValue(); + break; + case StatType::kTfFunctionTracingCount: + tracing_count = stat.IntValue(); + break; + } }); if (mode.empty()) return; diff --git a/tensorflow/core/profiler/utils/cost_utils.cc b/tensorflow/core/profiler/utils/cost_utils.cc index a94f09bb79c..896019f775e 100644 --- a/tensorflow/core/profiler/utils/cost_utils.cc +++ b/tensorflow/core/profiler/utils/cost_utils.cc @@ -87,35 +87,34 @@ grappler::DeviceInfo TfOpRoofLineCostEstimator::GetDeviceInfo( TfOpRoofLineCostEstimator::OpRoofLineStats TfOpRoofLineCostEstimator::Predict( const XEventVisitor& event) { TfOp tf_op; - bool has_shape_stats = false; - std::vector input_tensors; + absl::string_view tensor_shapes; event.ForEachStat([&](const XStatVisitor& stat) { - if (stat.Type() == StatType::kLevel0) { - tf_op = ParseTfOpFullname(stat.StrOrRefValue()); - } else if (stat.Type() == StatType::kTensorShapes) { - has_shape_stats = true; - auto shapes_stats = stat.StrOrRefValue(); - absl::ConsumePrefix(&shapes_stats, "("); - absl::ConsumeSuffix(&shapes_stats, ")"); - input_tensors = absl::StrSplit(shapes_stats, ';'); + if (!stat.Type().has_value()) return; + switch (stat.Type().value()) { + case StatType::kLevel0: + tf_op = ParseTfOpFullname(stat.StrOrRefValue()); + break; + case StatType::kTensorShapes: + tensor_shapes = stat.StrOrRefValue(); + break; } }); // Return empty OpRoofLineStats if shape is not traced or this is not a tf op. 
- if (tf_op.type.empty() || !has_shape_stats) { + if (tf_op.type.empty() || tensor_shapes.empty()) { return {0ULL, 0ULL, /*inaccurate=*/true}; } grappler::OpContext op_context; op_context.name = std::string(tf_op.type); op_context.op_info.set_op(op_context.name); - for (const auto& tensor : input_tensors) { + for (absl::string_view tensor : ParseTensorShapes(tensor_shapes)) { *op_context.op_info.add_inputs() = GetTensorProperties(tensor); } grappler::Costs costs = PredictCosts(op_context); if (costs.inaccurate) unsupported_ops_.insert(std::string(tf_op.type)); - VLOG(1) << tf_op.type << "[" << absl::StrJoin(input_tensors, ",") << "]" + VLOG(1) << tf_op.type << tensor_shapes << " flops:" << costs.compute_time.count() << " bytes:" << costs.memory_time.count(); diff --git a/tensorflow/core/profiler/utils/tf_op_utils.cc b/tensorflow/core/profiler/utils/tf_op_utils.cc index 630a74c4e47..99ccdaad357 100644 --- a/tensorflow/core/profiler/utils/tf_op_utils.cc +++ b/tensorflow/core/profiler/utils/tf_op_utils.cc @@ -23,6 +23,7 @@ limitations under the License. #include "absl/strings/str_cat.h" #include "absl/strings/str_split.h" #include "absl/strings/string_view.h" +#include "absl/strings/strip.h" #include "tensorflow/core/platform/regexp.h" namespace tensorflow { @@ -104,5 +105,12 @@ std::string TfOpEventName(absl::string_view tf_op_fullname) { return TfOpEventName(ParseTfOpFullname(tf_op_fullname)); } +std::vector ParseTensorShapes( + absl::string_view tensor_shapes) { + absl::ConsumePrefix(&tensor_shapes, "("); + absl::ConsumeSuffix(&tensor_shapes, ")"); + return absl::StrSplit(tensor_shapes, ';'); +} + } // namespace profiler } // namespace tensorflow diff --git a/tensorflow/core/profiler/utils/tf_op_utils.h b/tensorflow/core/profiler/utils/tf_op_utils.h index b8af9463d51..cb05a6098fa 100644 --- a/tensorflow/core/profiler/utils/tf_op_utils.h +++ b/tensorflow/core/profiler/utils/tf_op_utils.h @@ -81,6 +81,12 @@ inline bool IsMemcpyHToDOp(absl::string_view tf_op_type) { inline bool IsMemcpyDToHOp(absl::string_view tf_op_type) { return tf_op_type == kMemcpyDToHOp; } + +// Splits a string of tensor shapes in "(shape1;shape2;...)" format, i.e., +// delimited by '(' and ')' and separated by ';', into the individual shapes. 
+std::vector ParseTensorShapes( + absl::string_view tensor_shapes); + } // namespace profiler } // namespace tensorflow From 926cd57d1f6e8a57c067195f3a0138c90bfff475 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Mon, 8 Jun 2020 18:22:55 -0700 Subject: [PATCH 092/178] Check in the bazel toolchain for the new windows RBE image PiperOrigin-RevId: 315395076 Change-Id: I47ff27beaf67cb71c93f7238b8a0aee40bef3838 --- tensorflow/opensource_only.files | 1 + .../preconfig/win/tf_win_08062020/BUILD | 357 +++++ .../armeabi_cc_toolchain_config.bzl | 82 + .../builtin_include_directory_paths_msvc | 14 + .../windows_cc_toolchain_config.bzl | 1342 +++++++++++++++++ 5 files changed, 1796 insertions(+) create mode 100644 third_party/toolchains/preconfig/win/tf_win_08062020/BUILD create mode 100644 third_party/toolchains/preconfig/win/tf_win_08062020/armeabi_cc_toolchain_config.bzl create mode 100644 third_party/toolchains/preconfig/win/tf_win_08062020/builtin_include_directory_paths_msvc create mode 100644 third_party/toolchains/preconfig/win/tf_win_08062020/windows_cc_toolchain_config.bzl diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files index d49b1c1e381..5f1f2832cc8 100644 --- a/tensorflow/opensource_only.files +++ b/tensorflow/opensource_only.files @@ -250,6 +250,7 @@ tensorflow/third_party/toolchains/preconfig/ubuntu16.04/py/BUILD tensorflow/third_party/toolchains/preconfig/ubuntu16.04/tensorrt5/BUILD tensorflow/third_party/toolchains/preconfig/win/BUILD tensorflow/third_party/toolchains/preconfig/win/bazel_211/BUILD +tensorflow/third_party/toolchains/preconfig/win/tf_win_08062020/BUILD tensorflow/third_party/toolchains/preconfig/win_1803/BUILD tensorflow/third_party/toolchains/preconfig/win_1803/py36/BUILD tensorflow/third_party/toolchains/preconfig/win_1803/py37/BUILD diff --git a/third_party/toolchains/preconfig/win/tf_win_08062020/BUILD b/third_party/toolchains/preconfig/win/tf_win_08062020/BUILD new file mode 100644 index 00000000000..a0f09ac64eb --- /dev/null +++ b/third_party/toolchains/preconfig/win/tf_win_08062020/BUILD @@ -0,0 +1,357 @@ +# Copyright 2018 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This becomes the BUILD file for @local_config_cc// under Windows. + +load("@rules_cc//cc:defs.bzl", "cc_library", "cc_toolchain", "cc_toolchain_suite") +load(":windows_cc_toolchain_config.bzl", "cc_toolchain_config") +load(":armeabi_cc_toolchain_config.bzl", "armeabi_cc_toolchain_config") + +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "malloc", +) + +filegroup( + name = "empty", + srcs = [], +) + +filegroup( + name = "mingw_compiler_files", + srcs = [":builtin_include_directory_paths_mingw"], +) + +filegroup( + name = "clangcl_compiler_files", + srcs = [":builtin_include_directory_paths_clangcl"], +) + +filegroup( + name = "msvc_compiler_files", + srcs = [":builtin_include_directory_paths_msvc"], +) + +# Hardcoded toolchain, legacy behaviour. 
+cc_toolchain_suite( + name = "toolchain", + toolchains = { + "armeabi-v7a|compiler": ":cc-compiler-armeabi-v7a", + "x64_windows|msvc-cl": ":cc-compiler-x64_windows", + "x64_windows|msys-gcc": ":cc-compiler-x64_windows_msys", + "x64_windows|mingw-gcc": ":cc-compiler-x64_windows_mingw", + "x64_windows|clang-cl": ":cc-compiler-x64_windows-clang-cl", + "x64_windows_msys": ":cc-compiler-x64_windows_msys", + "x64_windows": ":cc-compiler-x64_windows", + "armeabi-v7a": ":cc-compiler-armeabi-v7a", + }, +) + +cc_toolchain( + name = "cc-compiler-x64_windows_msys", + all_files = ":empty", + ar_files = ":empty", + as_files = ":mingw_compiler_files", + compiler_files = ":mingw_compiler_files", + dwp_files = ":empty", + linker_files = ":empty", + objcopy_files = ":empty", + strip_files = ":empty", + supports_param_files = 1, + toolchain_config = ":msys_x64", + toolchain_identifier = "msys_x64", +) + +cc_toolchain_config( + name = "msys_x64", + abi_libc_version = "local", + abi_version = "local", + compiler = "msys-gcc", + cpu = "x64_windows", + cxx_builtin_include_directories = [ + "c:/tools/msys64/usr/", + ], + dbg_mode_debug_flag = "/DEBUG:FULL", + fastbuild_mode_debug_flag = "/DEBUG:FASTLINK", + host_system_name = "local", + target_libc = "msys", + target_system_name = "local", + tool_bin_path = "c:/tools/msys64/usr/bin", + tool_paths = { + "ar": "c:/tools/msys64/usr/bin/ar", + "compat-ld": "c:/tools/msys64/usr/bin/compat-ld", + "cpp": "c:/tools/msys64/usr/bin/cpp", + "dwp": "c:/tools/msys64/usr/bin/dwp", + "gcc": "c:/tools/msys64/usr/bin/gcc", + "gcov": "c:/tools/msys64/usr/bin/gcov", + "ld": "c:/tools/msys64/usr/bin/ld", + "nm": "c:/tools/msys64/usr/bin/nm", + "objcopy": "c:/tools/msys64/usr/bin/objcopy", + "objdump": "c:/tools/msys64/usr/bin/objdump", + "strip": "c:/tools/msys64/usr/bin/strip", + }, +) + +toolchain( + name = "cc-toolchain-x64_windows_msys", + exec_compatible_with = [ + "@platforms//cpu:x86_64", + "@platforms//os:windows", + "@bazel_tools//tools/cpp:msys", + ], + target_compatible_with = [ + "@platforms//cpu:x86_64", + "@platforms//os:windows", + ], + toolchain = ":cc-compiler-x64_windows_msys", + toolchain_type = "@bazel_tools//tools/cpp:toolchain_type", +) + +cc_toolchain( + name = "cc-compiler-x64_windows_mingw", + all_files = ":empty", + ar_files = ":empty", + as_files = ":mingw_compiler_files", + compiler_files = ":mingw_compiler_files", + dwp_files = ":empty", + linker_files = ":empty", + objcopy_files = ":empty", + strip_files = ":empty", + supports_param_files = 0, + toolchain_config = ":msys_x64_mingw", + toolchain_identifier = "msys_x64_mingw", +) + +cc_toolchain_config( + name = "msys_x64_mingw", + abi_libc_version = "local", + abi_version = "local", + compiler = "mingw-gcc", + cpu = "x64_windows", + cxx_builtin_include_directories = [ + "c:/tools/msys64/mingw64/", + ], + dbg_mode_debug_flag = "/DEBUG:FULL", + fastbuild_mode_debug_flag = "/DEBUG:FASTLINK", + host_system_name = "local", + target_libc = "mingw", + target_system_name = "local", + tool_bin_path = "c:/tools/msys64/mingw64/bin", + tool_paths = { + "ar": "c:/tools/msys64/mingw64/bin/ar", + "compat-ld": "c:/tools/msys64/mingw64/bin/compat-ld", + "cpp": "c:/tools/msys64/mingw64/bin/cpp", + "dwp": "c:/tools/msys64/mingw64/bin/dwp", + "gcc": "c:/tools/msys64/mingw64/bin/gcc", + "gcov": "c:/tools/msys64/mingw64/bin/gcov", + "ld": "c:/tools/msys64/mingw64/bin/ld", + "nm": "c:/tools/msys64/mingw64/bin/nm", + "objcopy": "c:/tools/msys64/mingw64/bin/objcopy", + "objdump": "c:/tools/msys64/mingw64/bin/objdump", + 
"strip": "c:/tools/msys64/mingw64/bin/strip", + }, +) + +toolchain( + name = "cc-toolchain-x64_windows_mingw", + exec_compatible_with = [ + "@platforms//cpu:x86_64", + "@platforms//os:windows", + "@bazel_tools//tools/cpp:mingw", + ], + target_compatible_with = [ + "@platforms//cpu:x86_64", + "@platforms//os:windows", + ], + toolchain = ":cc-compiler-x64_windows_mingw", + toolchain_type = "@bazel_tools//tools/cpp:toolchain_type", +) + +cc_toolchain( + name = "cc-compiler-x64_windows", + all_files = ":empty", + ar_files = ":empty", + as_files = ":msvc_compiler_files", + compiler_files = ":msvc_compiler_files", + dwp_files = ":empty", + linker_files = ":empty", + objcopy_files = ":empty", + strip_files = ":empty", + supports_param_files = 1, + toolchain_config = ":msvc_x64", + toolchain_identifier = "msvc_x64", +) + +cc_toolchain_config( + name = "msvc_x64", + abi_libc_version = "local", + abi_version = "local", + compiler = "msvc-cl", + cpu = "x64_windows", + cxx_builtin_include_directories = [ + "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.26.28801\\ATLMFC\\include", + "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.26.28801\\include", + "C:\\Program Files (x86)\\Windows Kits\\NETFXSDK\\4.8\\include\\um", + "C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.18362.0\\ucrt", + "C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.18362.0\\shared", + "C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.18362.0\\um", + "C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.18362.0\\winrt", + "C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.18362.0\\cppwinrt", + ], + dbg_mode_debug_flag = "/DEBUG:FULL", + default_link_flags = ["/MACHINE:X64"], + fastbuild_mode_debug_flag = "/DEBUG:FASTLINK", + host_system_name = "local", + msvc_cl_path = "C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.26.28801/bin/HostX64/x64/cl.exe", + msvc_env_include = "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.26.28801\\ATLMFC\\include;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.26.28801\\include;C:\\Program Files (x86)\\Windows Kits\\NETFXSDK\\4.8\\include\\um;C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.18362.0\\ucrt;C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.18362.0\\shared;C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.18362.0\\um;C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.18362.0\\winrt;C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.18362.0\\cppwinrt", + msvc_env_lib = "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.26.28801\\ATLMFC\\lib\\x64;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.26.28801\\lib\\x64;C:\\Program Files (x86)\\Windows Kits\\NETFXSDK\\4.8\\lib\\um\\x64;C:\\Program Files (x86)\\Windows Kits\\10\\lib\\10.0.18362.0\\ucrt\\x64;C:\\Program Files (x86)\\Windows Kits\\10\\lib\\10.0.18362.0\\um\\x64;", + msvc_env_path = "C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\Common7\\IDE\\\\Extensions\\Microsoft\\IntelliCode\\CLI;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.26.28801\\bin\\HostX64\\x64;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\Common7\\IDE\\VC\\VCPackages;C:\\Program Files (x86)\\Microsoft Visual 
Studio\\2019\\Community\\Common7\\IDE\\CommonExtensions\\Microsoft\\TestWindow;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\Common7\\IDE\\CommonExtensions\\Microsoft\\TeamFoundation\\Team Explorer;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\MSBuild\\Current\\bin\\Roslyn;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\Team Tools\\Performance Tools\\x64;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\Team Tools\\Performance Tools;C:\\Program Files (x86)\\Microsoft Visual Studio\\Shared\\Common\\VSPerfCollectionTools\\vs2019\\\\x64;C:\\Program Files (x86)\\Microsoft Visual Studio\\Shared\\Common\\VSPerfCollectionTools\\vs2019\\;C:\\Program Files (x86)\\Microsoft SDKs\\Windows\\v10.0A\\bin\\NETFX 4.8 Tools\\x64\\;C:\\Program Files (x86)\\Windows Kits\\10\\bin\\10.0.18362.0\\x64;C:\\Program Files (x86)\\Windows Kits\\10\\bin\\x64;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\\\MSBuild\\Current\\Bin;C:\\Windows\\Microsoft.NET\\Framework64\\v4.0.30319;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\Common7\\IDE\\;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\Common7\\Tools\\;;C:\\Windows\\system32;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\Common7\\IDE\\CommonExtensions\\Microsoft\\CMake\\CMake\\bin;C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\Common7\\IDE\\CommonExtensions\\Microsoft\\CMake\\Ninja", + msvc_env_tmp = "C:\\Users\\ContainerAdministrator\\AppData\\Local\\Temp", + msvc_lib_path = "C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.26.28801/bin/HostX64/x64/lib.exe", + msvc_link_path = "C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.26.28801/bin/HostX64/x64/link.exe", + msvc_ml_path = "C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.26.28801/bin/HostX64/x64/ml64.exe", + target_libc = "msvcrt", + target_system_name = "local", + tool_paths = { + "ar": "C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.26.28801/bin/HostX64/x64/lib.exe", + "ml": "C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.26.28801/bin/HostX64/x64/ml64.exe", + "cpp": "C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.26.28801/bin/HostX64/x64/cl.exe", + "gcc": "C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.26.28801/bin/HostX64/x64/cl.exe", + "gcov": "wrapper/bin/msvc_nop.bat", + "ld": "C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.26.28801/bin/HostX64/x64/link.exe", + "nm": "wrapper/bin/msvc_nop.bat", + "objcopy": "wrapper/bin/msvc_nop.bat", + "objdump": "wrapper/bin/msvc_nop.bat", + "strip": "wrapper/bin/msvc_nop.bat", + }, + toolchain_identifier = "msvc_x64", +) + +toolchain( + name = "cc-toolchain-x64_windows", + exec_compatible_with = [ + "@platforms//cpu:x86_64", + "@platforms//os:windows", + ], + target_compatible_with = [ + "@platforms//cpu:x86_64", + "@platforms//os:windows", + ], + toolchain = ":cc-compiler-x64_windows", + toolchain_type = "@bazel_tools//tools/cpp:toolchain_type", +) + +cc_toolchain( + name = "cc-compiler-x64_windows-clang-cl", + all_files = ":empty", + ar_files = ":empty", + as_files = ":clangcl_compiler_files", + compiler_files = ":clangcl_compiler_files", + dwp_files = ":empty", + linker_files = ":empty", + objcopy_files = ":empty", + strip_files = ":empty", + 
supports_param_files = 1, + toolchain_config = ":clang_cl_x64", + toolchain_identifier = "clang_cl_x64", +) + +cc_toolchain_config( + name = "clang_cl_x64", + abi_libc_version = "local", + abi_version = "local", + compiler = "clang-cl", + cpu = "x64_windows", + cxx_builtin_include_directories = [], + dbg_mode_debug_flag = "/DEBUG", + default_link_flags = [ + "/MACHINE:X64", + "/DEFAULTLIB:clang_rt.builtins-x86_64.lib", + ], + fastbuild_mode_debug_flag = "/DEBUG", + host_system_name = "local", + msvc_cl_path = "clang_installation_error.bat", + msvc_env_include = "clang_cl_not_found", + msvc_env_lib = "clang_cl_not_found", + msvc_env_path = "clang_cl_not_found", + msvc_env_tmp = "clang_cl_not_found", + msvc_lib_path = "clang_installation_error.bat", + msvc_link_path = "clang_installation_error.bat", + msvc_ml_path = "clang_installation_error.bat", + target_libc = "msvcrt", + target_system_name = "local", + tool_paths = { + "ar": "clang_installation_error.bat", + "ml": "clang_installation_error.bat", + "cpp": "clang_installation_error.bat", + "gcc": "clang_installation_error.bat", + "gcov": "wrapper/bin/msvc_nop.bat", + "ld": "clang_installation_error.bat", + "nm": "wrapper/bin/msvc_nop.bat", + "objcopy": "wrapper/bin/msvc_nop.bat", + "objdump": "wrapper/bin/msvc_nop.bat", + "strip": "wrapper/bin/msvc_nop.bat", + }, + toolchain_identifier = "clang_cl_x64", +) + +toolchain( + name = "cc-toolchain-x64_windows-clang-cl", + exec_compatible_with = [ + "@platforms//cpu:x86_64", + "@platforms//os:windows", + "@bazel_tools//tools/cpp:clang-cl", + ], + target_compatible_with = [ + "@platforms//cpu:x86_64", + "@platforms//os:windows", + ], + toolchain = ":cc-compiler-x64_windows-clang-cl", + toolchain_type = "@bazel_tools//tools/cpp:toolchain_type", +) + +cc_toolchain( + name = "cc-compiler-armeabi-v7a", + all_files = ":empty", + ar_files = ":empty", + as_files = ":empty", + compiler_files = ":empty", + dwp_files = ":empty", + linker_files = ":empty", + objcopy_files = ":empty", + strip_files = ":empty", + supports_param_files = 1, + toolchain_config = ":stub_armeabi-v7a", + toolchain_identifier = "stub_armeabi-v7a", +) + +armeabi_cc_toolchain_config(name = "stub_armeabi-v7a") + +toolchain( + name = "cc-toolchain-armeabi-v7a", + exec_compatible_with = [ + ], + target_compatible_with = [ + "@platforms//cpu:arm", + "@platforms//os:android", + ], + toolchain = ":cc-compiler-armeabi-v7a", + toolchain_type = "@bazel_tools//tools/cpp:toolchain_type", +) + +filegroup( + name = "link_dynamic_library", + srcs = ["link_dynamic_library.sh"], +) diff --git a/third_party/toolchains/preconfig/win/tf_win_08062020/armeabi_cc_toolchain_config.bzl b/third_party/toolchains/preconfig/win/tf_win_08062020/armeabi_cc_toolchain_config.bzl new file mode 100644 index 00000000000..94e0720bf6c --- /dev/null +++ b/third_party/toolchains/preconfig/win/tf_win_08062020/armeabi_cc_toolchain_config.bzl @@ -0,0 +1,82 @@ +# Copyright 2019 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""A Starlark cc_toolchain configuration rule""" + +load( + "@bazel_tools//tools/cpp:cc_toolchain_config_lib.bzl", + "feature", + "tool_path", +) + +def _impl(ctx): + toolchain_identifier = "stub_armeabi-v7a" + host_system_name = "armeabi-v7a" + target_system_name = "armeabi-v7a" + target_cpu = "armeabi-v7a" + target_libc = "armeabi-v7a" + compiler = "compiler" + abi_version = "armeabi-v7a" + abi_libc_version = "armeabi-v7a" + cc_target_os = None + builtin_sysroot = None + action_configs = [] + + supports_pic_feature = feature(name = "supports_pic", enabled = True) + supports_dynamic_linker_feature = feature(name = "supports_dynamic_linker", enabled = True) + features = [supports_dynamic_linker_feature, supports_pic_feature] + + cxx_builtin_include_directories = [] + artifact_name_patterns = [] + make_variables = [] + + tool_paths = [ + tool_path(name = "ar", path = "/bin/false"), + tool_path(name = "compat-ld", path = "/bin/false"), + tool_path(name = "cpp", path = "/bin/false"), + tool_path(name = "dwp", path = "/bin/false"), + tool_path(name = "gcc", path = "/bin/false"), + tool_path(name = "gcov", path = "/bin/false"), + tool_path(name = "ld", path = "/bin/false"), + tool_path(name = "nm", path = "/bin/false"), + tool_path(name = "objcopy", path = "/bin/false"), + tool_path(name = "objdump", path = "/bin/false"), + tool_path(name = "strip", path = "/bin/false"), + ] + + return cc_common.create_cc_toolchain_config_info( + ctx = ctx, + features = features, + action_configs = action_configs, + artifact_name_patterns = artifact_name_patterns, + cxx_builtin_include_directories = cxx_builtin_include_directories, + toolchain_identifier = toolchain_identifier, + host_system_name = host_system_name, + target_system_name = target_system_name, + target_cpu = target_cpu, + target_libc = target_libc, + compiler = compiler, + abi_version = abi_version, + abi_libc_version = abi_libc_version, + tool_paths = tool_paths, + make_variables = make_variables, + builtin_sysroot = builtin_sysroot, + cc_target_os = cc_target_os, + ) + +armeabi_cc_toolchain_config = rule( + implementation = _impl, + attrs = {}, + provides = [CcToolchainConfigInfo], +) diff --git a/third_party/toolchains/preconfig/win/tf_win_08062020/builtin_include_directory_paths_msvc b/third_party/toolchains/preconfig/win/tf_win_08062020/builtin_include_directory_paths_msvc new file mode 100644 index 00000000000..d974c3af038 --- /dev/null +++ b/third_party/toolchains/preconfig/win/tf_win_08062020/builtin_include_directory_paths_msvc @@ -0,0 +1,14 @@ +This file is generated by cc_configure and contains builtin include directories +that msvc reported. This file is a dependency of every compilation action and +changes to it will be reflected in the action cache key. When some of these +paths change, Bazel will make sure to rerun the action, even though none of +declared action inputs or the action commandline changes. 
+ +"C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.26.28801\\ATLMFC\\include" +"C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\Community\\VC\\Tools\\MSVC\\14.26.28801\\include" +"C:\\Program Files (x86)\\Windows Kits\\NETFXSDK\\4.8\\include\\um" +"C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.18362.0\\ucrt" +"C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.18362.0\\shared" +"C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.18362.0\\um" +"C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.18362.0\\winrt" +"C:\\Program Files (x86)\\Windows Kits\\10\\include\\10.0.18362.0\\cppwinrt" diff --git a/third_party/toolchains/preconfig/win/tf_win_08062020/windows_cc_toolchain_config.bzl b/third_party/toolchains/preconfig/win/tf_win_08062020/windows_cc_toolchain_config.bzl new file mode 100644 index 00000000000..30571b6a5ac --- /dev/null +++ b/third_party/toolchains/preconfig/win/tf_win_08062020/windows_cc_toolchain_config.bzl @@ -0,0 +1,1342 @@ +# Copyright 2019 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A Starlark cc_toolchain configuration rule for Windows""" + +load( + "@bazel_tools//tools/cpp:cc_toolchain_config_lib.bzl", + "action_config", + "artifact_name_pattern", + "env_entry", + "env_set", + "feature", + "feature_set", + "flag_group", + "flag_set", + "tool", + "tool_path", + "variable_with_value", + "with_feature_set", +) +load("@bazel_tools//tools/build_defs/cc:action_names.bzl", "ACTION_NAMES") + +all_compile_actions = [ + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.linkstamp_compile, + ACTION_NAMES.assemble, + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_module_codegen, + ACTION_NAMES.clif_match, + ACTION_NAMES.lto_backend, +] + +all_cpp_compile_actions = [ + ACTION_NAMES.cpp_compile, + ACTION_NAMES.linkstamp_compile, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_module_codegen, + ACTION_NAMES.clif_match, +] + +preprocessor_compile_actions = [ + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.linkstamp_compile, + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.clif_match, +] + +codegen_compile_actions = [ + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.linkstamp_compile, + ACTION_NAMES.assemble, + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.cpp_module_codegen, + ACTION_NAMES.lto_backend, +] + +all_link_actions = [ + ACTION_NAMES.cpp_link_executable, + ACTION_NAMES.cpp_link_dynamic_library, + ACTION_NAMES.cpp_link_nodeps_dynamic_library, +] + +def _use_msvc_toolchain(ctx): + return ctx.attr.cpu == "x64_windows" and (ctx.attr.compiler == "msvc-cl" or ctx.attr.compiler == "clang-cl") + +def _impl(ctx): + if _use_msvc_toolchain(ctx): + artifact_name_patterns = [ + artifact_name_pattern( + category_name = 
"object_file", + prefix = "", + extension = ".obj", + ), + artifact_name_pattern( + category_name = "static_library", + prefix = "", + extension = ".lib", + ), + artifact_name_pattern( + category_name = "alwayslink_static_library", + prefix = "", + extension = ".lo.lib", + ), + artifact_name_pattern( + category_name = "executable", + prefix = "", + extension = ".exe", + ), + artifact_name_pattern( + category_name = "dynamic_library", + prefix = "", + extension = ".dll", + ), + artifact_name_pattern( + category_name = "interface_library", + prefix = "", + extension = ".if.lib", + ), + ] + else: + artifact_name_patterns = [ + artifact_name_pattern( + category_name = "executable", + prefix = "", + extension = ".exe", + ), + ] + + if _use_msvc_toolchain(ctx): + cpp_link_nodeps_dynamic_library_action = action_config( + action_name = ACTION_NAMES.cpp_link_nodeps_dynamic_library, + implies = [ + "nologo", + "shared_flag", + "linkstamps", + "output_execpath_flags", + "input_param_flags", + "user_link_flags", + "default_link_flags", + "linker_subsystem_flag", + "linker_param_file", + "msvc_env", + "no_stripping", + "has_configured_linker_path", + "def_file", + ], + tools = [tool(path = ctx.attr.msvc_link_path)], + ) + + cpp_link_static_library_action = action_config( + action_name = ACTION_NAMES.cpp_link_static_library, + implies = [ + "nologo", + "archiver_flags", + "input_param_flags", + "linker_param_file", + "msvc_env", + ], + tools = [tool(path = ctx.attr.msvc_lib_path)], + ) + + assemble_action = action_config( + action_name = ACTION_NAMES.assemble, + implies = [ + "compiler_input_flags", + "compiler_output_flags", + "nologo", + "msvc_env", + "sysroot", + ], + tools = [tool(path = ctx.attr.msvc_ml_path)], + ) + + preprocess_assemble_action = action_config( + action_name = ACTION_NAMES.preprocess_assemble, + implies = [ + "compiler_input_flags", + "compiler_output_flags", + "nologo", + "msvc_env", + "sysroot", + ], + tools = [tool(path = ctx.attr.msvc_ml_path)], + ) + + c_compile_action = action_config( + action_name = ACTION_NAMES.c_compile, + implies = [ + "compiler_input_flags", + "compiler_output_flags", + "default_compile_flags", + "nologo", + "msvc_env", + "parse_showincludes", + "user_compile_flags", + "sysroot", + "unfiltered_compile_flags", + ], + tools = [tool(path = ctx.attr.msvc_cl_path)], + ) + + cpp_compile_action = action_config( + action_name = ACTION_NAMES.cpp_compile, + implies = [ + "compiler_input_flags", + "compiler_output_flags", + "default_compile_flags", + "nologo", + "msvc_env", + "parse_showincludes", + "user_compile_flags", + "sysroot", + "unfiltered_compile_flags", + ], + tools = [tool(path = ctx.attr.msvc_cl_path)], + ) + + cpp_link_executable_action = action_config( + action_name = ACTION_NAMES.cpp_link_executable, + implies = [ + "nologo", + "linkstamps", + "output_execpath_flags", + "input_param_flags", + "user_link_flags", + "default_link_flags", + "linker_subsystem_flag", + "linker_param_file", + "msvc_env", + "no_stripping", + ], + tools = [tool(path = ctx.attr.msvc_link_path)], + ) + + cpp_link_dynamic_library_action = action_config( + action_name = ACTION_NAMES.cpp_link_dynamic_library, + implies = [ + "nologo", + "shared_flag", + "linkstamps", + "output_execpath_flags", + "input_param_flags", + "user_link_flags", + "default_link_flags", + "linker_subsystem_flag", + "linker_param_file", + "msvc_env", + "no_stripping", + "has_configured_linker_path", + "def_file", + ], + tools = [tool(path = ctx.attr.msvc_link_path)], + ) + + action_configs = [ + 
assemble_action, + preprocess_assemble_action, + c_compile_action, + cpp_compile_action, + cpp_link_executable_action, + cpp_link_dynamic_library_action, + cpp_link_nodeps_dynamic_library_action, + cpp_link_static_library_action, + ] + else: + action_configs = [] + + if _use_msvc_toolchain(ctx): + msvc_link_env_feature = feature( + name = "msvc_link_env", + env_sets = [ + env_set( + actions = all_link_actions + + [ACTION_NAMES.cpp_link_static_library], + env_entries = [env_entry(key = "LIB", value = ctx.attr.msvc_env_lib)], + ), + ], + ) + + shared_flag_feature = feature( + name = "shared_flag", + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.cpp_link_dynamic_library, + ACTION_NAMES.cpp_link_nodeps_dynamic_library, + ], + flag_groups = [flag_group(flags = ["/DLL"])], + ), + ], + ) + + determinism_feature = feature( + name = "determinism", + enabled = True, + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [ + flag_group( + flags = [ + "/wd4117", + "-D__DATE__=\"redacted\"", + "-D__TIMESTAMP__=\"redacted\"", + "-D__TIME__=\"redacted\"", + ] + (["-Wno-builtin-macro-redefined"] if ctx.attr.compiler == "clang-cl" else []), + ), + ], + ), + ], + ) + + sysroot_feature = feature( + name = "sysroot", + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.assemble, + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_module_codegen, + ACTION_NAMES.cpp_link_executable, + ACTION_NAMES.cpp_link_dynamic_library, + ACTION_NAMES.cpp_link_nodeps_dynamic_library, + ], + flag_groups = [ + flag_group( + flags = ["--sysroot=%{sysroot}"], + iterate_over = "sysroot", + expand_if_available = "sysroot", + ), + ], + ), + ], + ) + + unfiltered_compile_flags_feature = feature( + name = "unfiltered_compile_flags", + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_module_codegen, + ], + flag_groups = [ + flag_group( + flags = ["%{unfiltered_compile_flags}"], + iterate_over = "unfiltered_compile_flags", + expand_if_available = "unfiltered_compile_flags", + ), + ], + ), + ], + ) + + compiler_param_file_feature = feature( + name = "compiler_param_file", + ) + + copy_dynamic_libraries_to_binary_feature = feature( + name = "copy_dynamic_libraries_to_binary", + ) + + input_param_flags_feature = feature( + name = "input_param_flags", + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.cpp_link_dynamic_library, + ACTION_NAMES.cpp_link_nodeps_dynamic_library, + ], + flag_groups = [ + flag_group( + flags = ["/IMPLIB:%{interface_library_output_path}"], + expand_if_available = "interface_library_output_path", + ), + ], + ), + flag_set( + actions = all_link_actions, + flag_groups = [ + flag_group( + flags = ["%{libopts}"], + iterate_over = "libopts", + expand_if_available = "libopts", + ), + ], + ), + flag_set( + actions = all_link_actions + + [ACTION_NAMES.cpp_link_static_library], + flag_groups = [ + flag_group( + iterate_over = "libraries_to_link", + flag_groups = [ + flag_group( + iterate_over = "libraries_to_link.object_files", + flag_groups = [flag_group(flags = ["%{libraries_to_link.object_files}"])], + expand_if_equal = variable_with_value( + name = "libraries_to_link.type", + value = "object_file_group", + ), + ), + flag_group( + flag_groups = 
[flag_group(flags = ["%{libraries_to_link.name}"])], + expand_if_equal = variable_with_value( + name = "libraries_to_link.type", + value = "object_file", + ), + ), + flag_group( + flag_groups = [flag_group(flags = ["%{libraries_to_link.name}"])], + expand_if_equal = variable_with_value( + name = "libraries_to_link.type", + value = "interface_library", + ), + ), + flag_group( + flag_groups = [ + flag_group( + flags = ["%{libraries_to_link.name}"], + expand_if_false = "libraries_to_link.is_whole_archive", + ), + flag_group( + flags = ["/WHOLEARCHIVE:%{libraries_to_link.name}"], + expand_if_true = "libraries_to_link.is_whole_archive", + ), + ], + expand_if_equal = variable_with_value( + name = "libraries_to_link.type", + value = "static_library", + ), + ), + ], + expand_if_available = "libraries_to_link", + ), + ], + ), + ], + ) + + fastbuild_feature = feature( + name = "fastbuild", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [flag_group(flags = ["/Od", "/Z7"])], + ), + flag_set( + actions = all_link_actions, + flag_groups = [ + flag_group( + flags = [ctx.attr.fastbuild_mode_debug_flag, "/INCREMENTAL:NO"], + ), + ], + ), + ], + implies = ["generate_pdb_file"], + ) + + user_compile_flags_feature = feature( + name = "user_compile_flags", + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_module_codegen, + ], + flag_groups = [ + flag_group( + flags = ["%{user_compile_flags}"], + iterate_over = "user_compile_flags", + expand_if_available = "user_compile_flags", + ), + ], + ), + ], + ) + + archiver_flags_feature = feature( + name = "archiver_flags", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.cpp_link_static_library], + flag_groups = [ + flag_group( + flags = ["/OUT:%{output_execpath}"], + expand_if_available = "output_execpath", + ), + flag_group( + flags = ["/MACHINE:X64"], + ), + ], + ), + ], + ) + + default_link_flags_feature = feature( + name = "default_link_flags", + enabled = True, + flag_sets = [ + flag_set( + actions = all_link_actions, + flag_groups = [flag_group(flags = ctx.attr.default_link_flags)], + ), + ], + ) + + static_link_msvcrt_feature = feature(name = "static_link_msvcrt") + + dynamic_link_msvcrt_debug_feature = feature( + name = "dynamic_link_msvcrt_debug", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [flag_group(flags = ["/MDd"])], + ), + flag_set( + actions = all_link_actions, + flag_groups = [flag_group(flags = ["/DEFAULTLIB:msvcrtd.lib"])], + ), + ], + requires = [feature_set(features = ["dbg"])], + ) + + dbg_feature = feature( + name = "dbg", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [flag_group(flags = ["/Od", "/Z7"])], + ), + flag_set( + actions = all_link_actions, + flag_groups = [ + flag_group( + flags = [ctx.attr.dbg_mode_debug_flag, "/INCREMENTAL:NO"], + ), + ], + ), + ], + implies = ["generate_pdb_file"], + ) + + opt_feature = feature( + name = "opt", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [flag_group(flags = ["/O2"])], + ), + ], + implies = ["frame_pointer"], + ) + + supports_interface_shared_libraries_feature = feature( + name = "supports_interface_shared_libraries", + enabled = True, + ) + + user_link_flags_feature = feature( + 
name = "user_link_flags", + flag_sets = [ + flag_set( + actions = all_link_actions, + flag_groups = [ + flag_group( + flags = ["%{user_link_flags}"], + iterate_over = "user_link_flags", + expand_if_available = "user_link_flags", + ), + ], + ), + ], + ) + + default_compile_flags_feature = feature( + name = "default_compile_flags", + enabled = True, + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.assemble, + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.linkstamp_compile, + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_module_codegen, + ACTION_NAMES.lto_backend, + ACTION_NAMES.clif_match, + ], + flag_groups = [ + flag_group( + flags = [ + "/DCOMPILER_MSVC", + "/DNOMINMAX", + "/D_WIN32_WINNT=0x0601", + "/D_CRT_SECURE_NO_DEPRECATE", + "/D_CRT_SECURE_NO_WARNINGS", + "/bigobj", + "/Zm500", + "/EHsc", + "/wd4351", + "/wd4291", + "/wd4250", + "/wd4996", + ], + ), + ], + ), + ], + ) + + msvc_compile_env_feature = feature( + name = "msvc_compile_env", + env_sets = [ + env_set( + actions = [ + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_module_codegen, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.assemble, + ACTION_NAMES.preprocess_assemble, + ], + env_entries = [env_entry(key = "INCLUDE", value = ctx.attr.msvc_env_include)], + ), + ], + ) + + preprocessor_defines_feature = feature( + name = "preprocessor_defines", + enabled = True, + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.assemble, + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.cpp_module_compile, + ], + flag_groups = [ + flag_group( + flags = ["/D%{preprocessor_defines}"], + iterate_over = "preprocessor_defines", + ), + ], + ), + ], + ) + + generate_pdb_file_feature = feature( + name = "generate_pdb_file", + requires = [ + feature_set(features = ["dbg"]), + feature_set(features = ["fastbuild"]), + ], + ) + + output_execpath_flags_feature = feature( + name = "output_execpath_flags", + flag_sets = [ + flag_set( + actions = all_link_actions, + flag_groups = [ + flag_group( + flags = ["/OUT:%{output_execpath}"], + expand_if_available = "output_execpath", + ), + ], + ), + ], + ) + + dynamic_link_msvcrt_no_debug_feature = feature( + name = "dynamic_link_msvcrt_no_debug", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [flag_group(flags = ["/MD"])], + ), + flag_set( + actions = all_link_actions, + flag_groups = [flag_group(flags = ["/DEFAULTLIB:msvcrt.lib"])], + ), + ], + requires = [ + feature_set(features = ["fastbuild"]), + feature_set(features = ["opt"]), + ], + ) + + disable_assertions_feature = feature( + name = "disable_assertions", + enabled = True, + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [flag_group(flags = ["/DNDEBUG"])], + with_features = [with_feature_set(features = ["opt"])], + ), + ], + ) + + has_configured_linker_path_feature = feature(name = "has_configured_linker_path") + + supports_dynamic_linker_feature = feature(name = "supports_dynamic_linker", enabled = True) + + no_stripping_feature = feature(name = "no_stripping") + + linker_param_file_feature = feature( + name = "linker_param_file", + flag_sets = [ + flag_set( + actions = all_link_actions + + [ACTION_NAMES.cpp_link_static_library], + flag_groups = [ + flag_group( + flags = 
["@%{linker_param_file}"], + expand_if_available = "linker_param_file", + ), + ], + ), + ], + ) + + ignore_noisy_warnings_feature = feature( + name = "ignore_noisy_warnings", + enabled = True, + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.cpp_link_static_library], + flag_groups = [flag_group(flags = ["/ignore:4221"])], + ), + ], + ) + + no_legacy_features_feature = feature(name = "no_legacy_features") + + parse_showincludes_feature = feature( + name = "parse_showincludes", + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_header_parsing, + ], + flag_groups = [flag_group(flags = ["/showIncludes"])], + ), + ], + ) + + static_link_msvcrt_no_debug_feature = feature( + name = "static_link_msvcrt_no_debug", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [flag_group(flags = ["/MT"])], + ), + flag_set( + actions = all_link_actions, + flag_groups = [flag_group(flags = ["/DEFAULTLIB:libcmt.lib"])], + ), + ], + requires = [ + feature_set(features = ["fastbuild"]), + feature_set(features = ["opt"]), + ], + ) + + treat_warnings_as_errors_feature = feature( + name = "treat_warnings_as_errors", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [flag_group(flags = ["/WX"])], + ), + ], + ) + + windows_export_all_symbols_feature = feature(name = "windows_export_all_symbols") + + no_windows_export_all_symbols_feature = feature(name = "no_windows_export_all_symbols") + + include_paths_feature = feature( + name = "include_paths", + enabled = True, + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.assemble, + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.cpp_module_compile, + ], + flag_groups = [ + flag_group( + flags = ["/I%{quote_include_paths}"], + iterate_over = "quote_include_paths", + ), + flag_group( + flags = ["/I%{include_paths}"], + iterate_over = "include_paths", + ), + flag_group( + flags = ["/I%{system_include_paths}"], + iterate_over = "system_include_paths", + ), + ], + ), + ], + ) + + linkstamps_feature = feature( + name = "linkstamps", + flag_sets = [ + flag_set( + actions = all_link_actions, + flag_groups = [ + flag_group( + flags = ["%{linkstamp_paths}"], + iterate_over = "linkstamp_paths", + expand_if_available = "linkstamp_paths", + ), + ], + ), + ], + ) + + targets_windows_feature = feature( + name = "targets_windows", + enabled = True, + implies = ["copy_dynamic_libraries_to_binary"], + ) + + linker_subsystem_flag_feature = feature( + name = "linker_subsystem_flag", + flag_sets = [ + flag_set( + actions = all_link_actions, + flag_groups = [flag_group(flags = ["/SUBSYSTEM:CONSOLE"])], + ), + ], + ) + + static_link_msvcrt_debug_feature = feature( + name = "static_link_msvcrt_debug", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [flag_group(flags = ["/MTd"])], + ), + flag_set( + actions = all_link_actions, + flag_groups = [flag_group(flags = ["/DEFAULTLIB:libcmtd.lib"])], + ), + ], + requires = [feature_set(features = ["dbg"])], + ) + + frame_pointer_feature = feature( + name = "frame_pointer", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [flag_group(flags = ["/Oy-"])], + ), + ], + ) + + 
compiler_output_flags_feature = feature( + name = "compiler_output_flags", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.assemble], + flag_groups = [ + flag_group( + flag_groups = [ + flag_group( + flags = ["/Fo%{output_file}", "/Zi"], + expand_if_available = "output_file", + expand_if_not_available = "output_assembly_file", + ), + ], + expand_if_not_available = "output_preprocess_file", + ), + ], + ), + flag_set( + actions = [ + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_module_codegen, + ], + flag_groups = [ + flag_group( + flag_groups = [ + flag_group( + flags = ["/Fo%{output_file}"], + expand_if_not_available = "output_preprocess_file", + ), + ], + expand_if_available = "output_file", + expand_if_not_available = "output_assembly_file", + ), + flag_group( + flag_groups = [ + flag_group( + flags = ["/Fa%{output_file}"], + expand_if_available = "output_assembly_file", + ), + ], + expand_if_available = "output_file", + ), + flag_group( + flag_groups = [ + flag_group( + flags = ["/P", "/Fi%{output_file}"], + expand_if_available = "output_preprocess_file", + ), + ], + expand_if_available = "output_file", + ), + ], + ), + ], + ) + + nologo_feature = feature( + name = "nologo", + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_module_codegen, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.assemble, + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.cpp_link_executable, + ACTION_NAMES.cpp_link_dynamic_library, + ACTION_NAMES.cpp_link_nodeps_dynamic_library, + ACTION_NAMES.cpp_link_static_library, + ], + flag_groups = [flag_group(flags = ["/nologo"])], + ), + ], + ) + + smaller_binary_feature = feature( + name = "smaller_binary", + enabled = True, + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [flag_group(flags = ["/Gy", "/Gw"])], + with_features = [with_feature_set(features = ["opt"])], + ), + flag_set( + actions = all_link_actions, + flag_groups = [flag_group(flags = ["/OPT:ICF", "/OPT:REF"])], + with_features = [with_feature_set(features = ["opt"])], + ), + ], + ) + + compiler_input_flags_feature = feature( + name = "compiler_input_flags", + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.assemble, + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_module_codegen, + ], + flag_groups = [ + flag_group( + flags = ["/c", "%{source_file}"], + expand_if_available = "source_file", + ), + ], + ), + ], + ) + + def_file_feature = feature( + name = "def_file", + flag_sets = [ + flag_set( + actions = all_link_actions, + flag_groups = [ + flag_group( + flags = ["/DEF:%{def_file_path}", "/ignore:4070"], + expand_if_available = "def_file_path", + ), + ], + ), + ], + ) + + msvc_env_feature = feature( + name = "msvc_env", + env_sets = [ + env_set( + actions = [ + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_module_codegen, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.assemble, + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.cpp_link_executable, + ACTION_NAMES.cpp_link_dynamic_library, + ACTION_NAMES.cpp_link_nodeps_dynamic_library, + ACTION_NAMES.cpp_link_static_library, + ], + env_entries = [ + env_entry(key 
= "PATH", value = ctx.attr.msvc_env_path), + env_entry(key = "TMP", value = ctx.attr.msvc_env_tmp), + env_entry(key = "TEMP", value = ctx.attr.msvc_env_tmp), + ], + ), + ], + implies = ["msvc_compile_env", "msvc_link_env"], + ) + features = [ + no_legacy_features_feature, + nologo_feature, + has_configured_linker_path_feature, + no_stripping_feature, + targets_windows_feature, + copy_dynamic_libraries_to_binary_feature, + default_compile_flags_feature, + msvc_env_feature, + msvc_compile_env_feature, + msvc_link_env_feature, + include_paths_feature, + preprocessor_defines_feature, + parse_showincludes_feature, + generate_pdb_file_feature, + shared_flag_feature, + linkstamps_feature, + output_execpath_flags_feature, + archiver_flags_feature, + input_param_flags_feature, + linker_subsystem_flag_feature, + user_link_flags_feature, + default_link_flags_feature, + linker_param_file_feature, + static_link_msvcrt_feature, + static_link_msvcrt_no_debug_feature, + dynamic_link_msvcrt_no_debug_feature, + static_link_msvcrt_debug_feature, + dynamic_link_msvcrt_debug_feature, + dbg_feature, + fastbuild_feature, + opt_feature, + frame_pointer_feature, + disable_assertions_feature, + determinism_feature, + treat_warnings_as_errors_feature, + smaller_binary_feature, + ignore_noisy_warnings_feature, + user_compile_flags_feature, + sysroot_feature, + unfiltered_compile_flags_feature, + compiler_param_file_feature, + compiler_output_flags_feature, + compiler_input_flags_feature, + def_file_feature, + windows_export_all_symbols_feature, + no_windows_export_all_symbols_feature, + supports_dynamic_linker_feature, + supports_interface_shared_libraries_feature, + ] + else: + targets_windows_feature = feature( + name = "targets_windows", + implies = ["copy_dynamic_libraries_to_binary"], + enabled = True, + ) + + copy_dynamic_libraries_to_binary_feature = feature(name = "copy_dynamic_libraries_to_binary") + + gcc_env_feature = feature( + name = "gcc_env", + enabled = True, + env_sets = [ + env_set( + actions = [ + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_module_codegen, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.assemble, + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.cpp_link_executable, + ACTION_NAMES.cpp_link_dynamic_library, + ACTION_NAMES.cpp_link_nodeps_dynamic_library, + ACTION_NAMES.cpp_link_static_library, + ], + env_entries = [ + env_entry(key = "PATH", value = ctx.attr.tool_bin_path), + ], + ), + ], + ) + + default_compile_flags_feature = feature( + name = "default_compile_flags", + enabled = True, + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.linkstamp_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_module_codegen, + ACTION_NAMES.lto_backend, + ACTION_NAMES.clif_match, + ], + flag_groups = [flag_group(flags = ["-std=gnu++0x"])], + ), + ], + ) + + default_link_flags_feature = feature( + name = "default_link_flags", + enabled = True, + flag_sets = [ + flag_set( + actions = all_link_actions, + flag_groups = [flag_group(flags = ["-lstdc++"])], + ), + ], + ) + + supports_dynamic_linker_feature = feature( + name = "supports_dynamic_linker", + enabled = True, + ) + + if ctx.attr.cpu == "x64_windows" and ctx.attr.compiler == "mingw-gcc": + compiler_param_file_feature = feature( + name = "compiler_param_file", + ) + + features = [ + targets_windows_feature, + copy_dynamic_libraries_to_binary_feature, + gcc_env_feature, + 
default_compile_flags_feature, + compiler_param_file_feature, + default_link_flags_feature, + supports_dynamic_linker_feature, + ] + else: + supports_pic_feature = feature( + name = "supports_pic", + enabled = True, + ) + supports_start_end_lib_feature = feature( + name = "supports_start_end_lib", + enabled = True, + ) + + dbg_feature = feature(name = "dbg") + + opt_feature = feature(name = "opt") + + sysroot_feature = feature( + name = "sysroot", + enabled = True, + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.linkstamp_compile, + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_module_codegen, + ACTION_NAMES.lto_backend, + ACTION_NAMES.clif_match, + ACTION_NAMES.cpp_link_executable, + ACTION_NAMES.cpp_link_dynamic_library, + ACTION_NAMES.cpp_link_nodeps_dynamic_library, + ], + flag_groups = [ + flag_group( + flags = ["--sysroot=%{sysroot}"], + expand_if_available = "sysroot", + ), + ], + ), + ], + ) + + fdo_optimize_feature = feature( + name = "fdo_optimize", + flag_sets = [ + flag_set( + actions = [ACTION_NAMES.c_compile, ACTION_NAMES.cpp_compile], + flag_groups = [ + flag_group( + flags = [ + "-fprofile-use=%{fdo_profile_path}", + "-fprofile-correction", + ], + expand_if_available = "fdo_profile_path", + ), + ], + ), + ], + provides = ["profile"], + ) + + user_compile_flags_feature = feature( + name = "user_compile_flags", + enabled = True, + flag_sets = [ + flag_set( + actions = [ + ACTION_NAMES.assemble, + ACTION_NAMES.preprocess_assemble, + ACTION_NAMES.linkstamp_compile, + ACTION_NAMES.c_compile, + ACTION_NAMES.cpp_compile, + ACTION_NAMES.cpp_header_parsing, + ACTION_NAMES.cpp_module_compile, + ACTION_NAMES.cpp_module_codegen, + ACTION_NAMES.lto_backend, + ACTION_NAMES.clif_match, + ], + flag_groups = [ + flag_group( + flags = ["%{user_compile_flags}"], + iterate_over = "user_compile_flags", + expand_if_available = "user_compile_flags", + ), + ], + ), + ], + ) + + features = [ + targets_windows_feature, + copy_dynamic_libraries_to_binary_feature, + gcc_env_feature, + supports_pic_feature, + default_compile_flags_feature, + default_link_flags_feature, + fdo_optimize_feature, + supports_dynamic_linker_feature, + dbg_feature, + opt_feature, + user_compile_flags_feature, + sysroot_feature, + ] + + tool_paths = [ + tool_path(name = name, path = path) + for name, path in ctx.attr.tool_paths.items() + ] + + return cc_common.create_cc_toolchain_config_info( + ctx = ctx, + features = features, + action_configs = action_configs, + artifact_name_patterns = artifact_name_patterns, + cxx_builtin_include_directories = ctx.attr.cxx_builtin_include_directories, + toolchain_identifier = ctx.attr.toolchain_identifier, + host_system_name = ctx.attr.host_system_name, + target_system_name = ctx.attr.target_system_name, + target_cpu = ctx.attr.cpu, + target_libc = ctx.attr.target_libc, + compiler = ctx.attr.compiler, + abi_version = ctx.attr.abi_version, + abi_libc_version = ctx.attr.abi_libc_version, + tool_paths = tool_paths, + ) + +cc_toolchain_config = rule( + implementation = _impl, + attrs = { + "cpu": attr.string(mandatory = True), + "compiler": attr.string(), + "toolchain_identifier": attr.string(), + "host_system_name": attr.string(), + "target_system_name": attr.string(), + "target_libc": attr.string(), + "abi_version": attr.string(), + "abi_libc_version": attr.string(), + "tool_paths": attr.string_dict(), + "cxx_builtin_include_directories": 
attr.string_list(),
+        "default_link_flags": attr.string_list(default = []),
+        "msvc_env_tmp": attr.string(default = "msvc_not_found"),
+        "msvc_env_path": attr.string(default = "msvc_not_found"),
+        "msvc_env_include": attr.string(default = "msvc_not_found"),
+        "msvc_env_lib": attr.string(default = "msvc_not_found"),
+        "msvc_cl_path": attr.string(default = "vc_installation_error.bat"),
+        "msvc_ml_path": attr.string(default = "vc_installation_error.bat"),
+        "msvc_link_path": attr.string(default = "vc_installation_error.bat"),
+        "msvc_lib_path": attr.string(default = "vc_installation_error.bat"),
+        "dbg_mode_debug_flag": attr.string(),
+        "fastbuild_mode_debug_flag": attr.string(),
+        "tool_bin_path": attr.string(default = "not_found"),
+    },
+    provides = [CcToolchainConfigInfo],
+)

From a5e2430b3ecc5559301d44b4ac83bd7d1c7744ad Mon Sep 17 00:00:00 2001
From: Joshua Chia
Date: Tue, 9 Jun 2020 10:43:43 +0800
Subject: [PATCH 093/178] Updated documentation for GRU & LSTM

Using cuDNN in GRU & LSTM requires eager execution.
---
 tensorflow/python/keras/layers/recurrent_v2.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/python/keras/layers/recurrent_v2.py b/tensorflow/python/keras/layers/recurrent_v2.py
index 14ccbebbce5..33babb54357 100644
--- a/tensorflow/python/keras/layers/recurrent_v2.py
+++ b/tensorflow/python/keras/layers/recurrent_v2.py
@@ -205,6 +205,7 @@ class GRU(recurrent.DropoutRNNCellMixin, recurrent.GRU):
   5. `use_bias` is `True`
   6. `reset_after` is `True`
   7. Inputs, if use masking, are strictly right-padded.
+  8. Eager execution is enabled in the outermost context.
 
 There are two variants of the GRU implementation. The default one is based on
 [v3](https://arxiv.org/abs/1406.1078v3) and has reset gate applied to hidden
@@ -928,6 +929,7 @@ class LSTM(recurrent.DropoutRNNCellMixin, recurrent.LSTM):
   4. `unroll` is `False`
   5. `use_bias` is `True`
   6. Inputs, if use masking, are strictly right-padded.
+  7. Eager execution is enabled in the outermost context.
For example: From d59cc2fcf03b2aec01b4ef4cc4b4b9bc2d3f0b9c Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Mon, 8 Jun 2020 20:01:32 -0700 Subject: [PATCH 094/178] [XLA:CPU] [NFC] Simplify datastructures used for allocated buffers PiperOrigin-RevId: 315405851 Change-Id: Ic45c075902b477e93daea227232ff6a35efff8cc --- .../xla/service/cpu/cpu_executable.cc | 149 +++++++++--------- .../compiler/xla/service/cpu/cpu_executable.h | 15 +- 2 files changed, 80 insertions(+), 84 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc index d9a328a326e..bba1a3ad610 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc @@ -76,11 +76,10 @@ CpuExecutable::CpuExecutable( } StatusOr, - std::vector, std::vector>> CpuExecutable::CreateBufferTable(se::DeviceMemoryAllocator* memory_allocator, int device_ordinal, - std::vector arguments) { + absl::Span arguments) { std::vector unowning_buffers( assignment_->Allocations().size()); std::vector owning_buffers( @@ -115,17 +114,13 @@ CpuExecutable::CreateBufferTable(se::DeviceMemoryAllocator* memory_allocator, } int64 buffer_size = allocation.size(); - if (!owning_buffers[i].is_null()) { - VLOG(3) << "buffer #" << i - << " is in the preallocated result ShapedBuffer"; - } else { - TF_ASSIGN_OR_RETURN(owning_buffers[i], memory_allocator->Allocate( - device_ordinal, buffer_size)); - unowning_buffers[i] = *owning_buffers[i]; + CHECK(owning_buffers[i].is_null()); + TF_ASSIGN_OR_RETURN(owning_buffers[i], memory_allocator->Allocate( + device_ordinal, buffer_size)); + unowning_buffers[i] = *owning_buffers[i]; - VLOG(3) << "buffer #" << i << " allocated " << buffer_size << " bytes [" - << owning_buffers[i]->opaque() << "]"; - } + VLOG(3) << "buffer #" << i << " allocated " << buffer_size << " bytes [" + << owning_buffers[i]->opaque() << "]"; // Since the output buffer and all the temporary buffers were written into // by the JITed code, msan has no way of knowing their memory was @@ -137,18 +132,8 @@ CpuExecutable::CreateBufferTable(se::DeviceMemoryAllocator* memory_allocator, TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice result_slice, assignment_->GetUniqueTopLevelOutputSlice()); VLOG(3) << "result index: " << result_slice.index(); - - std::vector buffers_to_free; - for (auto& argument : arguments) { - for (auto& index_buffer : *argument.MutableBuffers()) { - auto maybe_owning_buffer = index_buffer.second.Release(); - if (maybe_owning_buffer) { - buffers_to_free.push_back(std::move(*maybe_owning_buffer)); - } - } - } - return std::make_tuple(std::move(unowning_buffers), std::move(owning_buffers), - std::move(buffers_to_free)); + return std::make_tuple(std::move(unowning_buffers), + std::move(owning_buffers)); } Status CpuExecutable::ExecuteComputeFunction( @@ -223,63 +208,63 @@ Status CpuExecutable::ExecuteComputeFunction( return Status::OK(); } -StatusOr CpuExecutable::CreateResultShapedBuffer( +StatusOr CpuExecutable::CreateResultShapedBuffer( const ServiceExecutableRunOptions* run_options, - absl::Span buffers) { + absl::Span owning_buffers) { se::Stream* stream = run_options->stream(); - ScopedShapedBuffer result_buffer( - /*on_host_shape=*/result_shape(), - /*on_device_shape=*/result_shape(), run_options->allocator(), - stream->parent()->device_ordinal()); + ExecutionOutput result(/*on_host_shape=*/result_shape(), + /*on_device_shape=*/result_shape(), + run_options->allocator(), + 
stream->parent()->device_ordinal()); const HloInputOutputAliasConfig& input_output_alias = module().input_output_alias_config(); // Move se::OwningDeviceMemory values which contain the array(s) of the result // into the respective location in ScopedShapedBuffer which is returned to the // caller. - TF_RETURN_IF_ERROR(result_buffer.buffers().ForEachMutableElementWithStatus( - [&](const ShapeIndex& index, se::DeviceMemoryBase* device_memory) { - const auto& sources = this->GetRootValueSet().element(index); - // The points to set is unambiguous so the set should be a - // singleton. - CHECK_EQ(1, sources.values().size()); - const HloValue* value_source = sources.values()[0]; - HloInstruction* src = value_source->instruction(); + for (auto& p : result.MutableResult()->buffers()) { + const ShapeIndex& index = p.first; + se::DeviceMemoryBase& device_memory = p.second; + const auto& sources = this->GetRootValueSet().element(index); + // The points to set is unambiguous so the set should be a + // singleton. + CHECK_EQ(1, sources.values().size()); + const HloValue* value_source = sources.values()[0]; + HloInstruction* src = value_source->instruction(); - // The source for this result buffer can be a nested buffer such as - // a tuple element. The source instruction should have a - // non-parameter buffer assigned. - TF_ASSIGN_OR_RETURN( - const BufferAllocation::Slice slice, - this->assignment_->GetUniqueSlice(src, value_source->index())); - const BufferAllocation::Index buffer_index = slice.index(); - se::OwningDeviceMemory& buffer = buffers[buffer_index]; - if (!slice.allocation()->is_entry_computation_parameter()) { - // If the buffer coming out of the result is from a parameter, the - // owning buffer will be null, and that means the caller aliased some - // parameter buffer to an output one (via the - // HloInputOutputAliasConfig API). If that is the case, the caller - // will receive a partially complete scoped shaped buffer, which they - // will have to fill up on return. Unfortunately the interface to the - // execute APIs are ShapedBuffer pointer based, which assumes caller - // ownership, and hence a buffer coming from there cannot be part of - // the new ScopedShapedBuffer we create for the result (which assumes - // ownership). - *device_memory = buffer.Release(); - } else { - auto output_alias = input_output_alias.GetAliasedOutput( - slice.allocation()->parameter_number(), - slice.allocation()->param_shape_index()); - CHECK(output_alias) - << "Output buffer is coming from parameter " - << slice.allocation()->parameter_number() << " at index " - << slice.allocation()->param_shape_index() - << ", but no alias exists"; - CHECK_EQ(*output_alias, index); - } - return Status::OK(); - })); - return std::move(result_buffer); + // The source for this result buffer can be a nested buffer such as + // a tuple element. The source instruction should have a + // non-parameter buffer assigned. + TF_ASSIGN_OR_RETURN( + const BufferAllocation::Slice slice, + this->assignment_->GetUniqueSlice(src, value_source->index())); + const BufferAllocation::Index buffer_index = slice.index(); + se::OwningDeviceMemory& buffer = owning_buffers[buffer_index]; + if (!slice.allocation()->is_entry_computation_parameter()) { + // If the buffer coming out of the result is from a parameter, the + // owning buffer will be null, and that means the caller aliased some + // parameter buffer to an output one (via the + // HloInputOutputAliasConfig API). 
If that is the case, the caller + // will receive a partially complete scoped shaped buffer, which they + // will have to fill up on return. Unfortunately the interface to the + // execute APIs are ShapedBuffer pointer based, which assumes caller + // ownership, and hence a buffer coming from there cannot be part of + // the new ScopedShapedBuffer we create for the result (which assumes + // ownership). + device_memory = buffer.Release(); + } else { + auto output_alias = input_output_alias.GetAliasedOutput( + slice.allocation()->parameter_number(), + slice.allocation()->param_shape_index()); + CHECK(output_alias) << "Output buffer is coming from parameter " + << slice.allocation()->parameter_number() + << " at index " + << slice.allocation()->param_shape_index() + << ", but no alias exists"; + CHECK_EQ(*output_alias, index); + } + } + return std::move(result); } StatusOr CpuExecutable::ExecuteAsyncOnStream( @@ -313,14 +298,13 @@ StatusOr CpuExecutable::ExecuteAsyncOnStream( se::DeviceMemoryAllocator* memory_allocator = run_options->allocator(); std::vector owning_buffers; std::vector unowning_buffers; - std::vector buffers_to_release; TF_ASSIGN_OR_RETURN( - std::tie(unowning_buffers, owning_buffers, buffers_to_release), + std::tie(unowning_buffers, owning_buffers), CreateBufferTable(memory_allocator, stream->parent()->device_ordinal(), - std::move(arguments))); + arguments)); TF_ASSIGN_OR_RETURN( - ScopedShapedBuffer result, + ExecutionOutput result, CreateResultShapedBuffer(run_options, absl::MakeSpan(owning_buffers))); // At this point, `unowning_buffers` contains unowning pointers to all of our @@ -355,7 +339,18 @@ StatusOr CpuExecutable::ExecuteAsyncOnStream( std::make_shared>( std::move(owning_buffers)), hlo_execution_profile}); - return ExecutionOutput(std::move(result), std::move(buffers_to_release)); + + // TODO(cheshire): Duplication with other executables. + for (ExecutionInput& argument : arguments) { + for (auto& index_buffer : *argument.MutableBuffers()) { + absl::optional maybe_owning_buffer = + index_buffer.second.Release(); + if (maybe_owning_buffer) { + result.AddToBeReleased(std::move(*maybe_owning_buffer)); + } + } + } + return std::move(result); } /*static*/ int64 CpuExecutable::ShapeSizeBytes(const Shape& shape) { diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.h b/tensorflow/compiler/xla/service/cpu/cpu_executable.h index 310e30e41f5..7e42ee717f3 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_executable.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.h @@ -102,10 +102,10 @@ class CpuExecutable : public Executable { // - buffers_to_free: buffers whose ownership was donated by the caller that // are to be freed by the caller. StatusOr, - std::vector, std::vector>> CreateBufferTable(se::DeviceMemoryAllocator* memory_allocator, - int device_ordinal, std::vector arguments); + int device_ordinal, + absl::Span arguments); // Calls the generated function performing the computation with the given // arguments using the supplied buffers. @@ -113,12 +113,13 @@ class CpuExecutable : public Executable { absl::Span buffers, HloExecutionProfile* hlo_execution_profile); - // Creates a ScopedShapedBuffer for holding the result of the computation, - // moving buffers out of allocated_buffers and into the result as appropriate. - // The addresses are set according to buffer assignment. 
- StatusOr CreateResultShapedBuffer( + // Creates an Execution output holding ScopedShapedBuffer for holding the + // result of the computation, moving buffers out of allocated_buffers and into + // the result as appropriate. The addresses are set according to buffer + // assignment. + StatusOr CreateResultShapedBuffer( const ServiceExecutableRunOptions* run_options, - absl::Span buffers); + absl::Span owning_buffers); // Returns the instruction value set of the root instruction of the entry // computation. Uses dataflow analysis from buffer assignment. From 125822f42ae05a1e032b438719900258772f58e7 Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Mon, 8 Jun 2020 20:06:53 -0700 Subject: [PATCH 095/178] [XLA] Remove kNoAlias aliasing kNoAlias makese no sense, and is redundant, if we already have an optional to designate missing aliasing. PiperOrigin-RevId: 315406577 Change-Id: I1d8b01076efa51ec08226d46c9e52c48e5b3e6f4 --- .../xla/service/hlo_input_output_alias_config.cc | 4 ++-- .../compiler/xla/service/hlo_input_output_alias_config.h | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc index 3e82e3271bb..b55a81ce40f 100644 --- a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc +++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc @@ -121,10 +121,10 @@ string HloInputOutputAliasConfig::ToString() const { return absl::StrJoin(pieces, "\n"); } -HloInputOutputAliasConfig::AliasKind +absl::optional HloInputOutputAliasConfig::ParameterAliasKind( int64 param_number, const ShapeIndex& param_index) const { - AliasKind kind = AliasKind::kNoAlias; + absl::optional kind; alias_.ForEachElement( [&](const xla::ShapeIndex&, absl::optional alias) { if (alias && alias->parameter_number == param_number && diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h index 65ea02b6db0..5e9a6abb574 100644 --- a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h +++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h @@ -36,7 +36,6 @@ class HloInputOutputAliasConfig { // compilation time by the user, and has to be respected. A kSystemAlias one // might be setup by the compiler, if it decides it is convenient to do so. enum AliasKind { - kNoAlias, kUserAlias, kSystemAlias, }; @@ -68,15 +67,15 @@ class HloInputOutputAliasConfig { AliasKind kind = AliasKind::kUserAlias); // Returns the kind of alias for the given parameter number and parameter - // index. If no alias exists, AliasKind::kNoAlias is returned. - AliasKind ParameterAliasKind(int64 param_number, - const ShapeIndex& param_index) const; + // index. + absl::optional ParameterAliasKind( + int64 param_number, const ShapeIndex& param_index) const; // Returns true if the given parameter is aliased with one of the output // buffers. bool ParameterHasAlias(int64 param_number, const ShapeIndex& param_index) const { - return ParameterAliasKind(param_number, param_index) != AliasKind::kNoAlias; + return ParameterAliasKind(param_number, param_index).has_value(); } // Checks whether the provided output index has already been aliased. 
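The optional-instead-of-sentinel pattern above is what lets ParameterHasAlias collapse into a has_value() check. A minimal self-contained sketch of the same idea, with AliasRegistry as an illustrative stand-in rather than the XLA class:

  // Sketch only: AliasRegistry mimics the shape of
  // HloInputOutputAliasConfig::ParameterAliasKind after this change, but is
  // not the XLA type.
  #include <cstdint>
  #include <map>

  #include "absl/types/optional.h"

  enum class AliasKind { kUserAlias, kSystemAlias };  // no kNoAlias member

  class AliasRegistry {
   public:
    void SetUpAlias(int64_t param_number, AliasKind kind) {
      aliases_[param_number] = kind;
    }

    // An empty optional means "no alias", so the enum never needs a value
    // that does not describe a real alias.
    absl::optional<AliasKind> ParameterAliasKind(int64_t param_number) const {
      auto it = aliases_.find(param_number);
      if (it == aliases_.end()) return absl::nullopt;
      return it->second;
    }

    bool ParameterHasAlias(int64_t param_number) const {
      return ParameterAliasKind(param_number).has_value();
    }

   private:
    std::map<int64_t, AliasKind> aliases_;
  };

Call sites that previously compared the result against AliasKind::kNoAlias now either ask has_value() or unwrap the optional, which is exactly the rewrite applied to ParameterHasAlias in the header diff above.
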
From 459afb493f51095a3e1bfe63c01c982555bf4382 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 8 Jun 2020 20:52:17 -0700 Subject: [PATCH 096/178] VLOG(1) buffer allocation stats from gpu_compiler PiperOrigin-RevId: 315411231 Change-Id: Id96519dd8ec69ddf1afadbe81c5a12c47118c778 --- tensorflow/compiler/xla/service/gpu/gpu_compiler.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index 78ef59236f7..431c8884b86 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -499,6 +499,8 @@ StatusOr> GpuCompiler::RunBackend( /*allocate_buffers_for_constants=*/true, /*colorer=*/BufferAssigner::DefaultColorer(), /*must_not_live_out=*/{}, GetCanShareBuffer())); + VLOG(1) << "Buffer Assignment Stats " + << buffer_assignment->GetStats().ToString(); DumpHloModuleIfEnabled(*module, *buffer_assignment, "after_optimizations"); IrEmitterContext ir_emitter_context( From 379268e9f4cbccfc46827408a0e67896c75af5b4 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 8 Jun 2020 20:58:02 -0700 Subject: [PATCH 097/178] Ignore IsExpensive checks for fusion merging opportunities that save a lot of memory PiperOrigin-RevId: 315411773 Change-Id: I490854f1e82828f3dddd1e17e0943513b147e9d9 --- .../compiler/xla/service/gpu/fusion_merger.cc | 47 ++++++++++++------- .../xla/service/gpu/fusion_merger_test.cc | 31 ++++++++++++ 2 files changed, 60 insertions(+), 18 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/fusion_merger.cc b/tensorflow/compiler/xla/service/gpu/fusion_merger.cc index b4ccf758e94..60e4cb84b09 100644 --- a/tensorflow/compiler/xla/service/gpu/fusion_merger.cc +++ b/tensorflow/compiler/xla/service/gpu/fusion_merger.cc @@ -210,24 +210,6 @@ Status FusionInstructionMerger::HandleFusion(HloInstruction* fusion) { return Status::OK(); } - // Skip 'fusion' instruction if any of its fused instructions are expensive. - // This is done to avoid the duplication of expensive instructions, which - // would occur if 'fusion' were merged into multiple users. - // - // If 'fusion' has just one user, then an earlier fusion pass chose not to - // fuse this producer/consumer pair (likely because of expensive instruction - // re-use by the consumer), and so we honor that choice here as well. - if (absl::c_any_of(fusion->fused_instructions(), - [](const HloInstruction* instruction) { - return instruction->opcode() != HloOpcode::kParameter && - GpuInstructionFusion::IsExpensive(*instruction); - })) { - VLOG(3) << "Not merging " << fusion->name() - << ": Contains one or more expensive instructions."; - ++num_fail_expensive_fused_instruction_; - return Status::OK(); - } - // Skip 'fusion' instruction if merging it into all users would result in a // net increase in bytes transferred (currently allowing the net bytes // transferred to be exceeded up to ~10% in exchange for eliminating the @@ -244,6 +226,35 @@ Status FusionInstructionMerger::HandleFusion(HloInstruction* fusion) { return Status::OK(); } + // Skip 'fusion' instruction if any of its fused instructions are expensive. + // This is done to avoid the duplication of expensive instructions, which + // would occur if 'fusion' were merged into multiple users. + // + // If 'fusion' has just one user, then an earlier fusion pass chose not to + // fuse this producer/consumer pair (likely because of expensive instruction + // re-use by the consumer), and so we honor that choice here as well. 
+ // + // Moreover, if we are going to save a "lot" in memory bandwidth then we + // ignore how expensive the fusion instructions are. The heuristic used to + // determine "a lot" is the following: merging must reduce memory traffic by a + // factor of 0.3, and the amount of memory accessed must not be entirely + // trivial (above 1K). This likely has room for improvement in the future. + + bool allow_expensive_ops = + merged_to_current_bytes_ratio < 0.3 && current_bytes_transferred > 1024; + + if (!allow_expensive_ops && + absl::c_any_of(fusion->fused_instructions(), + [](const HloInstruction* instruction) { + return instruction->opcode() != HloOpcode::kParameter && + GpuInstructionFusion::IsExpensive(*instruction); + })) { + VLOG(3) << "Not merging " << fusion->name() + << ": Contains one or more expensive instructions."; + ++num_fail_expensive_fused_instruction_; + return Status::OK(); + } + // Skip 'fusion' instruction if merging it into at least one of the users // would cause too much code duplication because of inefficiencies in the // fusion emitter. diff --git a/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc b/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc index 47fd9bbfb09..42891154c23 100644 --- a/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc +++ b/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc @@ -367,6 +367,37 @@ TEST_F(FusionMergerTest, WillNotMergeIfFusionEmitterIsInefficient) { EXPECT_FALSE(FusionMerger().Run(module.get()).ValueOrDie()); } +TEST_F(FusionMergerTest, WillMergeExpensiveFusionsIfSavesMemory) { + auto module = ParseAndReturnVerifiedModule(R"( + HloModule m + + %f_a (p: f32[]) -> f32[1024,1024,1024] { + %p = f32[] parameter(0) + %b = f32[1024,1024,1024] broadcast(%p), dimensions={} + ROOT %t = f32[1024,1024,1024] tanh(%b) + } + + %f_b (p: f32[1024,1024,1024]) -> f32[1024,1024,1024] { + %p = f32[1024,1024,1024] parameter(0) + ROOT %t = f32[1024,1024,1024] tanh(%p) + } + + %f_c (p: f32[1024,1024,1024]) -> f32[1024,1024,1024] { + %p = f32[1024,1024,1024] parameter(0) + ROOT %t = f32[1024,1024,1024] tanh(%p) + } + + ENTRY entry { + p0 = f32[] parameter(0) + f1 = f32[1024,1024,1024] fusion(p0), kind=kLoop, calls=%f_a + f2 = f32[1024,1024,1024] fusion(f1), kind=kLoop, calls=%f_b + f3 = f32[1024,1024,1024] fusion(f1), kind=kLoop, calls=%f_c + ROOT f4 = f32[1024,1024,1024] add(f2, f3) + })") + .ValueOrDie(); + EXPECT_TRUE(FusionMerger().Run(module.get()).ValueOrDie()); +} + } // namespace } // namespace gpu } // namespace xla From 1a90749db94af66b712f7fbaf32a8b25792583e9 Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Mon, 8 Jun 2020 21:23:54 -0700 Subject: [PATCH 098/178] Enable the ability to pass a MicroAllocator instance into a MicroInterpreter instance. This change is a stepping stone to enable users to: 1.) Enable users to use a single MicroAllocator/arena for multiple models. 2.) Enable users to use the new recording allocation APIs for auditing arena allocations. 
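For example, the auditing flow in 2.) can be exercised roughly as follows. This condenses the updated recording_micro_allocator_test.cc into a free-standing sketch; the arena size, model pointer and reporter are placeholders, and the explicit TfLiteContext mirrors the test at this commit rather than a finished public API:

  // Sketch of the arena-auditing flow enabled by this change. kArenaSize and
  // the model/reporter arguments are placeholders; the TfLiteContext wiring
  // follows the unit test, not a final API.
  #include <cstddef>
  #include <cstdint>

  #include "tensorflow/lite/c/common.h"
  #include "tensorflow/lite/core/api/error_reporter.h"
  #include "tensorflow/lite/micro/recording_micro_allocator.h"
  #include "tensorflow/lite/micro/recording_simple_memory_allocator.h"
  #include "tensorflow/lite/schema/schema_generated.h"

  void AuditArena(const tflite::Model* model, tflite::ErrorReporter* reporter) {
    constexpr size_t kArenaSize = 1024 * 12;
    static uint8_t arena[kArenaSize];
    TfLiteContext context;

    // Records every head/tail allocation made against the arena.
    tflite::RecordingSimpleMemoryAllocator memory_allocator(reporter, arena,
                                                            kArenaSize);

    // The factory returns nullptr if the model graph fails to initialize.
    tflite::RecordingMicroAllocator* allocator =
        tflite::RecordingMicroAllocator::Create(&context, model,
                                                &memory_allocator, reporter);
    if (allocator == nullptr) return;

    // Per-category tallies (count, requested bytes, used bytes) can now be
    // read back, e.g. for the TfLiteTensor array.
    tflite::RecordedAllocation tensors = allocator->GetRecordedAllocation(
        tflite::RecordedAllocationType::kTfLiteTensorArray);
    (void)tensors;
  }

The first goal relies on the same shape: the new MicroInterpreter constructor accepts an existing MicroAllocator*, so a single allocator (and arena) can back more than one interpreter; the remaining constructor/factory cleanup is tracked by the TODOs added to micro_allocator.h and micro_interpreter.h.
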
PiperOrigin-RevId: 315414448 Change-Id: Ied1ea56deb73c09bb64b3e41fd3502b5a4cd5bb8 --- tensorflow/lite/micro/BUILD | 1 + .../examples/hello_world/hello_world_test.cc | 6 +- .../examples/hello_world/main_functions.cc | 2 +- .../examples/person_detection/utils/BUILD | 25 +++--- tensorflow/lite/micro/micro_allocator.cc | 73 ++++++++-------- tensorflow/lite/micro/micro_allocator.h | 39 ++++++--- tensorflow/lite/micro/micro_allocator_test.cc | 42 +++++----- tensorflow/lite/micro/micro_interpreter.cc | 65 ++++++++------- tensorflow/lite/micro/micro_interpreter.h | 14 +++- .../lite/micro/recording_micro_allocator.cc | 21 +++++ .../lite/micro/recording_micro_allocator.h | 19 +++-- .../micro/recording_micro_allocator_test.cc | 83 ++++--------------- .../lite/micro/simple_memory_allocator.cc | 2 + 13 files changed, 208 insertions(+), 184 deletions(-) diff --git a/tensorflow/lite/micro/BUILD b/tensorflow/lite/micro/BUILD index 735952f9bee..dbfa6c7aaf1 100644 --- a/tensorflow/lite/micro/BUILD +++ b/tensorflow/lite/micro/BUILD @@ -193,6 +193,7 @@ cc_library( ":micro_compatibility", ":micro_framework", "//tensorflow/lite/core/api", + "//tensorflow/lite/kernels/internal:compatibility", ], ) diff --git a/tensorflow/lite/micro/examples/hello_world/hello_world_test.cc b/tensorflow/lite/micro/examples/hello_world/hello_world_test.cc index 0a447440aea..7ced43211b7 100644 --- a/tensorflow/lite/micro/examples/hello_world/hello_world_test.cc +++ b/tensorflow/lite/micro/examples/hello_world/hello_world_test.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/micro/all_ops_resolver.h" #include "tensorflow/lite/micro/examples/hello_world/model.h" #include "tensorflow/lite/micro/micro_error_reporter.h" @@ -46,16 +45,15 @@ TF_LITE_MICRO_TEST(LoadModelAndPerformInference) { // Minimum arena size, at the time of writing. After allocating tensors // you can retrieve this value by invoking interpreter.arena_used_bytes(). - const int model_arena_size = 2352; + const int model_arena_size = 2468; /* Extra headroom for model + alignment + future interpreter changes */ - const int extra_arena_size = 560 + 16 + 100; + const int extra_arena_size = 570 + 16 + 100; const int tensor_arena_size = model_arena_size + extra_arena_size; uint8_t tensor_arena[tensor_arena_size]; // Build an interpreter to run the model with tflite::MicroInterpreter interpreter(model, resolver, tensor_arena, tensor_arena_size, error_reporter); - // Allocate memory from the tensor_arena for the model's tensors TF_LITE_MICRO_EXPECT_EQ(interpreter.AllocateTensors(), kTfLiteOk); diff --git a/tensorflow/lite/micro/examples/hello_world/main_functions.cc b/tensorflow/lite/micro/examples/hello_world/main_functions.cc index 62db659374d..65bfcb5a013 100644 --- a/tensorflow/lite/micro/examples/hello_world/main_functions.cc +++ b/tensorflow/lite/micro/examples/hello_world/main_functions.cc @@ -36,7 +36,7 @@ int inference_count = 0; // Create an area of memory to use for input, output, and intermediate arrays. // Minimum arena size, at the time of writing. After allocating tensors // you can retrieve this value by invoking interpreter.arena_used_bytes(). -const int kModelArenaSize = 2352; +const int kModelArenaSize = 2468; // Extra headroom for model + alignment + future interpreter changes. 
const int kExtraArenaSize = 560 + 16 + 100; const int kTensorArenaSize = kModelArenaSize + kExtraArenaSize; diff --git a/tensorflow/lite/micro/examples/person_detection/utils/BUILD b/tensorflow/lite/micro/examples/person_detection/utils/BUILD index 55ce96a26f7..98339572078 100644 --- a/tensorflow/lite/micro/examples/person_detection/utils/BUILD +++ b/tensorflow/lite/micro/examples/person_detection/utils/BUILD @@ -19,15 +19,16 @@ py_library( ], ) -py_test( - name = "raw_to_bitmap_test", - srcs = ["raw_to_bitmap_test.py"], - data = glob(["testdata/**"]), - python_version = "PY3", - tags = ["noubsan"], # TODO(b/144512025): Fix raw_to_bitmap_test to fix ubsan failure. - deps = [ - ":raw_to_bitmap_lib", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", - ], -) +# TODO(b/158529664): Re-enable this test by removing the TF python test lib dependency. +# py_test( +# name = "raw_to_bitmap_test", +# srcs = ["raw_to_bitmap_test.py"], +# data = glob(["testdata/**"]), +# python_version = "PY3", +# tags = ["noubsan"], # TODO(b/144512025): Fix raw_to_bitmap_test to fix ubsan failure. +# deps = [ +# ":raw_to_bitmap_lib", +# "//third_party/py/numpy", +# "//tensorflow/python:client_testlib", +# ], +# ) diff --git a/tensorflow/lite/micro/micro_allocator.cc b/tensorflow/lite/micro/micro_allocator.cc index 2289e8390bc..b97870ecc8b 100644 --- a/tensorflow/lite/micro/micro_allocator.cc +++ b/tensorflow/lite/micro/micro_allocator.cc @@ -401,29 +401,6 @@ TfLiteStatus InitializeTfLiteTensorFromFlatbuffer( } // namespace internal -MicroAllocator::MicroAllocator(TfLiteContext* context, const Model* model, - uint8_t* tensor_arena, size_t arena_size, - ErrorReporter* error_reporter) - : model_(model), - context_(context), - error_reporter_(error_reporter), - active_(false) { - uint8_t* aligned_arena = AlignPointerUp(tensor_arena, kBufferAlignment); - if (aligned_arena != tensor_arena) { - TF_LITE_REPORT_ERROR( - error_reporter_, - "%d bytes lost due to alignment. To avoid this loss, please make sure " - "the tensor_arena is 16 bytes aligned.", - aligned_arena - tensor_arena); - } - size_t aligned_arena_size = tensor_arena + arena_size - aligned_arena; - // Creates a root memory allocator managing the arena. The allocator itself - // also locates in the arena buffer. This allocator doesn't need to be - // destructed as it's the root allocator. - memory_allocator_ = SimpleMemoryAllocator::Create( - error_reporter, aligned_arena, aligned_arena_size); -} - MicroAllocator::MicroAllocator(TfLiteContext* context, const Model* model, SimpleMemoryAllocator* memory_allocator, ErrorReporter* error_reporter) @@ -435,19 +412,44 @@ MicroAllocator::MicroAllocator(TfLiteContext* context, const Model* model, MicroAllocator::~MicroAllocator() {} -TfLiteStatus MicroAllocator::Init() { - TfLiteStatus status = InitGraphAndContextTensorData(); - // TODO(b/147871299): Consider improving this code. A better way of handling - // failures in the constructor is to have a static function that returns a - // pointer to the class. If allocation failed, a nullptr will be returned. 
- if (status != kTfLiteOk) { - TF_LITE_REPORT_ERROR(error_reporter_, - "MicroAllocator: Failed to initialize."); - active_ = false; - } else { - active_ = true; +MicroAllocator* MicroAllocator::Create(TfLiteContext* context, + const Model* model, + uint8_t* tensor_arena, size_t arena_size, + ErrorReporter* error_reporter) { + uint8_t* aligned_arena = AlignPointerUp(tensor_arena, kBufferAlignment); + if (aligned_arena != tensor_arena) { + TF_LITE_REPORT_ERROR( + error_reporter, + "%d bytes lost due to alignment. To avoid this loss, please make sure " + "the tensor_arena is 16 bytes aligned.", + aligned_arena - tensor_arena); } - return status; + size_t aligned_arena_size = tensor_arena + arena_size - aligned_arena; + return Create(context, model, + SimpleMemoryAllocator::Create(error_reporter, aligned_arena, + aligned_arena_size), + error_reporter); +} + +MicroAllocator* MicroAllocator::Create(TfLiteContext* context, + const Model* model, + SimpleMemoryAllocator* memory_allocator, + ErrorReporter* error_reporter) { + TFLITE_DCHECK(context != nullptr); + TFLITE_DCHECK(model != nullptr); + TFLITE_DCHECK(memory_allocator != nullptr); + TFLITE_DCHECK(error_reporter != nullptr); + + uint8_t* allocator_buffer = memory_allocator->AllocateFromTail( + sizeof(MicroAllocator), alignof(MicroAllocator)); + MicroAllocator* allocator = new (allocator_buffer) + MicroAllocator(context, model, memory_allocator, error_reporter); + if (allocator->InitGraphAndContextTensorData() != kTfLiteOk) { + TF_LITE_REPORT_ERROR(error_reporter, + "MicroAllocator: Failed to initialize model graph."); + return nullptr; + } + return allocator; } TfLiteStatus MicroAllocator::PrepareFromFlatbuffer( @@ -612,6 +614,7 @@ TfLiteStatus MicroAllocator::InitGraphAndContextTensorData() { TF_LITE_ENSURE_STATUS(AllocateTfLiteTensorArray()); TF_LITE_ENSURE_STATUS(PopulateTfLiteTensorArrayFromFlatbuffer()); + active_ = true; return kTfLiteOk; } diff --git a/tensorflow/lite/micro/micro_allocator.h b/tensorflow/lite/micro/micro_allocator.h index a56bef02bc8..e8659b006b1 100644 --- a/tensorflow/lite/micro/micro_allocator.h +++ b/tensorflow/lite/micro/micro_allocator.h @@ -64,7 +64,17 @@ typedef struct { // Allocator responsible for allocating memory for all intermediate tensors // necessary to invoke a model. - +// +// The lifetime of the model, tensor arena and error reporter must be at +// least as long as that of the allocator object, since the allocator needs +// them to be accessible during its entire lifetime. +// +// The MicroAllocator simply plans out additional allocations that are required +// to standup a model for inference in TF Micro. This class currently relies on +// an additional allocator - SimpleMemoryAllocator - for all allocations from an +// arena. These allocations are divided into head (non-persistent) and tail +// (persistent) regions: +// // Memory layout to help understand how it works // This information could change in the future version. // ************** .memory_allocator->GetBuffer() @@ -77,20 +87,21 @@ typedef struct { // ************** .memory_allocator->GetBuffer() + ->GetMaxBufferSize() class MicroAllocator { public: - // The lifetime of the model, tensor allocator and error reporter must be at - // least as long as that of the allocator object, since the allocator needs - // them to be accessible during its entire lifetime. - + // Creates a MicroAllocator instance from a given tensor arena. This arena + // will be managed by the created instance. 
// Note: Please use __declspec(align(16)) to make sure tensor_arena is 16 // bytes aligned, otherwise some head room will be wasted. - MicroAllocator(TfLiteContext* context, const Model* model, - uint8_t* tensor_arena, size_t arena_size, - ErrorReporter* error_reporter); - virtual ~MicroAllocator(); + // TODO(b/157615197): Cleanup constructor + factory usage. + static MicroAllocator* Create(TfLiteContext* context, const Model* model, + uint8_t* tensor_arena, size_t arena_size, + ErrorReporter* error_reporter); - // Initializes the allocator by allocating required internal structs required - // to prepare the model from the flatbuffer data in PrepareFromFlatbuffer. - TfLiteStatus Init(); + // Creates a MicroAllocator instance using the provided SimpleMemoryAllocator + // intance. This allocator instance will use the SimpleMemoryAllocator + // instance to manage allocations internally. + static MicroAllocator* Create(TfLiteContext* context, const Model* model, + SimpleMemoryAllocator* memory_allocator, + ErrorReporter* error_reporter); // Run through the model flatbuffer data (loaded from the TfLiteModel // instance) to allocate nodes and registrations. We need to keep them for the @@ -132,6 +143,7 @@ class MicroAllocator { MicroAllocator(TfLiteContext* context, const Model* model, SimpleMemoryAllocator* memory_allocator, ErrorReporter* error_reporter); + virtual ~MicroAllocator(); // Allocates an array in the arena to hold pointers to the tensors required // to initialize and prepare a model. These allocations are stored and @@ -166,9 +178,10 @@ class MicroAllocator { ErrorReporter* error_reporter(); - private: + // Initializes the graph and allocates TfLiteContext tensor data. TfLiteStatus InitGraphAndContextTensorData(); + private: // A simple memory allocator that always allocate from the arena tail. SimpleMemoryAllocator* memory_allocator_; diff --git a/tensorflow/lite/micro/micro_allocator_test.cc b/tensorflow/lite/micro/micro_allocator_test.cc index a85d30dab46..d2258498b0c 100644 --- a/tensorflow/lite/micro/micro_allocator_test.cc +++ b/tensorflow/lite/micro/micro_allocator_test.cc @@ -87,6 +87,8 @@ TF_LITE_MICRO_TEST(TestInitializeRuntimeTensor) { TF_LITE_MICRO_EXPECT_EQ(400, allocated_tensor.bytes); TF_LITE_MICRO_EXPECT_EQ(nullptr, allocated_tensor.data.i32); TF_LITE_MICRO_EXPECT_EQ(kTfLiteArenaRw, allocated_tensor.allocation_type); + + simple_allocator->~SimpleMemoryAllocator(); } TF_LITE_MICRO_TEST(TestInitializeQuantizedTensor) { @@ -114,6 +116,8 @@ TF_LITE_MICRO_TEST(TestInitializeQuantizedTensor) { TF_LITE_MICRO_EXPECT_EQ(400, allocated_tensor.bytes); TF_LITE_MICRO_EXPECT_EQ(nullptr, allocated_tensor.data.i32); TF_LITE_MICRO_EXPECT_EQ(kTfLiteArenaRw, allocated_tensor.allocation_type); + + simple_allocator->~SimpleMemoryAllocator(); } TF_LITE_MICRO_TEST(TestMissingQuantization) { @@ -149,16 +153,16 @@ TF_LITE_MICRO_TEST(TestFinishTensorAllocation) { 760 /* minimal arena size at the time of writting */ + 16 /* alignment */ + 100 /* leave some headroom for future proof */; uint8_t arena[arena_size]; - tflite::MicroAllocator allocator(&context, model, arena, arena_size, - micro_test::reporter); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, allocator.Init()); + tflite::MicroAllocator* allocator = tflite::MicroAllocator::Create( + &context, model, arena, arena_size, micro_test::reporter); + TF_LITE_MICRO_EXPECT_NE(nullptr, allocator); TF_LITE_MICRO_EXPECT_EQ(4, context.tensors_size); // Memory planning hasn't been finalized, so the used bytes is unknown. 
- TF_LITE_MICRO_EXPECT_EQ(0, allocator.used_bytes()); + TF_LITE_MICRO_EXPECT_EQ(0, allocator->used_bytes()); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, allocator.FinishTensorAllocation()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, allocator->FinishTensorAllocation()); // No allocation to be done afterwards. - TF_LITE_MICRO_EXPECT_EQ(kTfLiteError, allocator.FinishTensorAllocation()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteError, allocator->FinishTensorAllocation()); // NOTE: Tensor indexes match the values in GetSimpleMockModel(). tflite::testing::VerifyMockTensor(&context.tensors[0]); @@ -178,7 +182,7 @@ TF_LITE_MICRO_TEST(TestFinishTensorAllocation) { context.tensors[1].data.raw); TF_LITE_MICRO_EXPECT_NE(context.tensors[3].data.raw, context.tensors[2].data.raw); - TF_LITE_MICRO_EXPECT_LE(allocator.used_bytes(), 760 + 100); + TF_LITE_MICRO_EXPECT_LE(allocator->used_bytes(), 760 + 100); } TF_LITE_MICRO_TEST(TestAllocationForModelsWithBranches) { @@ -186,10 +190,10 @@ TF_LITE_MICRO_TEST(TestAllocationForModelsWithBranches) { TfLiteContext context; constexpr size_t arena_size = 4096; uint8_t arena[arena_size]; - tflite::MicroAllocator allocator(&context, model, arena, arena_size, - micro_test::reporter); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, allocator.Init()); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, allocator.FinishTensorAllocation()); + tflite::MicroAllocator* allocator = tflite::MicroAllocator::Create( + &context, model, arena, arena_size, micro_test::reporter); + TF_LITE_MICRO_EXPECT_NE(nullptr, allocator); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, allocator->FinishTensorAllocation()); uint8_t* start = context.tensors[0].data.uint8; // Check test_helpers.cc BuildSimpleModelWithBranch for model structure. @@ -211,14 +215,14 @@ TF_LITE_MICRO_TEST(TestFinishComplexTensorAllocation) { TfLiteContext context; constexpr size_t arena_size = 2048; uint8_t arena[arena_size]; - tflite::MicroAllocator allocator(&context, model, arena, arena_size, - micro_test::reporter); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, allocator.Init()); + tflite::MicroAllocator* allocator = tflite::MicroAllocator::Create( + &context, model, arena, arena_size, micro_test::reporter); + TF_LITE_MICRO_EXPECT_NE(nullptr, allocator); TF_LITE_MICRO_EXPECT_EQ(10, context.tensors_size); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, allocator.FinishTensorAllocation()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, allocator->FinishTensorAllocation()); // No allocation to be done afterwards. - TF_LITE_MICRO_EXPECT_EQ(kTfLiteError, allocator.FinishTensorAllocation()); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteError, allocator->FinishTensorAllocation()); // NOTE: Tensor indexes match the values in GetComplexMockModel(). 
tflite::testing::VerifyMockTensor(&context.tensors[0]); @@ -246,9 +250,9 @@ TF_LITE_MICRO_TEST(TestDoubleInitFails) { TfLiteContext context; constexpr size_t arena_size = 2048; uint8_t arena[arena_size]; - tflite::MicroAllocator allocator(&context, model, arena, arena_size, - micro_test::reporter); - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, allocator.Init()); + tflite::MicroAllocator* allocator = tflite::MicroAllocator::Create( + &context, model, arena, arena_size, micro_test::reporter); + TF_LITE_MICRO_EXPECT_NE(nullptr, allocator); TF_LITE_MICRO_EXPECT_EQ(10, context.tensors_size); } diff --git a/tensorflow/lite/micro/micro_interpreter.cc b/tensorflow/lite/micro/micro_interpreter.cc index 9f0f073af10..1d5dfe66c8e 100644 --- a/tensorflow/lite/micro/micro_interpreter.cc +++ b/tensorflow/lite/micro/micro_interpreter.cc @@ -79,21 +79,45 @@ MicroInterpreter::MicroInterpreter(const Model* model, : model_(model), op_resolver_(op_resolver), error_reporter_(error_reporter), - allocator_(&context_, model_, tensor_arena, tensor_arena_size, - error_reporter_), - tensors_allocated_(false), + allocator_(*MicroAllocator::Create(&context_, model, tensor_arena, + tensor_arena_size, error_reporter)), context_helper_(error_reporter_, &allocator_) { - const flatbuffers::Vector>* subgraphs = - model->subgraphs(); - if (subgraphs->size() != 1) { - TF_LITE_REPORT_ERROR(error_reporter, - "Only 1 subgraph is currently supported.\n"); - initialization_status_ = kTfLiteError; - return; + Init(); +} + +MicroInterpreter::MicroInterpreter(const Model* model, + const MicroOpResolver* op_resolver, + MicroAllocator* allocator, + ErrorReporter* error_reporter) + : model_(model), + op_resolver_(*op_resolver), + error_reporter_(error_reporter), + allocator_(*allocator), + context_helper_(error_reporter_, &allocator_) { + Init(); +} + +MicroInterpreter::~MicroInterpreter() { + if (node_and_registrations_ != nullptr) { + for (size_t i = 0; i < subgraph_->operators()->size(); ++i) { + TfLiteNode* node = &(node_and_registrations_[i].node); + const TfLiteRegistration* registration = + node_and_registrations_[i].registration; + // registration is allocated outside the interpreter, so double check to + // make sure it's not nullptr; + if (registration != nullptr && registration->free != nullptr) { + registration->free(&context_, node->user_data); + } + } } - if (allocator_.Init() != kTfLiteOk) { - TF_LITE_REPORT_ERROR(error_reporter, - "Failed to initialize the allocator.\n"); +} + +void MicroInterpreter::Init() { + const flatbuffers::Vector>* subgraphs = + model_->subgraphs(); + if (subgraphs->size() != 1) { + TF_LITE_REPORT_ERROR(error_reporter_, + "Only 1 subgraph is currently supported.\n"); initialization_status_ = kTfLiteError; return; } @@ -119,21 +143,6 @@ MicroInterpreter::MicroInterpreter(const Model* model, initialization_status_ = kTfLiteOk; } -MicroInterpreter::~MicroInterpreter() { - if (node_and_registrations_ != nullptr) { - for (size_t i = 0; i < subgraph_->operators()->size(); ++i) { - TfLiteNode* node = &(node_and_registrations_[i].node); - const TfLiteRegistration* registration = - node_and_registrations_[i].registration; - // registration is allocated outside the interpreter, so double check to - // make sure it's not nullptr; - if (registration != nullptr && registration->free != nullptr) { - registration->free(&context_, node->user_data); - } - } - } -} - void MicroInterpreter::CorrectTensorEndianness(TfLiteTensor* tensorCorr) { int32_t tensorSize = 1; for (int d = 0; d < tensorCorr->dims->size; ++d) diff --git 
a/tensorflow/lite/micro/micro_interpreter.h b/tensorflow/lite/micro/micro_interpreter.h index a0b70527905..bffaca9a292 100644 --- a/tensorflow/lite/micro/micro_interpreter.h +++ b/tensorflow/lite/micro/micro_interpreter.h @@ -76,6 +76,14 @@ class MicroInterpreter { uint8_t* tensor_arena, size_t tensor_arena_size, ErrorReporter* error_reporter); + // Create an interpreter instance using an existing MicroAllocator instance. + // This constructor should be used when creating an allocator that needs to + // have allocation handled in more than one interpreter or for recording + // allocations inside the interpreter. The lifetime of the allocator must be + // as long as that of the interpreter object. + MicroInterpreter(const Model* model, const MicroOpResolver* op_resolver, + MicroAllocator* allocator, ErrorReporter* error_reporter); + ~MicroInterpreter(); // Runs through the model and allocates all necessary input, output and @@ -152,6 +160,10 @@ class MicroInterpreter { size_t arena_used_bytes() const { return allocator_.used_bytes(); } private: + // TODO(b/158263161): Consider switching to Create() function to enable better + // error reporting during initialization. + void Init(); + void CorrectTensorEndianness(TfLiteTensor* tensorCorr); template @@ -163,7 +175,7 @@ class MicroInterpreter { const MicroOpResolver& op_resolver_; ErrorReporter* error_reporter_; TfLiteContext context_ = {}; - MicroAllocator allocator_; + MicroAllocator& allocator_; bool tensors_allocated_; TfLiteStatus initialization_status_; diff --git a/tensorflow/lite/micro/recording_micro_allocator.cc b/tensorflow/lite/micro/recording_micro_allocator.cc index 96cbb6e00e5..1efdb06122c 100644 --- a/tensorflow/lite/micro/recording_micro_allocator.cc +++ b/tensorflow/lite/micro/recording_micro_allocator.cc @@ -16,6 +16,7 @@ limitations under the License. 
#include "tensorflow/lite/micro/recording_micro_allocator.h" #include "tensorflow/lite/core/api/error_reporter.h" +#include "tensorflow/lite/kernels/internal/compatibility.h" #include "tensorflow/lite/micro/compatibility.h" #include "tensorflow/lite/micro/recording_simple_memory_allocator.h" @@ -29,6 +30,26 @@ RecordingMicroAllocator::RecordingMicroAllocator( error_reporter), recording_memory_allocator_(recording_memory_allocator) {} +RecordingMicroAllocator* RecordingMicroAllocator::Create( + TfLiteContext* context, const Model* model, + RecordingSimpleMemoryAllocator* memory_allocator, + ErrorReporter* error_reporter) { + TFLITE_DCHECK(context != nullptr); + TFLITE_DCHECK(model != nullptr); + TFLITE_DCHECK(memory_allocator != nullptr); + uint8_t* allocator_buffer = memory_allocator->AllocateFromTail( + sizeof(RecordingMicroAllocator), alignof(RecordingMicroAllocator)); + RecordingMicroAllocator* allocator = new (allocator_buffer) + RecordingMicroAllocator(context, model, memory_allocator, error_reporter); + if (allocator->InitGraphAndContextTensorData() != kTfLiteOk) { + TF_LITE_REPORT_ERROR( + error_reporter, + "RecordingMicroAllocator: Failed to initialize model graph."); + return nullptr; + } + return allocator; +} + RecordedAllocation RecordingMicroAllocator::GetRecordedAllocation( RecordedAllocationType allocation_type) { switch (allocation_type) { diff --git a/tensorflow/lite/micro/recording_micro_allocator.h b/tensorflow/lite/micro/recording_micro_allocator.h index 4a70c955cda..fcd068b97d4 100644 --- a/tensorflow/lite/micro/recording_micro_allocator.h +++ b/tensorflow/lite/micro/recording_micro_allocator.h @@ -44,14 +44,17 @@ typedef struct RecordedAllocation { // Utility subclass of MicroAllocator that records all allocations // inside the arena. A summary of allocations can be logged through the -// ErrorReporter by invoking LogAllocations(). Individual allocation recordings -// can be retrieved by type through the GetRecordedAllocation() function. This -// class should only be used for auditing memory usage or integration testing. +// ErrorReporter by invoking LogAllocations(). This special allocator requires +// an instance of RecordingSimpleMemoryAllocator to capture allocations in the +// head and tail. Arena allocation recording can be retrieved by type through +// the GetRecordedAllocation() function. This class should only be used for +// auditing memory usage or integration testing. class RecordingMicroAllocator : public MicroAllocator { public: - RecordingMicroAllocator(TfLiteContext* context, const Model* model, - RecordingSimpleMemoryAllocator* memory_allocator, - ErrorReporter* error_reporter); + static RecordingMicroAllocator* Create( + TfLiteContext* context, const Model* model, + RecordingSimpleMemoryAllocator* memory_allocator, + ErrorReporter* error_reporter); // Returns the recorded allocations information for a given allocation type. 
RecordedAllocation GetRecordedAllocation( @@ -74,6 +77,10 @@ class RecordingMicroAllocator : public MicroAllocator { void RecordAllocationUsage(RecordedAllocation& recorded_allocation); private: + RecordingMicroAllocator(TfLiteContext* context, const Model* model, + RecordingSimpleMemoryAllocator* memory_allocator, + ErrorReporter* error_reporter); + void PrintRecordedAllocation(RecordedAllocationType allocation_type, const char* allocation_name); diff --git a/tensorflow/lite/micro/recording_micro_allocator_test.cc b/tensorflow/lite/micro/recording_micro_allocator_test.cc index 7e1a1beeaeb..d22ad12d4fd 100644 --- a/tensorflow/lite/micro/recording_micro_allocator_test.cc +++ b/tensorflow/lite/micro/recording_micro_allocator_test.cc @@ -33,60 +33,19 @@ constexpr int kTestConvArenaSize = 1024 * 12; TF_LITE_MICRO_TESTS_BEGIN -TF_LITE_MICRO_TEST(TestRecordedValuesDefaultToZero) { - TfLiteContext context; - const tflite::Model* model = tflite::testing::GetSimpleMockModel(); - constexpr size_t arena_size = 1024; - uint8_t arena[arena_size]; - - tflite::RecordingSimpleMemoryAllocator memory_allocator(micro_test::reporter, - arena, arena_size); - tflite::RecordingMicroAllocator micro_allocator( - &context, model, &memory_allocator, micro_test::reporter); - - tflite::RecordedAllocation recorded_allocation; - - recorded_allocation = micro_allocator.GetRecordedAllocation( - tflite::RecordedAllocationType::kTfLiteTensorArray); - TF_LITE_MICRO_EXPECT_EQ(0, recorded_allocation.requested_bytes); - TF_LITE_MICRO_EXPECT_EQ(0, recorded_allocation.used_bytes); - TF_LITE_MICRO_EXPECT_EQ(0, recorded_allocation.count); - - recorded_allocation = micro_allocator.GetRecordedAllocation( - tflite::RecordedAllocationType::kTfLiteTensorArrayQuantizationData); - TF_LITE_MICRO_EXPECT_EQ(0, recorded_allocation.requested_bytes); - TF_LITE_MICRO_EXPECT_EQ(0, recorded_allocation.used_bytes); - TF_LITE_MICRO_EXPECT_EQ(0, recorded_allocation.count); - - recorded_allocation = micro_allocator.GetRecordedAllocation( - tflite::RecordedAllocationType::kNodeAndRegistrationArray); - TF_LITE_MICRO_EXPECT_EQ(0, recorded_allocation.requested_bytes); - TF_LITE_MICRO_EXPECT_EQ(0, recorded_allocation.used_bytes); - TF_LITE_MICRO_EXPECT_EQ(0, recorded_allocation.count); - - recorded_allocation = micro_allocator.GetRecordedAllocation( - tflite::RecordedAllocationType::kOpData); - TF_LITE_MICRO_EXPECT_EQ(0, recorded_allocation.requested_bytes); - TF_LITE_MICRO_EXPECT_EQ(0, recorded_allocation.used_bytes); - TF_LITE_MICRO_EXPECT_EQ(0, recorded_allocation.count); -} - TF_LITE_MICRO_TEST(TestRecordsTfLiteTensorArrayData) { TfLiteContext context; const tflite::Model* model = tflite::GetModel(kTestConvModelData); uint8_t arena[kTestConvArenaSize]; tflite::RecordingSimpleMemoryAllocator memory_allocator( micro_test::reporter, arena, kTestConvArenaSize); - tflite::RecordingMicroAllocator allocator(&context, model, &memory_allocator, - micro_test::reporter); - TfLiteStatus status = allocator.Init(); - - // TODO(b/158102673): Ugly workaround for not having fatal test assertions: - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, status); - if (status != kTfLiteOk) return 1; + tflite::RecordingMicroAllocator* micro_allocator = + tflite::RecordingMicroAllocator::Create( + &context, model, &memory_allocator, micro_test::reporter); + TF_LITE_MICRO_EXPECT_NE(nullptr, micro_allocator); tflite::RecordedAllocation recorded_allocation = - allocator.GetRecordedAllocation( + micro_allocator->GetRecordedAllocation( tflite::RecordedAllocationType::kTfLiteTensorArray); 
TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.count, context.tensors_size); TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.requested_bytes, @@ -101,13 +60,10 @@ TF_LITE_MICRO_TEST(TestRecordsTensorArrayQuantizationData) { uint8_t arena[kTestConvArenaSize]; tflite::RecordingSimpleMemoryAllocator memory_allocator( micro_test::reporter, arena, kTestConvArenaSize); - tflite::RecordingMicroAllocator allocator(&context, model, &memory_allocator, - micro_test::reporter); - TfLiteStatus status = allocator.Init(); - - // TODO(b/158102673): Ugly workaround for not having fatal test assertions: - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, status); - if (status != kTfLiteOk) return 1; + tflite::RecordingMicroAllocator* micro_allocator = + tflite::RecordingMicroAllocator::Create( + &context, model, &memory_allocator, micro_test::reporter); + TF_LITE_MICRO_EXPECT_NE(nullptr, micro_allocator); // Walk the model subgraph to find all tensors with quantization params and // keep a tally. @@ -135,7 +91,7 @@ TF_LITE_MICRO_TEST(TestRecordsTensorArrayQuantizationData) { quantized_channel_bytes; tflite::RecordedAllocation recorded_allocation = - allocator.GetRecordedAllocation( + micro_allocator->GetRecordedAllocation( tflite::RecordedAllocationType::kTfLiteTensorArrayQuantizationData); // Each quantized tensors has 3 mallocs (quant struct, scale dimensions, zero @@ -154,23 +110,20 @@ TF_LITE_MICRO_TEST(TestRecordsNodeAndRegistrationArrayData) { uint8_t arena[kTestConvArenaSize]; tflite::RecordingSimpleMemoryAllocator memory_allocator( micro_test::reporter, arena, kTestConvArenaSize); - tflite::RecordingMicroAllocator allocator(&context, model, &memory_allocator, - micro_test::reporter); - TfLiteStatus status = allocator.Init(); - - // TODO(b/158102673): Ugly workaround for not having fatal test assertions: - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, status); - if (status != kTfLiteOk) return 1; + tflite::RecordingMicroAllocator* micro_allocator = + tflite::RecordingMicroAllocator::Create( + &context, model, &memory_allocator, micro_test::reporter); + TF_LITE_MICRO_EXPECT_NE(nullptr, micro_allocator); tflite::AllOpsResolver ops_resolver; tflite::NodeAndRegistration* node_and_registrations; - TF_LITE_MICRO_EXPECT_EQ( - kTfLiteOk, - allocator.PrepareFromFlatbuffer(ops_resolver, &node_and_registrations)); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, + micro_allocator->PrepareFromFlatbuffer( + ops_resolver, &node_and_registrations)); size_t num_ops = model->subgraphs()->Get(0)->operators()->size(); tflite::RecordedAllocation recorded_allocation = - allocator.GetRecordedAllocation( + micro_allocator->GetRecordedAllocation( tflite::RecordedAllocationType::kNodeAndRegistrationArray); TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.count, num_ops); TF_LITE_MICRO_EXPECT_EQ(recorded_allocation.requested_bytes, diff --git a/tensorflow/lite/micro/simple_memory_allocator.cc b/tensorflow/lite/micro/simple_memory_allocator.cc index 3b416047c8f..84ff37b8cbd 100644 --- a/tensorflow/lite/micro/simple_memory_allocator.cc +++ b/tensorflow/lite/micro/simple_memory_allocator.cc @@ -20,6 +20,7 @@ limitations under the License. 
#include #include "tensorflow/lite/core/api/error_reporter.h" +#include "tensorflow/lite/kernels/internal/compatibility.h" #include "tensorflow/lite/micro/memory_helpers.h" namespace tflite { @@ -41,6 +42,7 @@ SimpleMemoryAllocator::SimpleMemoryAllocator(ErrorReporter* error_reporter, /* static */ SimpleMemoryAllocator* SimpleMemoryAllocator::Create( ErrorReporter* error_reporter, uint8_t* buffer_head, size_t buffer_size) { + TFLITE_DCHECK(buffer_head != nullptr); SimpleMemoryAllocator tmp = SimpleMemoryAllocator(error_reporter, buffer_head, buffer_size); From 590a27e7db624fb465e88fb2dda54a72be15bb28 Mon Sep 17 00:00:00 2001 From: Jiho Choi Date: Mon, 8 Jun 2020 21:25:10 -0700 Subject: [PATCH 099/178] Read producer/consumer ids as uint64. Also, add uint64 to XStatValue to support test cases with uint64 stats and add IntOrUintValue() to XStatVisitor. PiperOrigin-RevId: 315414571 Change-Id: I61f98161488c39df8557aa24c7f4f94e50a708f7 --- .../convert/xplane_to_memory_profile_test.cc | 54 ++--- .../convert/xplane_to_op_stats_test.cc | 23 ++- .../convert/xplane_to_step_events_test.cc | 25 ++- .../profiler/utils/derived_timeline_test.cc | 7 +- .../core/profiler/utils/group_events.cc | 27 ++- .../core/profiler/utils/group_events_test.cc | 185 +++++++++++++++--- .../core/profiler/utils/xplane_test_utils.h | 4 +- .../core/profiler/utils/xplane_visitor.h | 5 + 8 files changed, 236 insertions(+), 94 deletions(-) diff --git a/tensorflow/core/profiler/convert/xplane_to_memory_profile_test.cc b/tensorflow/core/profiler/convert/xplane_to_memory_profile_test.cc index 2b6356a7aa1..6766fd5f1b5 100644 --- a/tensorflow/core/profiler/convert/xplane_to_memory_profile_test.cc +++ b/tensorflow/core/profiler/convert/xplane_to_memory_profile_test.cc @@ -40,15 +40,15 @@ TEST(ConvertXPlaneToMemoryProfile, OneAllocatorMultiActivitiesTest) { auto tf_executor_thread = host_plane_builder.GetOrCreateLine(0); CreateXEvent(&host_plane_builder, &tf_executor_thread, "MemoryAllocation", 40000, 1000, - {{StatType::kBytesReserved, 2000}, - {StatType::kBytesAllocated, 3000}, - {StatType::kBytesAvailable, 5000}, - {StatType::kPeakBytesInUse, 8500}, - {StatType::kRequestedBytes, 200}, - {StatType::kAllocationBytes, 256}, - {StatType::kAddress, 222333}, - {StatType::kStepId, -93746}, - {StatType::kDataType, 1}, + {{StatType::kBytesReserved, 2000LL}, + {StatType::kBytesAllocated, 3000LL}, + {StatType::kBytesAvailable, 5000LL}, + {StatType::kPeakBytesInUse, 8500LL}, + {StatType::kRequestedBytes, 200LL}, + {StatType::kAllocationBytes, 256LL}, + {StatType::kAddress, 222333LL}, + {StatType::kStepId, -93746LL}, + {StatType::kDataType, 1LL}, {StatType::kAllocatorName, "GPU_0_bfc"}, {StatType::kTfOp, "foo/bar"}, {StatType::kRegionType, "output"}, @@ -56,30 +56,30 @@ TEST(ConvertXPlaneToMemoryProfile, OneAllocatorMultiActivitiesTest) { CreateXEvent(&host_plane_builder, &tf_executor_thread, "MemoryDeallocation", 50000, 1000, - {{StatType::kBytesReserved, 2000}, - {StatType::kBytesAllocated, 2744}, - {StatType::kBytesAvailable, 5256}, - {StatType::kPeakBytesInUse, 8500}, - {StatType::kRequestedBytes, 200}, - {StatType::kAllocationBytes, 256}, - {StatType::kAddress, 222333}, - {StatType::kStepId, 0}, - {StatType::kDataType, 0}, + {{StatType::kBytesReserved, 2000LL}, + {StatType::kBytesAllocated, 2744LL}, + {StatType::kBytesAvailable, 5256LL}, + {StatType::kPeakBytesInUse, 8500LL}, + {StatType::kRequestedBytes, 200LL}, + {StatType::kAllocationBytes, 256LL}, + {StatType::kAddress, 222333LL}, + {StatType::kStepId, 0LL}, + {StatType::kDataType, 0LL}, 
{StatType::kAllocatorName, "GPU_0_bfc"}, {StatType::kRegionType, ""}, {StatType::kTensorShapes, ""}}); CreateXEvent(&host_plane_builder, &tf_executor_thread, "MemoryAllocation", 70000, 1000, - {{StatType::kBytesReserved, 2000}, - {StatType::kBytesAllocated, 5000}, - {StatType::kBytesAvailable, 3000}, - {StatType::kPeakBytesInUse, 9500}, - {StatType::kRequestedBytes, 300}, - {StatType::kAllocationBytes, 300}, - {StatType::kAddress, 345678}, - {StatType::kStepId, -93746}, - {StatType::kDataType, 9}, + {{StatType::kBytesReserved, 2000LL}, + {StatType::kBytesAllocated, 5000LL}, + {StatType::kBytesAvailable, 3000LL}, + {StatType::kPeakBytesInUse, 9500LL}, + {StatType::kRequestedBytes, 300LL}, + {StatType::kAllocationBytes, 300LL}, + {StatType::kAddress, 345678LL}, + {StatType::kStepId, -93746LL}, + {StatType::kDataType, 9LL}, {StatType::kAllocatorName, "GPU_0_bfc"}, {StatType::kTfOp, "mul_grad/Sum"}, {StatType::kRegionType, "temp"}, diff --git a/tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc b/tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc index e4cda680a56..5c1b6f8a89e 100644 --- a/tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc +++ b/tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc @@ -86,6 +86,9 @@ TEST(ConvertXPlaneToOpStats, RunEnvironment) { } TEST(ConvertXPlaneToOpStats, CpuOnlyStepDbTest) { + constexpr int64 kStepNum = 123; + constexpr int64 kStepId = 0; + XSpace space; XPlaneBuilder host_plane_builder(space.add_planes()); host_plane_builder.SetName(kHostThreads); @@ -93,14 +96,14 @@ TEST(ConvertXPlaneToOpStats, CpuOnlyStepDbTest) { auto main_thread = host_plane_builder.GetOrCreateLine(0); CreateXEvent(&host_plane_builder, &main_thread, HostEventType::kTraceContext, - 0, 100, {{StatType::kStepNum, 123}}); + 0, 100, {{StatType::kStepNum, kStepNum}}); CreateXEvent(&host_plane_builder, &main_thread, HostEventType::kFunctionRun, - 10, 90, {{StatType::kStepId, 0}}); + 10, 90, {{StatType::kStepId, kStepId}}); auto tf_executor_thread = host_plane_builder.GetOrCreateLine(1); CreateXEvent(&host_plane_builder, &tf_executor_thread, HostEventType::kExecutorStateProcess, 20, 80, - {{StatType::kStepId, 0}}); + {{StatType::kStepId, kStepId}}); CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 30, 70); GroupTfEvents(&space, /*event_group_name_map=*/nullptr); @@ -111,6 +114,10 @@ TEST(ConvertXPlaneToOpStats, CpuOnlyStepDbTest) { } TEST(ConvertXPlaneToOpStats, GpuStepDbTest) { + constexpr int64 kStepNum = 123; + constexpr int64 kStepId = 0; + constexpr int64 kCorrelationId = 100; + XSpace space; XPlaneBuilder host_plane_builder(space.add_planes()); host_plane_builder.SetName(kHostThreads); @@ -118,16 +125,16 @@ TEST(ConvertXPlaneToOpStats, GpuStepDbTest) { auto main_thread = host_plane_builder.GetOrCreateLine(0); CreateXEvent(&host_plane_builder, &main_thread, HostEventType::kTraceContext, - 0, 100, {{StatType::kStepNum, 123}}); + 0, 100, {{StatType::kStepNum, kStepNum}}); CreateXEvent(&host_plane_builder, &main_thread, HostEventType::kFunctionRun, - 10, 90, {{StatType::kStepId, 0}}); + 10, 90, {{StatType::kStepId, kStepId}}); auto tf_executor_thread = host_plane_builder.GetOrCreateLine(1); CreateXEvent(&host_plane_builder, &tf_executor_thread, HostEventType::kExecutorStateProcess, 20, 20, - {{StatType::kStepId, 0}}); + {{StatType::kStepId, kStepId}}); CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 30, 10, - {{StatType::kCorrelationId, 100}}); + {{StatType::kCorrelationId, kCorrelationId}}); XPlaneBuilder 
device_plane_builder(space.add_planes()); device_plane_builder.SetName(absl::StrCat(kGpuPlanePrefix, ":0")); @@ -135,7 +142,7 @@ TEST(ConvertXPlaneToOpStats, GpuStepDbTest) { auto stream = device_plane_builder.GetOrCreateLine(0); CreateXEvent(&device_plane_builder, &stream, "matmul", 50, 40, - {{StatType::kCorrelationId, 100}}); + {{StatType::kCorrelationId, kCorrelationId}}); GroupTfEvents(&space, /*event_group_name_map=*/nullptr); OpStats op_stats = ConvertXSpaceToOpStats(space); diff --git a/tensorflow/core/profiler/convert/xplane_to_step_events_test.cc b/tensorflow/core/profiler/convert/xplane_to_step_events_test.cc index 9ace9eb185c..ff68f1817ed 100644 --- a/tensorflow/core/profiler/convert/xplane_to_step_events_test.cc +++ b/tensorflow/core/profiler/convert/xplane_to_step_events_test.cc @@ -37,6 +37,13 @@ namespace { // consists of matmul. The host's step db should be created only for the step // observed on the host. TEST(ConvertXPlaneToOpStats, CpuOnlyStepDbTest) { + constexpr int64 kFirstStepNum = 123; + constexpr int64 kSecondStepNum = 456; + constexpr int64 kFirstStepId = 0; + constexpr int64 kSecondStepId = 1; + constexpr int64 kFirstCorrelationId = 100; + constexpr int64 kSecondCorrelationId = 200; + XSpace space; XPlane* host_plane = space.add_planes(); XPlaneBuilder host_plane_builder(host_plane); @@ -45,25 +52,25 @@ TEST(ConvertXPlaneToOpStats, CpuOnlyStepDbTest) { auto main_thread = host_plane_builder.GetOrCreateLine(0); CreateXEvent(&host_plane_builder, &main_thread, HostEventType::kTraceContext, - 0, 100, {{StatType::kStepNum, 123}}); + 0, 100, {{StatType::kStepNum, kFirstStepNum}}); CreateXEvent(&host_plane_builder, &main_thread, HostEventType::kFunctionRun, - 10, 90, {{StatType::kStepId, 0}}); + 10, 90, {{StatType::kStepId, kFirstStepId}}); CreateXEvent(&host_plane_builder, &main_thread, HostEventType::kTraceContext, - 300, 100, {{StatType::kStepNum, 456}}); + 300, 100, {{StatType::kStepNum, kSecondStepNum}}); CreateXEvent(&host_plane_builder, &main_thread, HostEventType::kFunctionRun, - 310, 90, {{StatType::kStepId, 1}}); + 310, 90, {{StatType::kStepId, kSecondStepId}}); auto tf_executor_thread = host_plane_builder.GetOrCreateLine(1); CreateXEvent(&host_plane_builder, &tf_executor_thread, HostEventType::kExecutorStateProcess, 20, 20, - {{StatType::kStepId, 0}}); + {{StatType::kStepId, kFirstStepId}}); CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 30, 10, - {{StatType::kCorrelationId, 100}}); + {{StatType::kCorrelationId, kFirstCorrelationId}}); CreateXEvent(&host_plane_builder, &tf_executor_thread, HostEventType::kExecutorStateProcess, 320, 20, - {{StatType::kStepId, 1}}); + {{StatType::kStepId, kSecondStepId}}); CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 330, 10, - {{StatType::kCorrelationId, 200}}); + {{StatType::kCorrelationId, kSecondCorrelationId}}); XPlane* device_plane = space.add_planes(); XPlaneBuilder device_plane_builder(device_plane); @@ -71,7 +78,7 @@ TEST(ConvertXPlaneToOpStats, CpuOnlyStepDbTest) { auto stream = device_plane_builder.GetOrCreateLine(0); CreateXEvent(&device_plane_builder, &stream, "matmul", 50, 40, - {{StatType::kCorrelationId, 100}}); + {{StatType::kCorrelationId, kFirstCorrelationId}}); GroupTfEvents(&space, nullptr); StepEvents device_step_events = diff --git a/tensorflow/core/profiler/utils/derived_timeline_test.cc b/tensorflow/core/profiler/utils/derived_timeline_test.cc index c6922e7ab74..4ae558eb446 100644 --- a/tensorflow/core/profiler/utils/derived_timeline_test.cc +++ 
b/tensorflow/core/profiler/utils/derived_timeline_test.cc @@ -102,6 +102,9 @@ TEST(DerivedTimelineTest, TfOpLineTest) { // Checks that the dependency between the step line and the TF op line prevents // TF op events from being expanded. TEST(DerivedTimelineTest, DependencyTest) { + constexpr int64 kFirstGroupId = 0; + constexpr int64 kSecondGroupId = 1; + const absl::string_view kTfOpName = "mul:Mul"; const absl::string_view kKernelDetails = "kernel_details"; XSpace space; @@ -110,11 +113,11 @@ TEST(DerivedTimelineTest, DependencyTest) { XPlaneBuilder plane_builder(plane); auto line_builder = plane_builder.GetOrCreateLine(0); CreateXEvent(&plane_builder, &line_builder, "op1", 0, 100, - {{StatType::kGroupId, 0}, + {{StatType::kGroupId, kFirstGroupId}, {StatType::kLevel0, kTfOpName}, {StatType::kKernelDetails, kKernelDetails}}); CreateXEvent(&plane_builder, &line_builder, "op2", 200, 300, - {{StatType::kGroupId, 1}, + {{StatType::kGroupId, kSecondGroupId}, {StatType::kLevel0, kTfOpName}, {StatType::kKernelDetails, kKernelDetails}}); GenerateDerivedTimeLines(event_group_name_map, &space); diff --git a/tensorflow/core/profiler/utils/group_events.cc b/tensorflow/core/profiler/utils/group_events.cc index 99c6136fc84..b0c4f7972a5 100644 --- a/tensorflow/core/profiler/utils/group_events.cc +++ b/tensorflow/core/profiler/utils/group_events.cc @@ -111,6 +111,8 @@ void SetGroupId(const XPlaneVisitor& visitor, int64 group_id, XEvent* event) { void SetContextGroup(EventNode* event, ContextGroupMap* context_groups) { auto producer = event->GetProducerContext(); if (producer.has_value()) { + DCHECK_EQ(((*context_groups)[producer->type][producer->id]).producer, + nullptr); ((*context_groups)[producer->type][producer->id]).producer = event; } auto consumer = event->GetConsumerContext(); @@ -124,9 +126,10 @@ void ConnectContextGroups(const ContextGroupMap& context_groups) { for (auto& type_id_group : context_groups) { for (auto& id_group : type_id_group.second) { const ContextGroup& group = id_group.second; - EventNode* parent = group.producer; - for (EventNode* child : group.consumers) { - parent->AddChild(child); + if (EventNode* parent = group.producer) { + for (EventNode* child : group.consumers) { + parent->AddChild(child); + } } } } @@ -194,13 +197,13 @@ EventNode::EventNode(const XPlaneVisitor* plane, XLine* raw_line, producer_type = stat.IntValue(); break; case StatType::kProducerId: - producer_id = stat.IntValue(); + producer_id = stat.UintValue(); break; case StatType::kConsumerType: consumer_type = stat.IntValue(); break; case StatType::kConsumerId: - consumer_id = stat.IntValue(); + consumer_id = stat.UintValue(); break; case StatType::kIsRoot: is_root_ = stat.IntValue(); @@ -330,7 +333,7 @@ void EventForest::ConnectIntraThread(const XPlaneVisitor& visitor, void EventForest::ConnectInterThread( const std::vector& connect_info_list) { for (const auto& connect_info : connect_info_list) { - absl::flat_hash_map, EventNode*> connect_map; + absl::flat_hash_map, EventNode*> connect_map; const std::vector& parent_stat_types = connect_info.parent_stat_types; const std::vector* child_stat_types = &connect_info.child_stat_types; @@ -340,14 +343,12 @@ void EventForest::ConnectInterThread( if (auto parent_event_node_list = gtl::FindOrNull(event_node_map_, connect_info.parent_event_type)) { for (const auto& parent_event_node : *parent_event_node_list) { - std::vector stats; + std::vector stats; for (auto stat_type : parent_stat_types) { absl::optional stat = parent_event_node->GetContextStat(stat_type); if 
(!stat) break; - stats.push_back((stat->ValueCase() == XStat::kInt64Value) - ? stat->IntValue() - : stat->UintValue()); + stats.push_back(stat->IntOrUintValue()); } if (stats.size() == parent_stat_types.size()) { connect_map[stats] = parent_event_node.get(); @@ -357,14 +358,12 @@ void EventForest::ConnectInterThread( if (auto child_event_node_list = gtl::FindOrNull(event_node_map_, connect_info.child_event_type)) { for (const auto& child_event_node : *child_event_node_list) { - std::vector stats; + std::vector stats; for (auto stat_type : *child_stat_types) { absl::optional stat = child_event_node->GetContextStat(stat_type); if (!stat) break; - stats.push_back((stat->ValueCase() == XStat::kInt64Value) - ? stat->IntValue() - : stat->UintValue()); + stats.push_back(stat->IntOrUintValue()); } if (stats.size() == child_stat_types->size()) { if (auto parent_event_node = gtl::FindPtrOrNull(connect_map, stats)) { diff --git a/tensorflow/core/profiler/utils/group_events_test.cc b/tensorflow/core/profiler/utils/group_events_test.cc index 6ff069dc1ae..dd56f56f741 100644 --- a/tensorflow/core/profiler/utils/group_events_test.cc +++ b/tensorflow/core/profiler/utils/group_events_test.cc @@ -31,6 +31,10 @@ namespace profiler { namespace { TEST(GroupEventsTest, GroupGpuTraceTest) { + constexpr int64 kStepNum = 123; + constexpr int64 kStepId = 0; + constexpr int64 kCorrelationId = 100; + XSpace space; XPlaneBuilder host_plane_builder(space.add_planes()); host_plane_builder.SetName(kHostThreads); @@ -38,16 +42,16 @@ TEST(GroupEventsTest, GroupGpuTraceTest) { auto main_thread = host_plane_builder.GetOrCreateLine(0); CreateXEvent(&host_plane_builder, &main_thread, HostEventType::kTraceContext, - 0, 100, {{StatType::kStepNum, 123}}); + 0, 100, {{StatType::kStepNum, kStepNum}}); CreateXEvent(&host_plane_builder, &main_thread, HostEventType::kFunctionRun, - 10, 90, {{StatType::kStepId, 0}}); + 10, 90, {{StatType::kStepId, kStepId}}); auto tf_executor_thread = host_plane_builder.GetOrCreateLine(1); CreateXEvent(&host_plane_builder, &tf_executor_thread, HostEventType::kExecutorStateProcess, 20, 80, - {{StatType::kStepId, 0}}); + {{StatType::kStepId, kStepId}}); CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 30, 70, - {{StatType::kCorrelationId, 100}}); + {{StatType::kCorrelationId, kCorrelationId}}); XPlane* device_plane = space.add_planes(); XPlaneBuilder device_plane_builder(device_plane); @@ -55,7 +59,7 @@ TEST(GroupEventsTest, GroupGpuTraceTest) { auto stream = device_plane_builder.GetOrCreateLine(0); CreateXEvent(&device_plane_builder, &stream, "matmul", 200, 300, - {{StatType::kCorrelationId, 100}}); + {{StatType::kCorrelationId, kCorrelationId}}); EventGroupNameMap event_group_name_map; GroupTfEvents(&space, &event_group_name_map); @@ -69,6 +73,10 @@ TEST(GroupEventsTest, GroupGpuTraceTest) { } TEST(GroupEventsTest, GroupTensorFlowLoopTest) { + constexpr int64 kStepId = 0; + constexpr int64 kIterNum = 10; + constexpr int64 kCorrelationId = 100; + XSpace space; XPlaneBuilder host_plane_builder(space.add_planes()); host_plane_builder.SetName(kHostThreads); @@ -77,12 +85,12 @@ TEST(GroupEventsTest, GroupTensorFlowLoopTest) { auto tf_executor_thread = host_plane_builder.GetOrCreateLine(0); CreateXEvent(&host_plane_builder, &tf_executor_thread, HostEventType::kExecutorStateProcess, 5, 10, - {{StatType::kStepId, 0}, {StatType::kIterNum, 10}}); + {{StatType::kStepId, kStepId}, {StatType::kIterNum, kIterNum}}); CreateXEvent(&host_plane_builder, &tf_executor_thread, 
HostEventType::kExecutorStateProcess, 20, 80, - {{StatType::kStepId, 0}, {StatType::kIterNum, 10}}); + {{StatType::kStepId, kStepId}, {StatType::kIterNum, kIterNum}}); CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 30, 70, - {{StatType::kCorrelationId, 100}}); + {{StatType::kCorrelationId, kCorrelationId}}); XPlane* device_plane = space.add_planes(); XPlaneBuilder device_plane_builder(device_plane); @@ -90,7 +98,7 @@ TEST(GroupEventsTest, GroupTensorFlowLoopTest) { auto stream = device_plane_builder.GetOrCreateLine(0); CreateXEvent(&device_plane_builder, &stream, "matmul", 200, 300, - {{StatType::kCorrelationId, 100}}); + {{StatType::kCorrelationId, kCorrelationId}}); EventGroupNameMap event_group_name_map; GroupTfEvents(&space, &event_group_name_map); @@ -111,6 +119,11 @@ TEST(GroupEventsTest, GroupTensorFlowLoopTest) { // group_id is initialized to the first TF loop's first iter_num (10) and then // monotonically increased. TEST(GroupEventsTest, GroupMultipleTensorFlowLoopsTest) { + constexpr int64 kFirstStepId = 0; + constexpr int64 kSecondStepId = 1; + constexpr int64 kFirstIterNumStart = 10; + constexpr int64 kSecondIterNumStart = 0; + XSpace space; XPlaneBuilder host_plane_builder(space.add_planes()); host_plane_builder.SetName(kHostThreads); @@ -119,17 +132,21 @@ TEST(GroupEventsTest, GroupMultipleTensorFlowLoopsTest) { auto first_tf_executor_thread = host_plane_builder.GetOrCreateLine(0); CreateXEvent(&host_plane_builder, &first_tf_executor_thread, HostEventType::kExecutorStateProcess, 220, 80, - {{StatType::kStepId, 1}, {StatType::kIterNum, 0}}); + {{StatType::kStepId, kSecondStepId}, + {StatType::kIterNum, kSecondIterNumStart}}); CreateXEvent(&host_plane_builder, &first_tf_executor_thread, HostEventType::kExecutorStateProcess, 320, 80, - {{StatType::kStepId, 1}, {StatType::kIterNum, 1}}); + {{StatType::kStepId, kSecondStepId}, + {StatType::kIterNum, kSecondIterNumStart + 1}}); auto second_tf_executor_thread = host_plane_builder.GetOrCreateLine(1); CreateXEvent(&host_plane_builder, &second_tf_executor_thread, HostEventType::kExecutorStateProcess, 20, 80, - {{StatType::kStepId, 0}, {StatType::kIterNum, 10}}); + {{StatType::kStepId, kFirstStepId}, + {StatType::kIterNum, kFirstIterNumStart}}); CreateXEvent(&host_plane_builder, &second_tf_executor_thread, HostEventType::kExecutorStateProcess, 120, 80, - {{StatType::kStepId, 0}, {StatType::kIterNum, 11}}); + {{StatType::kStepId, kFirstStepId}, + {StatType::kIterNum, kFirstIterNumStart + 1}}); EventGroupNameMap event_group_name_map; GroupTfEvents(&space, &event_group_name_map); @@ -141,6 +158,10 @@ TEST(GroupEventsTest, GroupMultipleTensorFlowLoopsTest) { } TEST(GroupEventsTest, GroupFunctionalOp) { + constexpr int64 kStepNum = 123; + constexpr int64 kStepId = 0; + constexpr int64 kFunctionStepId = 1; + XSpace space; XPlane* host_plane = space.add_planes(); XPlaneBuilder host_plane_builder(host_plane); @@ -149,20 +170,20 @@ TEST(GroupEventsTest, GroupFunctionalOp) { auto main_thread = host_plane_builder.GetOrCreateLine(0); CreateXEvent(&host_plane_builder, &main_thread, HostEventType::kTraceContext, - 0, 200, {{StatType::kStepNum, 123}}); + 0, 200, {{StatType::kStepNum, kStepNum}}); CreateXEvent(&host_plane_builder, &main_thread, HostEventType::kFunctionRun, - 10, 190, {{StatType::kStepId, 0}}); + 10, 190, {{StatType::kStepId, kStepId}}); auto tf_executor_thread = host_plane_builder.GetOrCreateLine(0); CreateXEvent(&host_plane_builder, &tf_executor_thread, HostEventType::kExecutorStateProcess, 20, 80, - 
{{StatType::kStepId, 0}}); + {{StatType::kStepId, kStepId}}); CreateXEvent(&host_plane_builder, &tf_executor_thread, HostEventType::kRemoteCallOp, 30, 70, - {{StatType::kFunctionStepId, 1}}); + {{StatType::kFunctionStepId, kFunctionStepId}}); CreateXEvent(&host_plane_builder, &tf_executor_thread, HostEventType::kExecutorStateProcess, 100, 150, - {{StatType::kStepId, 1}}); + {{StatType::kStepId, kFunctionStepId}}); EventGroupNameMap event_group_name_map; GroupTfEvents(&space, &event_group_name_map); @@ -185,6 +206,8 @@ TEST(GroupEventsTest, GroupFunctionalOp) { } TEST(GroupEventsTest, EagerOpTest) { + constexpr int64 kCorrelationId = 100; + XSpace space; XPlane* host_plane = space.add_planes(); XPlaneBuilder host_plane_builder(host_plane); @@ -194,12 +217,12 @@ TEST(GroupEventsTest, EagerOpTest) { auto main_thread = host_plane_builder.GetOrCreateLine(0); // Eagerly scheduled GPU kernel. CreateXEvent(&host_plane_builder, &main_thread, - HostEventType::kEagerKernelExecute, 10, 100, {}); + HostEventType::kEagerKernelExecute, 10, 100); CreateXEvent(&host_plane_builder, &main_thread, "matmul", 10, 100, - {{StatType::kCorrelationId, 100}}); + {{StatType::kCorrelationId, kCorrelationId}}); // Eagerly executed CPU TF op. CreateXEvent(&host_plane_builder, &main_thread, - HostEventType::kEagerKernelExecute, 120, 80, {}); + HostEventType::kEagerKernelExecute, 120, 80); CreateXEvent(&host_plane_builder, &main_thread, "add:Add", 120, 80); XPlane* device_plane = space.add_planes(); @@ -209,7 +232,7 @@ TEST(GroupEventsTest, EagerOpTest) { auto stream = device_plane_builder.GetOrCreateLine(0); // Eagerly executed GPU kernel. CreateXEvent(&device_plane_builder, &stream, "matmul", 200, 300, - {{StatType::kCorrelationId, 100}}); + {{StatType::kCorrelationId, kCorrelationId}}); GroupTfEvents(&space, /*event_group_name_map=*/nullptr); XPlaneVisitor host_plane_visitor = CreateTfXPlaneVisitor(host_plane); @@ -227,6 +250,10 @@ TEST(GroupEventsTest, EagerOpTest) { } TEST(GroupEventsTest, FunctionOpTest) { + constexpr int64 kStepNum = 123; + constexpr int64 kStepId = 0; + constexpr int64 kCorrelationId = 100; + XSpace space; XPlane* host_plane = space.add_planes(); XPlaneBuilder host_plane_builder(host_plane); @@ -235,19 +262,19 @@ TEST(GroupEventsTest, FunctionOpTest) { auto main_thread = host_plane_builder.GetOrCreateLine(0); CreateXEvent(&host_plane_builder, &main_thread, HostEventType::kTraceContext, - 0, 100, {{StatType::kStepNum, 123}}); + 0, 100, {{StatType::kStepNum, kStepNum}}); CreateXEvent(&host_plane_builder, &main_thread, - HostEventType::kEagerKernelExecute, 10, 90, {}); + HostEventType::kEagerKernelExecute, 10, 90); CreateXEvent(&host_plane_builder, &main_thread, HostEventType::kFunctionRun, - 10, 90, {{StatType::kStepId, 0}}); + 10, 90, {{StatType::kStepId, kStepId}}); auto tf_executor_thread = host_plane_builder.GetOrCreateLine(1); CreateXEvent(&host_plane_builder, &tf_executor_thread, HostEventType::kExecutorStateProcess, 20, 80, - {{StatType::kStepId, 0}}); + {{StatType::kStepId, kStepId}}); // GPU kernel scheduled inside tf.function. CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 30, 30, - {{StatType::kCorrelationId, 100}}); + {{StatType::kCorrelationId, kCorrelationId}}); // CPU TF op executed inside tf.function. CreateXEvent(&host_plane_builder, &tf_executor_thread, "add:Add", 70, 20); @@ -258,7 +285,7 @@ TEST(GroupEventsTest, FunctionOpTest) { auto stream = device_plane_builder.GetOrCreateLine(0); // GPU kernel executed as part of tf.function. 
CreateXEvent(&device_plane_builder, &stream, "matmul", 200, 300, - {{StatType::kCorrelationId, 100}}); + {{StatType::kCorrelationId, kCorrelationId}}); GroupTfEvents(&space, /*event_group_name_map=*/nullptr); XPlaneVisitor host_plane_visitor = CreateTfXPlaneVisitor(host_plane); @@ -276,8 +303,9 @@ TEST(GroupEventsTest, FunctionOpTest) { } TEST(GroupEventsTest, SemanticArgTest) { + constexpr int64 kIsRoot = 1; constexpr int64 kStepNum = 100; - constexpr int kContextType = 123; + constexpr int64 kContextType = 123; constexpr uint64 kContextId = 456; XSpace raw_space; @@ -286,7 +314,7 @@ TEST(GroupEventsTest, SemanticArgTest) { plane.ReserveLines(2); auto root_producer = plane.GetOrCreateLine(0); CreateXEvent(&plane, &root_producer, HostEventType::kTraceContext, 0, 100, - {{StatType::kIsRoot, 1}, {StatType::kStepNum, kStepNum}}); + {{StatType::kIsRoot, kIsRoot}, {StatType::kStepNum, kStepNum}}); CreateXEvent(&plane, &root_producer, HostEventType::kFunctionRun, 10, 90, {{StatType::kProducerType, kContextType}, {StatType::kProducerId, kContextId}}); @@ -314,7 +342,99 @@ TEST(GroupEventsTest, SemanticArgTest) { EXPECT_EQ(num_events, 3); } +TEST(GroupEventsTest, SemanticIntArgNoMatchTest) { + constexpr int64 kIsRoot = 1; + constexpr int64 kStepNum = 100; + constexpr int64 kContextType = 123; + constexpr uint64 kProducerId = 456; + constexpr uint64 kConsumerId = 789; + + XSpace raw_space; + XPlane* raw_plane = raw_space.add_planes(); + XPlaneBuilder plane(raw_plane); + plane.ReserveLines(2); + auto root_producer = plane.GetOrCreateLine(0); + CreateXEvent(&plane, &root_producer, HostEventType::kTraceContext, 0, 100, + {{StatType::kIsRoot, kIsRoot}, {StatType::kStepNum, kStepNum}}); + CreateXEvent(&plane, &root_producer, HostEventType::kFunctionRun, 10, 90, + {{StatType::kProducerType, kContextType}, + {StatType::kProducerId, kProducerId}}); + auto consumer = plane.GetOrCreateLine(1); + CreateXEvent(&plane, &consumer, HostEventType::kExecutorStateProcess, 20, 80, + {{StatType::kConsumerType, kContextType}, + {StatType::kConsumerId, kConsumerId}}); + + GroupTfEvents(&raw_space, /*event_group_name_map=*/nullptr); + int num_events = 0; + CreateTfXPlaneVisitor(raw_plane).ForEachLine( + [&](const tensorflow::profiler::XLineVisitor& line) { + num_events += line.NumEvents(); + line.ForEachEvent( + [&](const tensorflow::profiler::XEventVisitor& event) { + absl::optional group_id; + if (absl::optional stat = + event.GetStat(StatType::kGroupId)) { + group_id = stat->IntValue(); + } + if (event.Type() == HostEventType::kExecutorStateProcess) { + EXPECT_FALSE(group_id.has_value()); + } else { + EXPECT_TRUE(group_id.has_value()); + EXPECT_EQ(*group_id, 0); + } + }); + }); + EXPECT_EQ(num_events, 3); +} + +TEST(GroupEventsTest, SemanticUintArgNoMatchTest) { + constexpr int64 kIsRoot = 1; + constexpr int64 kStepNum = 100; + constexpr int64 kContextType = 123; + constexpr uint64 kProducerId = UINT64_MAX; + constexpr uint64 kConsumerId = UINT64_MAX - 1; + + XSpace raw_space; + XPlane* raw_plane = raw_space.add_planes(); + XPlaneBuilder plane(raw_plane); + plane.ReserveLines(2); + auto root_producer = plane.GetOrCreateLine(0); + CreateXEvent(&plane, &root_producer, HostEventType::kTraceContext, 0, 100, + {{StatType::kIsRoot, kIsRoot}, {StatType::kStepNum, kStepNum}}); + CreateXEvent(&plane, &root_producer, HostEventType::kFunctionRun, 10, 90, + {{StatType::kProducerType, kContextType}, + {StatType::kProducerId, kProducerId}}); + auto consumer = plane.GetOrCreateLine(1); + CreateXEvent(&plane, &consumer, 
HostEventType::kExecutorStateProcess, 20, 80, + {{StatType::kConsumerType, kContextType}, + {StatType::kConsumerId, kConsumerId}}); + + GroupTfEvents(&raw_space, /*event_group_name_map=*/nullptr); + int num_events = 0; + CreateTfXPlaneVisitor(raw_plane).ForEachLine( + [&](const tensorflow::profiler::XLineVisitor& line) { + num_events += line.NumEvents(); + line.ForEachEvent( + [&](const tensorflow::profiler::XEventVisitor& event) { + absl::optional group_id; + if (absl::optional stat = + event.GetStat(StatType::kGroupId)) { + group_id = stat->IntValue(); + } + if (event.Type() == HostEventType::kExecutorStateProcess) { + EXPECT_FALSE(group_id.has_value()); + } else { + EXPECT_TRUE(group_id.has_value()); + EXPECT_EQ(*group_id, 0); + } + }); + }); + EXPECT_EQ(num_events, 3); +} + TEST(GroupEventsTest, AsyncEventTest) { + constexpr int64 kIsRoot = 1; + constexpr int64 kIsAsync = 1; constexpr absl::string_view kParent = "parent"; constexpr absl::string_view kAsync = "async"; constexpr absl::string_view kChild = "child"; @@ -324,8 +444,9 @@ TEST(GroupEventsTest, AsyncEventTest) { XPlaneBuilder plane(raw_plane); plane.ReserveLines(1); auto line = plane.GetOrCreateLine(0); - CreateXEvent(&plane, &line, kParent, 0, 100, {{StatType::kIsRoot, 1}}); - CreateXEvent(&plane, &line, kAsync, 10, 200, {{StatType::kIsAsync, 1}}); + CreateXEvent(&plane, &line, kParent, 0, 100, {{StatType::kIsRoot, kIsRoot}}); + CreateXEvent(&plane, &line, kAsync, 10, 200, + {{StatType::kIsAsync, kIsAsync}}); CreateXEvent(&plane, &line, kChild, 20, 80); GroupTfEvents(&raw_space, /*event_group_name_map=*/nullptr); diff --git a/tensorflow/core/profiler/utils/xplane_test_utils.h b/tensorflow/core/profiler/utils/xplane_test_utils.h index b42599baecd..9abf09fc695 100644 --- a/tensorflow/core/profiler/utils/xplane_test_utils.h +++ b/tensorflow/core/profiler/utils/xplane_test_utils.h @@ -26,7 +26,7 @@ limitations under the License. namespace tensorflow { namespace profiler { -using XStatValue = absl::variant; +using XStatValue = absl::variant; void CreateXEvent( XPlaneBuilder* plane_builder, XLineBuilder* line_builder, @@ -36,7 +36,7 @@ void CreateXEvent( void CreateXEvent( XPlaneBuilder* plane_builder, XLineBuilder* line_builder, HostEventType event_type, int64 offset_ps, int64 duration_ps, - std::initializer_list> stats); + std::initializer_list> stats = {}); void CreateTfFunctionCallEvent(XPlaneBuilder* plane_builder, XLineBuilder* line_builder, diff --git a/tensorflow/core/profiler/utils/xplane_visitor.h b/tensorflow/core/profiler/utils/xplane_visitor.h index a838825c773..6605bdf5658 100644 --- a/tensorflow/core/profiler/utils/xplane_visitor.h +++ b/tensorflow/core/profiler/utils/xplane_visitor.h @@ -53,6 +53,11 @@ class XStatVisitor { uint64 UintValue() const { return stat_->uint64_value(); } + uint64 IntOrUintValue() const { + return ValueCase() == XStat::kUint64Value ? UintValue() + : static_cast(IntValue()); + } + double DoubleValue() const { return stat_->double_value(); } // Returns a string view. 
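
Note on the uint64 key change above: a context ID stat can be recorded either as an int64 or as a uint64 value, so producer/consumer matching is only reliable once both sides are folded into a single representation, which is what the new IntOrUintValue() accessor and the std::vector<uint64> connect_map keys do. The standalone C++ sketch below is illustrative only; the Stat struct and IntOrUint() name are hypothetical stand-ins for the XStat/XStatVisitor code in the diff above. It shows how normalizing to uint64 keeps keys comparable even when an ID exceeds INT64_MAX:

    #include <cstdint>
    #include <iostream>

    // Hypothetical stand-in for an XStat carrying either a signed or an
    // unsigned 64-bit value.
    struct Stat {
      bool is_unsigned;
      int64_t int64_value;
      uint64_t uint64_value;

      // Same idea as IntOrUintValue(): fold both cases into uint64 so map
      // keys built from producer and consumer stats always agree.
      uint64_t IntOrUint() const {
        return is_unsigned ? uint64_value
                           : static_cast<uint64_t>(int64_value);
      }
    };

    int main() {
      // Producer stored the ID as uint64; consumer stored the same bits as
      // int64. Normalized keys still compare equal.
      Stat producer{/*is_unsigned=*/true, 0, UINT64_MAX};
      Stat consumer{/*is_unsigned=*/false,
                    static_cast<int64_t>(UINT64_MAX), 0};
      std::cout << (producer.IntOrUint() == consumer.IntOrUint()) << "\n";
      return 0;
    }

Keyed this way, genuinely different IDs (as in SemanticUintArgNoMatchTest, UINT64_MAX vs UINT64_MAX - 1) simply produce no parent/child link rather than a spurious one.
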
From 7b0071d95406f500f81393b0486909dd847dd584 Mon Sep 17 00:00:00 2001 From: Taehee Jeong Date: Mon, 8 Jun 2020 21:40:20 -0700 Subject: [PATCH 100/178] Add build-related tags to TensorFlowLite and TensorFlowLiteAllDelegates PiperOrigin-RevId: 315416013 Change-Id: Iadec680f8c999cd9c32d7945e0da66675da4191b --- tensorflow/lite/experimental/swift/BUILD.apple | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/experimental/swift/BUILD.apple b/tensorflow/lite/experimental/swift/BUILD.apple index 9ea45854fed..d16e5b1030b 100644 --- a/tensorflow/lite/experimental/swift/BUILD.apple +++ b/tensorflow/lite/experimental/swift/BUILD.apple @@ -56,7 +56,7 @@ swift_library( "//conditions:default": [], }), module_name = "TensorFlowLite", - tags = TFL_DEFAULT_TAGS, + tags = TFL_DEFAULT_TAGS + ["nobuilder"], visibility = ios_visibility_whitelist(), deps = [ "//tensorflow/lite/experimental/ios:tensorflow_lite_c", @@ -82,7 +82,7 @@ swift_library( "-Wl,-weak_framework,Metal", ], module_name = "TensorFlowLite", - tags = TFL_DEFAULT_TAGS, + tags = TFL_DEFAULT_TAGS + ["builder_default_ios_arm64"], deps = [ "//tensorflow/lite/delegates/gpu:metal_delegate", "//tensorflow/lite/experimental/delegates/coreml:coreml_delegate", From 0266394d50a1f495afdfb40b4c2e2622c24502d9 Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Mon, 8 Jun 2020 22:16:33 -0700 Subject: [PATCH 101/178] Compress CUDA kernel binaries (CUBINs). Impact: -32MB wheel size -600MB _pywrap_tensorflow_internal.so -70MB memory during startup +120ms startup time PiperOrigin-RevId: 315419813 Change-Id: I2c39a88d95a4aa3a692560c8e3d78b125e8445c9 --- .../clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl | 5 ++++- .../crosstool/windows/msvc_wrapper_for_nvcc.py.tpl | 4 ++++ third_party/gpus/cuda/build_defs.bzl.tpl | 11 +++++++---- third_party/gpus/cuda_configure.bzl | 8 ++++---- .../windows/msvc_wrapper_for_nvcc.py | 4 ++++ .../clang/bin/crosstool_wrapper_driver_is_not_gcc | 5 ++++- 6 files changed, 27 insertions(+), 10 deletions(-) diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl index a48ef8bf35a..b35fec975da 100755 --- a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl +++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl @@ -62,7 +62,7 @@ def GetOptionValue(argv, option): Args: argv: A list of strings, possibly the argv passed to main(). - option: The option whose value to extract, without the leading '-'. + option: The option whose value to extract, with the leading '-'. Returns: A list of values, either directly following the option, @@ -189,6 +189,8 @@ def InvokeNvcc(argv, log=False): nvcc_allowed_std_options = ["c++03", "c++11", "c++14"] std_options = ''.join([' -std=' + define for define in std_options if define in nvcc_allowed_std_options][-1:]) + fatbin_options = ''.join([' --fatbin-options=' + option + for option in GetOptionValue(argv, '-Xcuda-fatbinary')]) # The list of source files get passed after the -c option. I don't know of # any other reliable way to just get the list of source files to be compiled. 
@@ -233,6 +235,7 @@ def InvokeNvcc(argv, log=False): nvccopts += std_options nvccopts += m_options nvccopts += warning_options + nvccopts += fatbin_options if depfiles: # Generate the dependency file diff --git a/third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.py.tpl b/third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.py.tpl index 73012876691..d2c9b917168 100644 --- a/third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.py.tpl +++ b/third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.py.tpl @@ -130,6 +130,9 @@ def InvokeNvcc(argv, log=False): undefines, argv = GetOptionValue(argv, '/U') undefines = ['-U' + define for define in undefines] + fatbin_options, argv = GetOptionValue(argv, '-Xcuda-fatbinary') + fatbin_options = ['--fatbin-options=' + option for option in fatbin_options] + # The rest of the unrecognized options should be passed to host compiler host_compiler_options = [option for option in argv if option not in (src_files + out_file)] @@ -154,6 +157,7 @@ def InvokeNvcc(argv, log=False): nvccopts += undefines nvccopts += defines nvccopts += m_options + nvccopts += fatbin_options nvccopts += ['--compiler-options="' + " ".join(host_compiler_options) + '"'] nvccopts += ['-x', 'cu'] + opt + includes + out + ['-c'] + src_files # Specify a unique temp directory for nvcc to generate intermediate files, diff --git a/third_party/gpus/cuda/build_defs.bzl.tpl b/third_party/gpus/cuda/build_defs.bzl.tpl index bba772e2377..d931a02f9b4 100644 --- a/third_party/gpus/cuda/build_defs.bzl.tpl +++ b/third_party/gpus/cuda/build_defs.bzl.tpl @@ -40,12 +40,15 @@ def if_cuda_clang_opt(if_true, if_false = []): def cuda_default_copts(): """Default options for all CUDA compilations.""" - return if_cuda( - ["-x", "cuda", "-DGOOGLE_CUDA=1"] - ) + if_cuda_clang_opt( + return if_cuda([ + "-x", "cuda", + "-DGOOGLE_CUDA=1", + "-Xcuda-fatbinary=--compress-all", + "--no-cuda-include-ptx=all" + ] + %{cuda_extra_copts}) + if_cuda_clang_opt( # Some important CUDA optimizations are only enabled at O3. 
["-O3"] - ) + %{cuda_extra_copts} + ) def cuda_is_configured(): """Returns true if CUDA was enabled during the configure process.""" diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl index 90ce206db01..0b87ba1ae2a 100644 --- a/third_party/gpus/cuda_configure.bzl +++ b/third_party/gpus/cuda_configure.bzl @@ -905,14 +905,14 @@ def _tf_sysroot(repository_ctx): return get_host_environ(repository_ctx, _TF_SYSROOT, "") def _compute_cuda_extra_copts(repository_ctx, compute_capabilities): - capability_flags = ["--no-cuda-include-ptx=all"] + copts = [] for capability in compute_capabilities: if capability.startswith("compute_"): capability = capability.replace("compute_", "sm_") - capability_flags.append("--cuda-include-ptx=%s" % capability) - capability_flags.append("--cuda-gpu-arch=%s" % capability) + copts.append("--cuda-include-ptx=%s" % capability) + copts.append("--cuda-gpu-arch=%s" % capability) - return str(capability_flags) + return str(copts) def _tpl_path(repository_ctx, filename): return repository_ctx.path(Label("//third_party/gpus/%s.tpl" % filename)) diff --git a/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.0/windows/msvc_wrapper_for_nvcc.py b/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.0/windows/msvc_wrapper_for_nvcc.py index 9d5f7fb6118..9c0b8b6efd6 100755 --- a/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.0/windows/msvc_wrapper_for_nvcc.py +++ b/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.0/windows/msvc_wrapper_for_nvcc.py @@ -130,6 +130,9 @@ def InvokeNvcc(argv, log=False): undefines, argv = GetOptionValue(argv, '/U') undefines = ['-U' + define for define in undefines] + fatbin_options, argv = GetOptionValue(argv, '-Xcuda-fatbinary') + fatbin_options = ['--fatbin-options=' + option for option in fatbin_options] + # The rest of the unrecognized options should be passed to host compiler host_compiler_options = [option for option in argv if option not in (src_files + out_file)] @@ -154,6 +157,7 @@ def InvokeNvcc(argv, log=False): nvccopts += undefines nvccopts += defines nvccopts += m_options + nvccopts += fatbin_options nvccopts += ['--compiler-options="' + " ".join(host_compiler_options) + '"'] nvccopts += ['-x', 'cu'] + opt + includes + out + ['-c'] + src_files # Specify a unique temp directory for nvcc to generate intermediate files, diff --git a/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1/clang/bin/crosstool_wrapper_driver_is_not_gcc b/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1/clang/bin/crosstool_wrapper_driver_is_not_gcc index e427b01e9fa..d66945ab7c0 100755 --- a/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1/clang/bin/crosstool_wrapper_driver_is_not_gcc +++ b/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1/clang/bin/crosstool_wrapper_driver_is_not_gcc @@ -62,7 +62,7 @@ def GetOptionValue(argv, option): Args: argv: A list of strings, possibly the argv passed to main(). - option: The option whose value to extract, without the leading '-'. + option: The option whose value to extract, with the leading '-'. 
Returns: A list of values, either directly following the option, @@ -189,6 +189,8 @@ def InvokeNvcc(argv, log=False): nvcc_allowed_std_options = ["c++03", "c++11", "c++14"] std_options = ''.join([' -std=' + define for define in std_options if define in nvcc_allowed_std_options][-1:]) + fatbin_options = ''.join([' --fatbin-options=' + option + for option in GetOptionValue(argv, '-Xcuda-fatbinary')]) # The list of source files get passed after the -c option. I don't know of # any other reliable way to just get the list of source files to be compiled. @@ -233,6 +235,7 @@ def InvokeNvcc(argv, log=False): nvccopts += std_options nvccopts += m_options nvccopts += warning_options + nvccopts += fatbin_options if depfiles: # Generate the dependency file From cf1b6b3dfe9ba82e805fddf7f4462b2d92fe550a Mon Sep 17 00:00:00 2001 From: Christian Sigg Date: Mon, 8 Jun 2020 22:18:04 -0700 Subject: [PATCH 102/178] Remove PTX for all but newest GPU architecture. CUBINs are compatible within the same major architecture, which means we only need to ship PTX to JIT for future architectures. PiperOrigin-RevId: 315419950 Change-Id: I3eddcadcbbfabb3694e56432a5f257e8f66c5835 --- tensorflow/tools/ci_build/release/common_win.bat | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_pip_on_cpu/build.sh | 2 +- .../ci_build/release/ubuntu_16/gpu_py2_full/nightly_release.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip_v1.sh | 2 +- tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/pip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py2_full/pip_v1.sh | 2 +- .../ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip_v1.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py35_full/pip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py35_full/pip_v1.sh | 2 +- .../ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip_v1.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py36_full/pip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py36_full/pip_v1.sh | 2 +- .../ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip_v1.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py37_full/pip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py37_full/pip_v1.sh | 2 +- .../ci_build/release/ubuntu_16/gpu_py38_full/nightly_release.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py38_full/nonpip.sh | 2 +- .../tools/ci_build/release/ubuntu_16/gpu_py38_full/pip.sh | 2 +- 25 files changed, 25 insertions(+), 25 deletions(-) diff --git a/tensorflow/tools/ci_build/release/common_win.bat b/tensorflow/tools/ci_build/release/common_win.bat index 03217ce7e56..14b6a7e13fc 100644 --- a/tensorflow/tools/ci_build/release/common_win.bat +++ b/tensorflow/tools/ci_build/release/common_win.bat @@ -62,7 +62,7 @@ IF NOT DEFINED TF_CUDA_VERSION ( SET TF_CUDA_VERSION=10.1 ) SET TF_CUDNN_VERSION=7 -SET TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 +SET TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 SET CUDA_TOOLKIT_PATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v%TF_CUDA_VERSION% SET CUDNN_INSTALL_PATH=C:/Program 
Files/NVIDIA GPU Computing Toolkit/CUDA/v%TF_CUDA_VERSION% SET PATH=%CUDA_TOOLKIT_PATH%\extras\CUPTI\libx64;%PATH% diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_pip_on_cpu/build.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_pip_on_cpu/build.sh index a90a3e5a212..6e67bf20730 100755 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_pip_on_cpu/build.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_pip_on_cpu/build.sh @@ -34,7 +34,7 @@ export TENSORRT_INSTALL_PATH=/usr/local/tensorrt export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.6) export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nightly_release.sh index 3635f4289c8..079b683a6d5 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nightly_release.sh @@ -31,7 +31,7 @@ export TF_NEED_S3=1 export TF_NEED_CUDA=1 export TF_CUDA_VERSION=10 export TF_CUDNN_VERSION=7 -export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 export TF_NEED_TENSORRT=1 export TENSORRT_INSTALL_PATH=/usr/local/tensorrt export CC_OPT_FLAGS='-mavx' diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip.sh index a886b42daa4..1dce4b101e3 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip.sh @@ -36,7 +36,7 @@ export PYTHON_BIN_PATH=$(which python2.7) export TF2_BEHAVIOR=1 export PROJECT_NAME="tensorflow_gpu" export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip_v1.sh index f1807a4aae6..c8695bdfbdd 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/nonpip_v1.sh @@ -35,7 +35,7 @@ export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python2.7) export PROJECT_NAME="tensorflow_gpu" export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/pip.sh index dd618031c0d..1bf074dde4e 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/pip.sh @@ -40,7 +40,7 @@ export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which 
${TF_PYTHON_VERSION}) export PROJECT_NAME="tensorflow_gpu" export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/pip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/pip_v1.sh index db0c3a22c06..ea00d9f7539 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/pip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py2_full/pip_v1.sh @@ -40,7 +40,7 @@ export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) export PROJECT_NAME="tensorflow_gpu" export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh index ae5524f01ae..2ed5c014c65 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nightly_release.sh @@ -31,7 +31,7 @@ export TF_NEED_S3=1 export TF_NEED_CUDA=1 export TF_CUDA_VERSION=10 export TF_CUDNN_VERSION=7 -export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 export TF_NEED_TENSORRT=1 export TENSORRT_INSTALL_PATH=/usr/local/tensorrt export CC_OPT_FLAGS='-mavx' diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip.sh index bdff1f654f8..d9a10c9551d 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip.sh @@ -36,7 +36,7 @@ export PYTHON_BIN_PATH=$(which python3.5) export TF2_BEHAVIOR=1 export PROJECT_NAME="tensorflow_gpu" export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip_v1.sh index d957f7aef93..dea186ea62e 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/nonpip_v1.sh @@ -35,7 +35,7 @@ export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.5) export PROJECT_NAME="tensorflow_gpu" export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip.sh 
index 0e8cd8cd784..abf5c1db4b4 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip.sh @@ -40,7 +40,7 @@ export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) export PROJECT_NAME="tensorflow_gpu" export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip_v1.sh index 4bbbd50724b..a860decbe51 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py35_full/pip_v1.sh @@ -40,7 +40,7 @@ export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) export PROJECT_NAME="tensorflow_gpu" export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh index 776f8d87ef8..87b2e52d88a 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nightly_release.sh @@ -31,7 +31,7 @@ export TF_NEED_S3=1 export TF_NEED_CUDA=1 export TF_CUDA_VERSION=10 export TF_CUDNN_VERSION=7 -export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 export TF_NEED_TENSORRT=1 export TENSORRT_INSTALL_PATH=/usr/local/tensorrt export CC_OPT_FLAGS='-mavx' diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip.sh index 3fecf9abd29..547bb0a1fba 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip.sh @@ -36,7 +36,7 @@ export PYTHON_BIN_PATH=$(which python3.6) export TF2_BEHAVIOR=1 export PROJECT_NAME="tensorflow_gpu" export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip_v1.sh index 734c2c50c4a..42ea884f790 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/nonpip_v1.sh @@ -35,7 +35,7 @@ export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python3.6) export PROJECT_NAME="tensorflow_gpu" export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 +export 
TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/pip.sh index 0b26173ca5f..17b52d9ce6b 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/pip.sh @@ -40,7 +40,7 @@ export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) export PROJECT_NAME="tensorflow_gpu" export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/pip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/pip_v1.sh index 484daa63cb8..2ae067c53ce 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/pip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py36_full/pip_v1.sh @@ -40,7 +40,7 @@ export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) export PROJECT_NAME="tensorflow_gpu" export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh index 693d6d9e44f..0436ec32643 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nightly_release.sh @@ -31,7 +31,7 @@ export TF_NEED_S3=1 export TF_NEED_CUDA=1 export TF_CUDA_VERSION=10 export TF_CUDNN_VERSION=7 -export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 export TF_NEED_TENSORRT=1 export TENSORRT_INSTALL_PATH=/usr/local/tensorrt export CC_OPT_FLAGS='-mavx' diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip.sh index ff11f954c67..54a72459fa1 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip.sh @@ -36,7 +36,7 @@ export PYTHON_BIN_PATH=$(which python3.7) export TF2_BEHAVIOR=1 export PROJECT_NAME="tensorflow_gpu" export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip_v1.sh index 4788d40e93f..7c2a93f042e 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/nonpip_v1.sh @@ -35,7 +35,7 @@ export CC_OPT_FLAGS='-mavx' export 
PYTHON_BIN_PATH=$(which python3.7) export PROJECT_NAME="tensorflow_gpu" export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip.sh index 00047b775b1..2b17849b737 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip.sh @@ -40,7 +40,7 @@ export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) export PROJECT_NAME="tensorflow_gpu" export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip_v1.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip_v1.sh index 50cf3d61e4a..f6128448b99 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip_v1.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py37_full/pip_v1.sh @@ -40,7 +40,7 @@ export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) export PROJECT_NAME="tensorflow_gpu" export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nightly_release.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nightly_release.sh index 7a1f24a29ec..783785db7f7 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nightly_release.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nightly_release.sh @@ -33,7 +33,7 @@ export TF_NEED_S3=1 export TF_NEED_CUDA=1 export TF_CUDA_VERSION=10 export TF_CUDNN_VERSION=7 -export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 export TF_NEED_TENSORRT=1 export TENSORRT_INSTALL_PATH=/usr/local/tensorrt export CC_OPT_FLAGS='-mavx' diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nonpip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nonpip.sh index 917fbce7563..ab88f4712f0 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nonpip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/nonpip.sh @@ -36,7 +36,7 @@ export PYTHON_BIN_PATH=$(which python3.8) export TF2_BEHAVIOR=1 export PROJECT_NAME="tensorflow_gpu" export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/pip.sh b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/pip.sh 
index 9aa5fdf68c8..1ba8c078021 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/pip.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/gpu_py38_full/pip.sh @@ -40,7 +40,7 @@ export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which ${TF_PYTHON_VERSION}) export PROJECT_NAME="tensorflow_gpu" export LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$TENSORRT_INSTALL_PATH/lib" -export TF_CUDA_COMPUTE_CAPABILITIES=3.5,3.7,5.2,6.0,6.1,7.0 +export TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,compute_70 yes "" | "$PYTHON_BIN_PATH" configure.py From 6ddc7f8d99435c352b55aef79e7674973d6d3512 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 8 Jun 2020 22:26:52 -0700 Subject: [PATCH 103/178] [XLA] Update layout tiling in some algebraic simplifier cases. PiperOrigin-RevId: 315421103 Change-Id: Ic4dee365811f0faa5678d846a1e1412663610062 --- tensorflow/compiler/xla/service/algebraic_simplifier.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index e0a8b87c83b..cd4dc05ab09 100755 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -2973,6 +2973,7 @@ Status AlgebraicSimplifierVisitor::HandlePad(HloInstruction* pad) { // slice instruction should all have the same layout. TF_RETURN_IF_ERROR(LayoutUtil::CopyLayoutBetweenShapes( pad->shape(), nonzero_pad->mutable_shape())); + simplifier_->UpdateLayout(nonzero_pad->mutable_shape()); // Second, construct the slice instruction to perform the negative padding. std::vector start_indices; @@ -2999,9 +3000,14 @@ Status AlgebraicSimplifierVisitor::HandlePad(HloInstruction* pad) { MakeSliceHlo(nonzero_pad, start_indices, end_indices, strides)); TF_RETURN_IF_ERROR(LayoutUtil::CopyLayoutBetweenShapes( pad->shape(), slice->mutable_shape())); + simplifier_->UpdateLayout(slice->mutable_shape()); // Verify that the slice shape matches the pad shape. - TF_RET_CHECK(ShapeUtil::Equal(slice->shape(), pad->shape())); + auto equal = Shape::Equal(); + if (!options_.is_layout_sensitive()) { + equal.IgnoreTilesInLayout(); + } + TF_RET_CHECK(equal(slice->shape(), pad->shape())); return ReplaceInstruction(pad, slice); } From 47b4145e68d7b850c613c77a5e6adfbb955bba9b Mon Sep 17 00:00:00 2001 From: Advait Jain Date: Mon, 8 Jun 2020 22:49:07 -0700 Subject: [PATCH 104/178] Separate out parse functionality into helper functions. Ops in this change: * Con2D * DepthwiseConv2D * Reshape PiperOrigin-RevId: 315424381 Change-Id: If2fb9187785eabd31b9d6588322cb70345650539 --- .../lite/core/api/flatbuffer_conversions.cc | 224 ++++++++++++------ .../lite/core/api/flatbuffer_conversions.h | 13 + 2 files changed, 162 insertions(+), 75 deletions(-) diff --git a/tensorflow/lite/core/api/flatbuffer_conversions.cc b/tensorflow/lite/core/api/flatbuffer_conversions.cc index 5f39732b970..2a4dfbb6ff4 100644 --- a/tensorflow/lite/core/api/flatbuffer_conversions.cc +++ b/tensorflow/lite/core/api/flatbuffer_conversions.cc @@ -62,6 +62,17 @@ class SafeBuiltinDataAllocator { BuiltinDataAllocator* allocator_; }; +// All the Parse functions take some pointers as params and this function has +// the common DCHECKs to catch if any of those are nullptr. 
+void CheckParsePointerParams(const Operator* op, ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data) { + TFLITE_DCHECK(op != nullptr); + TFLITE_DCHECK(error_reporter != nullptr); + TFLITE_DCHECK(allocator != nullptr); + TFLITE_DCHECK(builtin_data != nullptr); +} + // Copies the contents from the flatbuffer int vector `flatbuffer` into the // int array `buffer`. `flat_vector` and `buffer` represent the same // configuration operation for a given operation. @@ -109,6 +120,17 @@ TfLiteFusedActivation ConvertActivation(ActivationFunctionType activation) { return kTfLiteActNone; } +// Converts the flatbuffer padding enum to what is used at runtime. +TfLitePadding ConvertPadding(Padding padding) { + switch (padding) { + case Padding_SAME: + return kTfLitePaddingSame; + case Padding_VALID: + return kTfLitePaddingValid; + } + return kTfLitePaddingUnknown; +} + } // namespace TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type, @@ -155,6 +177,74 @@ TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type, } } +TfLiteStatus ParseConv2D(const Operator* op, BuiltinOperator, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const Conv2DOptions* schema_params = op->builtin_options_as_Conv2DOptions(); + + if (schema_params != nullptr) { + params->padding = ConvertPadding(schema_params->padding()); + params->stride_width = schema_params->stride_w(); + params->stride_height = schema_params->stride_h(); + params->activation = + ConvertActivation(schema_params->fused_activation_function()); + + params->dilation_width_factor = schema_params->dilation_w_factor(); + params->dilation_height_factor = schema_params->dilation_h_factor(); + } else { + // TODO(b/157480169): We should either return kTfLiteError or fill in some + // reasonable defaults in the params struct. We are not doing so until we + // better undertand the ramifications of changing the legacy behavior. + } + + *builtin_data = params.release(); + return kTfLiteOk; +} + +TfLiteStatus ParseDepthwiseConv2D(const Operator* op, BuiltinOperator, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const DepthwiseConv2DOptions* schema_params = + op->builtin_options_as_DepthwiseConv2DOptions(); + + if (schema_params != nullptr) { + params->padding = ConvertPadding(schema_params->padding()); + params->stride_width = schema_params->stride_w(); + params->stride_height = schema_params->stride_h(); + params->depth_multiplier = schema_params->depth_multiplier(); + params->activation = + ConvertActivation(schema_params->fused_activation_function()); + + params->dilation_width_factor = schema_params->dilation_w_factor(); + params->dilation_height_factor = schema_params->dilation_h_factor(); + } else { + // TODO(b/157480169): We should either return kTfLiteError or fill in some + // reasonable defaults in the params struct. We are not doing so until we + // better undertand the ramifications of changing the legacy behavior. 
+ } + + *builtin_data = params.release(); + return kTfLiteOk; +} + // We have this parse function instead of directly returning kTfLiteOk from the // switch-case in ParseOpData because this function is used as part of the // selective registration for the OpResolver implementation in micro. @@ -167,10 +257,7 @@ TfLiteStatus ParseFullyConnected(const Operator* op, BuiltinOperator, ErrorReporter* error_reporter, BuiltinDataAllocator* allocator, void** builtin_data) { - TFLITE_DCHECK(op != nullptr); - TFLITE_DCHECK(error_reporter != nullptr); - TFLITE_DCHECK(allocator != nullptr); - TFLITE_DCHECK(builtin_data != nullptr); + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); SafeBuiltinDataAllocator safe_allocator(allocator); @@ -212,6 +299,47 @@ TfLiteStatus ParseFullyConnected(const Operator* op, BuiltinOperator, return kTfLiteOk; } +TfLiteStatus ParseReshape(const Operator* op, BuiltinOperator, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data) { + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); + + SafeBuiltinDataAllocator safe_allocator(allocator); + + std::unique_ptr + params = safe_allocator.Allocate(); + TF_LITE_ENSURE(error_reporter, params != nullptr); + + const ReshapeOptions* schema_params = op->builtin_options_as_ReshapeOptions(); + + if (schema_params != nullptr) { + const flatbuffers::Vector* new_shape = schema_params->new_shape(); + // TODO(b/147203660): We need to figure out when dynamic reshape + // (new_shape is a tensor) happens, why the option is not a nullptr. + // But nonethless, we should only copy when new_shape is not a nullptr. + if (new_shape != nullptr) { + TF_LITE_ENSURE_STATUS( + FlatBufferIntVectorToArray(sizeof(params->shape), new_shape, + params->shape, error_reporter, "reshape")); + params->num_dimensions = new_shape->size(); + } else { + // TODO(b/157480169) TODO(b/147203660): We should either return + // kTfLiteError or fill in some reasonable defaults in the params struct. + // We are not doing so until we better undertand the ramifications of + // changing the legacy behavior. + } + } else { + // TODO(b/157480169): We should either return kTfLiteError or fill in some + // reasonable defaults in the params struct. We are not doing so until we + // better undertand the ramifications of changing the legacy behavior. + } + + *builtin_data = params.release(); + return kTfLiteOk; +} + // We have this parse function instead of directly returning kTfLiteOk from the // switch-case in ParseOpData because this function is used as part of the // selective registration for the OpResolver implementation in micro. 
@@ -224,10 +352,7 @@ TfLiteStatus ParseSoftmax(const Operator* op, BuiltinOperator, ErrorReporter* error_reporter, BuiltinDataAllocator* allocator, void** builtin_data) { - TFLITE_DCHECK(op != nullptr); - TFLITE_DCHECK(error_reporter != nullptr); - TFLITE_DCHECK(allocator != nullptr); - TFLITE_DCHECK(builtin_data != nullptr); + CheckParsePointerParams(op, error_reporter, allocator, builtin_data); SafeBuiltinDataAllocator safe_allocator(allocator); std::unique_ptr(); - TF_LITE_ENSURE(error_reporter, params != nullptr); - if (auto* conv_params = op->builtin_options_as_Conv2DOptions()) { - params->padding = parse_padding(conv_params->padding()); - params->stride_width = conv_params->stride_w(); - params->stride_height = conv_params->stride_h(); - params->activation = - ConvertActivation(conv_params->fused_activation_function()); - - params->dilation_width_factor = conv_params->dilation_w_factor(); - params->dilation_height_factor = conv_params->dilation_h_factor(); - } - *builtin_data = params.release(); - return kTfLiteOk; - } case BuiltinOperator_CAST: { auto params = safe_allocator.Allocate(); TF_LITE_ENSURE(error_reporter, params != nullptr); @@ -386,7 +496,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, auto params = safe_allocator.Allocate(); TF_LITE_ENSURE(error_reporter, params != nullptr); if (const auto* pool_params = op->builtin_options_as_Pool2DOptions()) { - params->padding = parse_padding(pool_params->padding()); + params->padding = ConvertPadding(pool_params->padding()); params->stride_width = pool_params->stride_w(); params->stride_height = pool_params->stride_h(); params->filter_width = pool_params->filter_width(); @@ -397,24 +507,6 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, *builtin_data = params.release(); return kTfLiteOk; } - case BuiltinOperator_DEPTHWISE_CONV_2D: { - auto params = safe_allocator.Allocate(); - TF_LITE_ENSURE(error_reporter, params != nullptr); - if (const auto* conv_params = - op->builtin_options_as_DepthwiseConv2DOptions()) { - params->padding = parse_padding(conv_params->padding()); - params->stride_width = conv_params->stride_w(); - params->stride_height = conv_params->stride_h(); - params->depth_multiplier = conv_params->depth_multiplier(); - params->activation = - ConvertActivation(conv_params->fused_activation_function()); - - params->dilation_width_factor = conv_params->dilation_w_factor(); - params->dilation_height_factor = conv_params->dilation_h_factor(); - } - *builtin_data = params.release(); - return kTfLiteOk; - } case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN: { auto params = safe_allocator.Allocate(); TF_LITE_ENSURE(error_reporter, params != nullptr); @@ -644,24 +736,6 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, *builtin_data = params.release(); return kTfLiteOk; } - case BuiltinOperator_RESHAPE: { - auto params = safe_allocator.Allocate(); - TF_LITE_ENSURE(error_reporter, params != nullptr); - if (const auto* schema_params = op->builtin_options_as_ReshapeOptions()) { - auto* new_shape = schema_params->new_shape(); - // TODO(b/147203660): We need to figure out when dynamic reshape - // (new_shape is a tensor) happens, why the option is not a nullptr. - // But nonethless, we should only copy when new_shape is not a nullptr. 
- if (new_shape) { - TF_LITE_ENSURE_STATUS(FlatBufferIntVectorToArray( - sizeof(params->shape), new_shape, params->shape, error_reporter, - "reshape")); - params->num_dimensions = new_shape->size(); - } - } - *builtin_data = params.release(); - return kTfLiteOk; - } case BuiltinOperator_SKIP_GRAM: { auto params = safe_allocator.Allocate(); TF_LITE_ENSURE(error_reporter, params != nullptr); @@ -791,7 +865,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, TF_LITE_ENSURE(error_reporter, params != nullptr); if (const auto* transpose_conv_params = op->builtin_options_as_TransposeConvOptions()) { - params->padding = parse_padding(transpose_conv_params->padding()); + params->padding = ConvertPadding(transpose_conv_params->padding()); params->stride_width = transpose_conv_params->stride_w(); params->stride_height = transpose_conv_params->stride_h(); } diff --git a/tensorflow/lite/core/api/flatbuffer_conversions.h b/tensorflow/lite/core/api/flatbuffer_conversions.h index 45f2c9df3b7..78d2aca6222 100644 --- a/tensorflow/lite/core/api/flatbuffer_conversions.h +++ b/tensorflow/lite/core/api/flatbuffer_conversions.h @@ -75,6 +75,15 @@ TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type, // removed once we are no longer using ParseOpData for the OpResolver // implementation in micro. +TfLiteStatus ParseConv2D(const Operator* op, BuiltinOperator op_type, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + +TfLiteStatus ParseDepthwiseConv2D(const Operator* op, BuiltinOperator op_type, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, + void** builtin_data); + TfLiteStatus ParseDequantize(const Operator* op, BuiltinOperator op_type, ErrorReporter* error_reporter, BuiltinDataAllocator* allocator, @@ -90,6 +99,10 @@ TfLiteStatus ParseQuantize(const Operator* op, BuiltinOperator op_type, BuiltinDataAllocator* allocator, void** builtin_data); +TfLiteStatus ParseReshape(const Operator* op, BuiltinOperator op_type, + ErrorReporter* error_reporter, + BuiltinDataAllocator* allocator, void** builtin_data); + TfLiteStatus ParseSoftmax(const Operator* op, BuiltinOperator op_type, ErrorReporter* error_reporter, BuiltinDataAllocator* allocator, void** builtin_data); From 33014a38d9825274478bd00fd83ef22761028b20 Mon Sep 17 00:00:00 2001 From: Taehee Jeong Date: Mon, 8 Jun 2020 23:01:36 -0700 Subject: [PATCH 105/178] [Core ML] Support FP16 in fully connected op PiperOrigin-RevId: 315425894 Change-Id: I5340adf7b70d3e6d51a9c1edb814e6a309e99a84 --- .../builders/fully_connected_op_builder.cc | 46 +++++++++++++------ 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/tensorflow/lite/experimental/delegates/coreml/builders/fully_connected_op_builder.cc b/tensorflow/lite/experimental/delegates/coreml/builders/fully_connected_op_builder.cc index 2efc767d703..376830922c9 100644 --- a/tensorflow/lite/experimental/delegates/coreml/builders/fully_connected_op_builder.cc +++ b/tensorflow/lite/experimental/delegates/coreml/builders/fully_connected_op_builder.cc @@ -51,21 +51,37 @@ CoreML::Specification::NeuralNetworkLayer* FullyConnectedOpBuilder::Build() { void FullyConnectedOpBuilder::FillCoreMLWeights() { layer_->mutable_innerproduct()->set_inputchannels(weights_->dims->data[1]); layer_->mutable_innerproduct()->set_outputchannels(weights_->dims->data[0]); - const float* weights_data = GetTensorData(weights_); - std::copy(weights_data, weights_data + NumElements(weights_), - 
google::protobuf::RepeatedFieldBackInserter(layer_->mutable_innerproduct() - ->mutable_weights() - ->mutable_floatvalue())); + if (weights_->type == kTfLiteFloat32) { + const float* weights_data = GetTensorData(weights_); + std::copy(weights_data, weights_data + NumElements(weights_), + google::protobuf::RepeatedFieldBackInserter(layer_->mutable_innerproduct() + ->mutable_weights() + ->mutable_floatvalue())); + } else if (weights_->type == kTfLiteFloat16) { + // float16value has type of bytes (std::string) + layer_->mutable_innerproduct() + ->mutable_weights() + ->mutable_float16value() + ->assign(weights_->data.raw, weights_->bytes); + } } void FullyConnectedOpBuilder::FillCoreMLBias() { if (bias_ != nullptr) { layer_->mutable_innerproduct()->set_hasbias(true); - const float* bias_data = GetTensorData(bias_); - std::copy(bias_data, bias_data + NumElements(bias_), - google::protobuf::RepeatedFieldBackInserter(layer_->mutable_innerproduct() - ->mutable_bias() - ->mutable_floatvalue())); + if (bias_->type == kTfLiteFloat32) { + const float* bias_data = GetTensorData(bias_); + std::copy(bias_data, bias_data + NumElements(bias_), + google::protobuf::RepeatedFieldBackInserter(layer_->mutable_innerproduct() + ->mutable_bias() + ->mutable_floatvalue())); + } else if (bias_->type == kTfLiteFloat16) { + // float16value has type of bytes (std::string) + layer_->mutable_innerproduct() + ->mutable_bias() + ->mutable_float16value() + ->assign(bias_->data.raw, bias_->bytes); + } } } @@ -120,6 +136,10 @@ OpBuilder* CreateFullyConnectedOpBuilder(GraphBuilder* graph_builder) { return new FullyConnectedOpBuilder(graph_builder); } +bool IsFloatType(TfLiteType type) { + return type == kTfLiteFloat32 || type == kTfLiteFloat16; +} + bool IsFullyConnectedOpSupported(const TfLiteRegistration* registration, const TfLiteNode* node, TfLiteContext* context) { @@ -136,10 +156,10 @@ bool IsFullyConnectedOpSupported(const TfLiteRegistration* registration, const TfLiteTensor* input = GetInput(context, node, kInput); const TfLiteTensor* weights = GetInput(context, node, kWeights); - if (input->type != kTfLiteFloat32) { + if (!IsFloatType(input->type)) { return false; } - if (weights->type != kTfLiteFloat32 || !IsConstantTensor(weights)) { + if (!IsFloatType(weights->type) || !IsConstantTensor(weights)) { return false; } // Core ML 2 only supports single-batch fully connected layer, thus dimensions @@ -150,7 +170,7 @@ bool IsFullyConnectedOpSupported(const TfLiteRegistration* registration, if (node->inputs->size > 2) { const TfLiteTensor* bias = GetInput(context, node, kBias); - if (bias->type != kTfLiteFloat32 || !IsConstantTensor(bias)) { + if (!IsFloatType(bias->type) || !IsConstantTensor(bias)) { return false; } } From d9e5e2f7b34f803c11d0b7a6e9c3100e6d139d74 Mon Sep 17 00:00:00 2001 From: Yuefeng Zhou Date: Mon, 8 Jun 2020 23:06:23 -0700 Subject: [PATCH 106/178] Docstring fixes for cluster resolvers. 
PiperOrigin-RevId: 315426402 Change-Id: I9a8982af6a2fe0538f9af3812572db55b29525b8 --- .../cluster_resolver/cluster_resolver.py | 89 ++++++++++++++++--- .../cluster_resolver/gce_cluster_resolver.py | 23 +++++ .../kubernetes_cluster_resolver.py | 27 ++++++ .../tfconfig_cluster_resolver.py | 26 ++++++ 4 files changed, 154 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/distribute/cluster_resolver/cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/cluster_resolver.py index e15b6ab01f8..a8babc21af6 100644 --- a/tensorflow/python/distribute/cluster_resolver/cluster_resolver.py +++ b/tensorflow/python/distribute/cluster_resolver/cluster_resolver.py @@ -63,7 +63,8 @@ class ClusterResolver(object): This defines the skeleton for all implementations of ClusterResolvers. ClusterResolvers are a way for TensorFlow to communicate with various cluster - management systems (e.g. GCE, AWS, etc...). + management systems (e.g. GCE, AWS, etc...) and gives TensorFlow necessary + information to set up distributed training. By letting TensorFlow communicate with these systems, we will be able to automatically discover and resolve IP addresses for various TensorFlow @@ -73,7 +74,8 @@ class ClusterResolver(object): Note to Implementors: In addition to these abstract methods, you must also implement the task_type, task_id, and rpc_layer attributes. You may choose to implement them either as properties with getters or setters or directly - set the attributes. + set the attributes. The task_type and task_id attributes are required by + `tf.distribute.experimental.MultiWorkerMirroredStrategy`. - task_type is the name of the server's current named job (e.g. 'worker', 'ps' in a distributed parameterized training job). @@ -84,11 +86,11 @@ class ClusterResolver(object): @abc.abstractmethod def cluster_spec(self): - """Retrieve the current state of the cluster and return a ClusterSpec. + """Retrieve the current state of the cluster and return a `tf.train.ClusterSpec`. Returns: - A ClusterSpec representing the state of the cluster at the moment this - function is called. + A `tf.train.ClusterSpec` representing the state of the cluster at the + moment this function is called. Implementors of this function must take care in ensuring that the ClusterSpec returned is up-to-date at the time of calling this function. @@ -102,6 +104,8 @@ class ClusterResolver(object): def master(self, task_type=None, task_id=None, rpc_layer=None): """Retrieves the name or URL of the session master. + Note: this is only useful for TensorFlow 1.x. + Args: task_type: (Optional) The type of the TensorFlow task of the master. task_id: (Optional) The index of the TensorFlow task of the master. @@ -126,7 +130,7 @@ class ClusterResolver(object): available per worker. Optionally, we allow callers to specify the task_type, and task_id, for - if they want to target a specific TensorFlow process to query + if they want to target a specific TensorFlow task to query the number of accelerators. This is to support heterogenous environments, where the number of accelerators cores per host is different. @@ -142,6 +146,8 @@ class ClusterResolver(object): A map of accelerator types to number of cores. """ master = self.master(task_type, task_id) + # TODO(b/126786766): in eager mode, we should check whether + # `tf.config.experimental_connect_to_cluster` is called or not. 
     devices = get_accelerator_devices(master, config_proto)
     mapping = collections.defaultdict(int)
     for device in devices:
@@ -174,7 +180,35 @@ class ClusterResolver(object):
 
 @tf_export('distribute.cluster_resolver.SimpleClusterResolver')
 class SimpleClusterResolver(ClusterResolver):
-  """Simple implementation of ClusterResolver that accepts a ClusterSpec."""
+  """Simple implementation of ClusterResolver that accepts all attributes.
+
+  Please see the base class for documentation of arguments of its constructor.
+
+  It is useful if you want to specify some or all attributes.
+
+  Usage example with `tf.distribute.Strategy`:
+
+  ```Python
+  cluster = tf.train.ClusterSpec({"worker": ["worker0.example.com:2222",
+                                             "worker1.example.com:2222"]})
+
+  # On worker 0
+  cluster_resolver = SimpleClusterResolver(cluster, task_type="worker",
+                                           task_id=0,
+                                           num_accelerators={"GPU": 8},
+                                           rpc_layer="grpc")
+  strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
+      cluster_resolver=cluster_resolver)
+
+  # On worker 1
+  cluster_resolver = SimpleClusterResolver(cluster, task_type="worker",
+                                           task_id=1,
+                                           num_accelerators={"GPU": 8},
+                                           rpc_layer="grpc")
+  strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
+      cluster_resolver=cluster_resolver)
+  ```
+  """
 
   def __init__(self, cluster_spec, master='', task_type=None, task_id=None,
                environment='', num_accelerators=None,
@@ -190,7 +224,7 @@ class SimpleClusterResolver(ClusterResolver):
     self._rpc_layer = rpc_layer
 
     if not isinstance(cluster_spec, ClusterSpec):
-      raise TypeError('cluster_spec must be a ClusterSpec.')
+      raise TypeError('cluster_spec must be a `tf.train.ClusterSpec`.')
     self._cluster_spec = cluster_spec
 
     if not isinstance(master, str):
@@ -204,6 +238,8 @@ class SimpleClusterResolver(ClusterResolver):
   def master(self, task_type=None, task_id=None, rpc_layer=None):
     """Returns the master address to use when creating a session.
 
+    Note: this is only useful for TensorFlow 1.x.
+
     Args:
       task_type: (Optional) The type of the TensorFlow task of the master.
       task_id: (Optional) The index of the TensorFlow task of the master.
@@ -249,9 +285,8 @@ class SimpleClusterResolver(ClusterResolver):
     """Returns the number of accelerator cores per worker.
 
     The SimpleClusterResolver does not do automatic detection of accelerators,
-    so a TensorFlow session will never be created, and thus all arguments are
-    unused and we simply assume that the type of accelerator is a GPU and return
-    the value in provided to us in the constructor.
+    and thus all arguments are unused and we simply return the value provided
+    in the constructor.
 
     Args:
       task_type: Unused.
@@ -285,6 +320,36 @@ class UnionClusterResolver(ClusterResolver):
   For additional ClusterResolver properties such as task type, task index,
   rpc layer, environment, etc..., we will return the value from the first
   ClusterResolver in the union.
+
+  An example to combine two cluster resolvers:
+
+  ```Python
+  cluster_0 = tf.train.ClusterSpec({"worker": ["worker0.example.com:2222",
+                                               "worker1.example.com:2222"]})
+  cluster_resolver_0 = SimpleClusterResolver(cluster_0, task_type="worker",
+                                             task_id=0,
+                                             rpc_layer="grpc")
+
+  cluster_1 = tf.train.ClusterSpec({"ps": ["ps0.example.com:2222",
+                                           "ps1.example.com:2222"]})
+  cluster_resolver_1 = SimpleClusterResolver(cluster_1, task_type="ps",
+                                             task_id=0,
+                                             rpc_layer="grpc")
+
+  # Its task type would be "worker".
+  cluster_resolver = UnionClusterResolver(cluster_resolver_0,
+                                          cluster_resolver_1)
+  ```
+
+  An example to override the number of GPUs in a TFConfigClusterResolver
+  instance:
+
+  ```Python
+  tf_config = TFConfigClusterResolver()
+  gpu_override = SimpleClusterResolver(tf_config.cluster_spec(),
+                                       num_accelerators={"GPU": 1})
+  cluster_resolver = UnionClusterResolver(gpu_override, tf_config)
+  ```
   """
 
   def __init__(self, *args, **kwargs):
@@ -400,6 +465,8 @@ class UnionClusterResolver(ClusterResolver):
     This usually returns the master from the first ClusterResolver passed in,
     but you can override this by specifying the task_type and task_id.
 
+    Note: this is only useful for TensorFlow 1.x.
+
     Args:
       task_type: (Optional) The type of the TensorFlow task of the master.
       task_id: (Optional) The index of the TensorFlow task of the master.
diff --git a/tensorflow/python/distribute/cluster_resolver/gce_cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/gce_cluster_resolver.py
index 70d42e80a70..14548ed4350 100644
--- a/tensorflow/python/distribute/cluster_resolver/gce_cluster_resolver.py
+++ b/tensorflow/python/distribute/cluster_resolver/gce_cluster_resolver.py
@@ -40,6 +40,29 @@ class GCEClusterResolver(ClusterResolver):
   this will retrieve the IP address of all the instances within the instance
   group and return a ClusterResolver object suitable for use for distributed
   TensorFlow.
+
+  Note: this cluster resolver cannot retrieve `task_type`, `task_id` or
+  `rpc_layer`. To use it with some distribution strategies like
+  `tf.distribute.experimental.MultiWorkerMirroredStrategy`, you will need to
+  specify `task_type` and `task_id` in the constructor.
+
+  Usage example with tf.distribute.Strategy:
+
+  ```Python
+  # On worker 0
+  cluster_resolver = GCEClusterResolver("my-project", "us-west1",
+                                        "my-instance-group",
+                                        task_type="worker", task_id=0)
+  strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
+      cluster_resolver=cluster_resolver)
+
+  # On worker 1
+  cluster_resolver = GCEClusterResolver("my-project", "us-west1",
+                                        "my-instance-group",
+                                        task_type="worker", task_id=1)
+  strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
+      cluster_resolver=cluster_resolver)
+  ```
   """
 
   def __init__(self,
diff --git a/tensorflow/python/distribute/cluster_resolver/kubernetes_cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/kubernetes_cluster_resolver.py
index f812df0e5c7..27dda7977f6 100644
--- a/tensorflow/python/distribute/cluster_resolver/kubernetes_cluster_resolver.py
+++ b/tensorflow/python/distribute/cluster_resolver/kubernetes_cluster_resolver.py
@@ -39,6 +39,31 @@ class KubernetesClusterResolver(ClusterResolver):
   the Kubernetes namespace and label selector for pods, we will retrieve the
   pod IP addresses of all running pods matching the selector, and return a
   ClusterSpec based on that information.
+
+  Note: it cannot retrieve `task_type`, `task_id` or `rpc_layer`. To use it
+  with some distribution strategies like
+  `tf.distribute.experimental.MultiWorkerMirroredStrategy`, you will need to
+  specify `task_type` and `task_id` by setting these attributes.
+
+  Usage example with tf.distribute.Strategy:
+
+  ```Python
+  # On worker 0
+  cluster_resolver = KubernetesClusterResolver(
+      {"worker": ["job-name=worker-cluster-a", "job-name=worker-cluster-b"]})
+  cluster_resolver.task_type = "worker"
+  cluster_resolver.task_id = 0
+  strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
+      cluster_resolver=cluster_resolver)
+
+  # On worker 1
+  cluster_resolver = KubernetesClusterResolver(
+      {"worker": ["job-name=worker-cluster-a", "job-name=worker-cluster-b"]})
+  cluster_resolver.task_type = "worker"
+  cluster_resolver.task_id = 1
+  strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
+      cluster_resolver=cluster_resolver)
+  ```
   """
 
   def __init__(self,
@@ -101,6 +126,8 @@ class KubernetesClusterResolver(ClusterResolver):
     parameters when using this function. If you do both, the function
     parameters will override the object properties.
 
+    Note: this is only useful for TensorFlow 1.x.
+
     Args:
       task_type: (Optional) The type of the TensorFlow task of the master.
       task_id: (Optional) The index of the TensorFlow task of the master.
diff --git a/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver.py
index 305af265b03..30063d090d1 100644
--- a/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver.py
+++ b/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver.py
@@ -55,6 +55,30 @@ class TFConfigClusterResolver(ClusterResolver):
   This is an implementation of cluster resolvers when using TF_CONFIG to set
   information about the cluster. The cluster spec returned will be initialized
   from the TF_CONFIG environment variable.
+
+  An example to set TF_CONFIG is:
+
+  ```Python
+  os.environ['TF_CONFIG'] = json.dumps({
+    'cluster': {
+        'worker': ["localhost:12345", "localhost:23456"]
+    },
+    'task': {'type': 'worker', 'index': 0}
+  })
+  ```
+
+  However, sometimes the container orchestration framework will set TF_CONFIG
+  for you. In this case, you can just create an instance without passing in any
+  arguments. You can find an example here to let Kubernetes set TF_CONFIG for
+  you: https://github.com/tensorflow/ecosystem/tree/master/kubernetes. Then you
+  can use it with `tf.distribute.Strategy` as:
+
+  ```Python
+  # `TFConfigClusterResolver` is already the default one in the following
+  # strategy.
+  strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
+      cluster_resolver=TFConfigClusterResolver())
+  ```
   """
 
   def __init__(self,
@@ -140,6 +164,8 @@ class TFConfigClusterResolver(ClusterResolver):
   def master(self, task_type=None, task_id=None, rpc_layer=None):
     """Returns the master address to use when creating a TensorFlow session.
 
+    Note: this is only useful for TensorFlow 1.x.
+
     Args:
       task_type: (String, optional) Overrides and sets the task_type of the
         master.
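For reference, the TF_CONFIG example in the TFConfigClusterResolver docstring above resolves as sketched below. This is a minimal illustration rather than part of the patch: the worker addresses come from that docstring example, the commented outputs are roughly what the resolver should report for worker 0, and the grpc:// prefix appears only because rpc_layer is passed to master().

    import json
    import os
    import tensorflow as tf

    # Same TF_CONFIG as in the docstring example above.
    os.environ['TF_CONFIG'] = json.dumps({
        'cluster': {'worker': ['localhost:12345', 'localhost:23456']},
        'task': {'type': 'worker', 'index': 0}
    })

    resolver = tf.distribute.cluster_resolver.TFConfigClusterResolver()
    # The 'cluster' key becomes the tf.train.ClusterSpec.
    print(resolver.cluster_spec().as_dict())
    # -> {'worker': ['localhost:12345', 'localhost:23456']}
    # The 'task' key fills in task_type and task_id.
    print(resolver.task_type, resolver.task_id)   # -> worker 0
    # master() is the TF1-style session target for this task.
    print(resolver.master(rpc_layer='grpc'))      # -> grpc://localhost:12345

The resolver re-reads TF_CONFIG on each call, which is why the environment variable is set before the resolver is used.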
From 13c09da4222e6b3367dabf1e43cdd497bce72c79 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Mon, 8 Jun 2020 23:59:12 -0700 Subject: [PATCH 107/178] Open sourcing some TPU-related work PiperOrigin-RevId: 315431095 Change-Id: I734632c0e5723dfca37acf53bbbd2b378b04c95d --- tensorflow/core/tpu/graph_rewrite/BUILD | 55 ++ ...tributed_tpu_configuration_rewrite_pass.cc | 402 +++++++++ ...stributed_tpu_configuration_rewrite_pass.h | 51 ++ ..._tpu_configuration_rewrite_registration.cc | 29 + .../distributed_tpu_rewrite_helpers.cc | 255 ++++++ .../distributed_tpu_rewrite_helpers.h | 98 +++ tensorflow/core/tpu/kernels/BUILD | 288 +++++++ .../tpu/kernels/tpu_compilation_cache.proto | 25 + .../tpu/kernels/tpu_compilation_cache_entry.h | 84 ++ .../kernels/tpu_compilation_cache_external.cc | 791 ++++++++++++++++++ .../kernels/tpu_compilation_cache_external.h | 394 +++++++++ .../tpu/kernels/tpu_compilation_cache_key.h | 53 ++ .../kernels/tpu_compilation_cache_lookup.cc | 93 ++ .../kernels/tpu_compilation_cache_lookup.h | 99 +++ .../kernels/tpu_compilation_cache_metrics.cc | 32 + .../kernels/tpu_compilation_cache_metrics.h | 38 + tensorflow/core/tpu/kernels/tpu_compile.proto | 144 ++++ .../core/tpu/kernels/tpu_compile_c_api.h | 119 +++ .../tpu/kernels/tpu_compile_op_options.cc | 42 + .../core/tpu/kernels/tpu_compile_op_options.h | 42 + .../tpu/kernels/tpu_compile_op_support.cc | 439 ++++++++++ .../core/tpu/kernels/tpu_compile_op_support.h | 122 +++ .../core/tpu/kernels/tpu_configuration_ops.cc | 298 +++++++ .../core/tpu/kernels/tpu_configuration_ops.h | 156 ++++ .../tpu/kernels/tpu_executable_info.proto | 94 +++ .../core/tpu/kernels/tpu_mesh_state_c_api.h | 30 + .../tpu/kernels/tpu_mesh_state_interface.h | 78 ++ tensorflow/core/tpu/kernels/tpu_program.cc | 201 +++++ tensorflow/core/tpu/kernels/tpu_program.h | 161 ++++ tensorflow/core/tpu/kernels/tpu_util.cc | 100 +++ tensorflow/core/tpu/kernels/tpu_util.h | 67 ++ tensorflow/core/tpu/kernels/trace_util.h | 27 + tensorflow/core/tpu/tpu_library_loader.cc | 7 + tensorflow/stream_executor/tpu/BUILD | 234 ++++++ .../stream_executor/tpu/c_api_conversions.h | 115 +++ .../stream_executor/tpu/proto_helper.cc | 27 + tensorflow/stream_executor/tpu/proto_helper.h | 85 ++ .../stream_executor/tpu/status_helper.h | 38 + .../tpu/tpu_computation_placer.cc | 51 ++ .../tpu/tpu_computation_placer.h | 41 + .../stream_executor/tpu/tpu_executor.cc | 355 ++++++++ tensorflow/stream_executor/tpu/tpu_executor.h | 241 ++++++ .../stream_executor/tpu/tpu_executor_c_api.h | 293 +++++++ .../tpu/tpu_executor_interface.h | 64 ++ .../stream_executor/tpu/tpu_node_context.cc | 100 +++ .../stream_executor/tpu/tpu_node_context.h | 89 ++ .../tpu/tpu_node_context_c_api.h | 29 + .../stream_executor/tpu/tpu_platform.cc | 125 +++ tensorflow/stream_executor/tpu/tpu_platform.h | 121 +++ .../tpu/tpu_platform_interface.cc | 63 ++ .../tpu/tpu_platform_interface.h | 44 + tensorflow/stream_executor/tpu/tpu_stream.h | 40 + .../tpu/tpu_stream_interface.h | 30 + tensorflow/stream_executor/tpu/tpu_timer.h | 38 + .../tpu/tpu_transfer_manager.cc | 167 ++++ .../tpu/tpu_transfer_manager.h | 83 ++ .../tpu/tpu_transfer_manager_registration.cc | 35 + 57 files changed, 7422 insertions(+) create mode 100644 tensorflow/core/tpu/graph_rewrite/BUILD create mode 100644 tensorflow/core/tpu/graph_rewrite/distributed_tpu_configuration_rewrite_pass.cc create mode 100644 tensorflow/core/tpu/graph_rewrite/distributed_tpu_configuration_rewrite_pass.h create mode 100644 
tensorflow/core/tpu/graph_rewrite/distributed_tpu_configuration_rewrite_registration.cc create mode 100644 tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_helpers.cc create mode 100644 tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_helpers.h create mode 100644 tensorflow/core/tpu/kernels/BUILD create mode 100644 tensorflow/core/tpu/kernels/tpu_compilation_cache.proto create mode 100644 tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h create mode 100644 tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc create mode 100644 tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h create mode 100644 tensorflow/core/tpu/kernels/tpu_compilation_cache_key.h create mode 100644 tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.cc create mode 100644 tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h create mode 100644 tensorflow/core/tpu/kernels/tpu_compilation_cache_metrics.cc create mode 100644 tensorflow/core/tpu/kernels/tpu_compilation_cache_metrics.h create mode 100644 tensorflow/core/tpu/kernels/tpu_compile.proto create mode 100644 tensorflow/core/tpu/kernels/tpu_compile_c_api.h create mode 100644 tensorflow/core/tpu/kernels/tpu_compile_op_options.cc create mode 100644 tensorflow/core/tpu/kernels/tpu_compile_op_options.h create mode 100644 tensorflow/core/tpu/kernels/tpu_compile_op_support.cc create mode 100644 tensorflow/core/tpu/kernels/tpu_compile_op_support.h create mode 100644 tensorflow/core/tpu/kernels/tpu_configuration_ops.cc create mode 100644 tensorflow/core/tpu/kernels/tpu_configuration_ops.h create mode 100644 tensorflow/core/tpu/kernels/tpu_executable_info.proto create mode 100644 tensorflow/core/tpu/kernels/tpu_mesh_state_c_api.h create mode 100644 tensorflow/core/tpu/kernels/tpu_mesh_state_interface.h create mode 100644 tensorflow/core/tpu/kernels/tpu_program.cc create mode 100644 tensorflow/core/tpu/kernels/tpu_program.h create mode 100644 tensorflow/core/tpu/kernels/tpu_util.cc create mode 100644 tensorflow/core/tpu/kernels/tpu_util.h create mode 100644 tensorflow/core/tpu/kernels/trace_util.h create mode 100644 tensorflow/stream_executor/tpu/BUILD create mode 100644 tensorflow/stream_executor/tpu/c_api_conversions.h create mode 100644 tensorflow/stream_executor/tpu/proto_helper.cc create mode 100644 tensorflow/stream_executor/tpu/proto_helper.h create mode 100644 tensorflow/stream_executor/tpu/status_helper.h create mode 100644 tensorflow/stream_executor/tpu/tpu_computation_placer.cc create mode 100644 tensorflow/stream_executor/tpu/tpu_computation_placer.h create mode 100644 tensorflow/stream_executor/tpu/tpu_executor.cc create mode 100644 tensorflow/stream_executor/tpu/tpu_executor.h create mode 100644 tensorflow/stream_executor/tpu/tpu_executor_c_api.h create mode 100644 tensorflow/stream_executor/tpu/tpu_executor_interface.h create mode 100644 tensorflow/stream_executor/tpu/tpu_node_context.cc create mode 100644 tensorflow/stream_executor/tpu/tpu_node_context.h create mode 100644 tensorflow/stream_executor/tpu/tpu_node_context_c_api.h create mode 100644 tensorflow/stream_executor/tpu/tpu_platform.cc create mode 100644 tensorflow/stream_executor/tpu/tpu_platform.h create mode 100644 tensorflow/stream_executor/tpu/tpu_platform_interface.cc create mode 100644 tensorflow/stream_executor/tpu/tpu_platform_interface.h create mode 100644 tensorflow/stream_executor/tpu/tpu_stream.h create mode 100644 tensorflow/stream_executor/tpu/tpu_stream_interface.h create mode 100644 tensorflow/stream_executor/tpu/tpu_timer.h create 
mode 100644 tensorflow/stream_executor/tpu/tpu_transfer_manager.cc create mode 100644 tensorflow/stream_executor/tpu/tpu_transfer_manager.h create mode 100644 tensorflow/stream_executor/tpu/tpu_transfer_manager_registration.cc diff --git a/tensorflow/core/tpu/graph_rewrite/BUILD b/tensorflow/core/tpu/graph_rewrite/BUILD new file mode 100644 index 00000000000..ef9e4a0a41e --- /dev/null +++ b/tensorflow/core/tpu/graph_rewrite/BUILD @@ -0,0 +1,55 @@ +# Contains graph rewrites for TPU runtimes and optimizations. + +package( + default_visibility = [ + "//tensorflow/core/tpu:__subpackages__", + "//tensorflow/stream_executor/tpu:__subpackages__", + ], + licenses = ["notice"], # Apache 2.0 +) + +cc_library( + name = "distributed_tpu_configuration_rewrite_registration", + srcs = ["distributed_tpu_configuration_rewrite_registration.cc"], + deps = [ + ":distributed_tpu_configuration_rewrite_pass", + "//tensorflow/core:core_cpu", + ], + alwayslink = 1, +) + +cc_library( + name = "distributed_tpu_configuration_rewrite_pass", + srcs = [ + "distributed_tpu_configuration_rewrite_pass.cc", + ], + hdrs = [ + "distributed_tpu_configuration_rewrite_pass.h", + ], + deps = [ + ":distributed_tpu_rewrite_helpers", + "//tensorflow/cc:scope", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/core:core_cpu", + "//tensorflow/core:core_cpu_lib", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core/protobuf/tpu:topology_proto_cc", + "//tensorflow/core/tpu:tpu_init_mode", + "//tensorflow/core/tpu/kernels:tpu_compile_op_options", + ], +) + +cc_library( + name = "distributed_tpu_rewrite_helpers", + srcs = ["distributed_tpu_rewrite_helpers.cc"], + hdrs = ["distributed_tpu_rewrite_helpers.h"], + deps = [ + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/core:core_cpu_lib", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/tpu:tpu_defs", + ], +) diff --git a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_configuration_rewrite_pass.cc b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_configuration_rewrite_pass.cc new file mode 100644 index 00000000000..3b1e9d79705 --- /dev/null +++ b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_configuration_rewrite_pass.cc @@ -0,0 +1,402 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +// Configuration for distributed TPU jobs + +#include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_configuration_rewrite_pass.h" + +#include + +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/core/common_runtime/device_set.h" +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/common_runtime/graph_constructor.h" +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/partial_tensor_shape.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/core/public/session_options.h" +#include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_helpers.h" +#include "tensorflow/core/tpu/kernels/tpu_compile_op_options.h" +#include "tensorflow/core/tpu/tpu_init_mode.h" +#include "tensorflow/core/util/device_name_utils.h" +#include "tensorflow/core/util/dump_graph.h" + +namespace tensorflow { +namespace { + +constexpr char kIdentityOp[] = "Identity"; +constexpr char kConfigureOp[] = "ConfigureDistributedTPU"; +constexpr char kInternalConfigureOp[] = "_ConfigureDistributedTPU"; +constexpr char kWaitOp[] = "_WaitForDistributedTPU"; +constexpr char kHostConfigureOp[] = "_InitializeHostForDistributedTPU"; +constexpr char kGlobalTPUArrayOp[] = "_SetGlobalTPUArray"; +constexpr char kShutdownOp[] = "ShutdownDistributedTPU"; +constexpr char kInternalShutdownOp[] = "_ShutdownDistributedTPU"; +constexpr char kHostDisconnectOp[] = "_DisconnectHostFromDistributedTPUSystem"; +constexpr char kEmbeddingConfigurationAttr[] = "embedding_config"; +constexpr int kDefaultStartupTimeout = 20; + +Status AddConfigurationNode(const string& configuration_device_name, + int number_of_hosts, Graph* graph, + bool enable_whole_mesh_compilations, + Node** configuration_node) { + NodeDef config_def; + config_def.set_name(graph->NewName("configure_distributed_tpu")); + config_def.set_op(kInternalConfigureOp); + config_def.set_device(configuration_device_name); + AddNodeAttr("N", number_of_hosts, &config_def); + AddNodeAttr("enable_whole_mesh_compilations", enable_whole_mesh_compilations, + &config_def); + // TODO(shikharagarwal): Fill with appropriate original node debug info. 
+ + Status status; + *configuration_node = graph->AddNode(config_def, &status); + if (!status.ok()) { + return status; + } + (*configuration_node)->set_assigned_device_name(configuration_device_name); + return Status::OK(); +} + +Status AddHostConfigNode(const string& host_device_name, + Node* configuration_node, Graph* graph, + bool enable_whole_mesh_compilations, + Node** host_configuration_node) { + NodeDef host_config_def; + host_config_def.set_name(graph->NewName("configure_tpu_host")); + host_config_def.set_op(kHostConfigureOp); + host_config_def.set_device(host_device_name); + AddNodeAttr("enable_whole_mesh_compilations", enable_whole_mesh_compilations, + &host_config_def); + MergeDebugInfo(NodeDebugInfo(configuration_node->def()), &host_config_def); + + Status status; + *host_configuration_node = graph->AddNode(host_config_def, &status); + if (!status.ok()) { + return status; + } + (*host_configuration_node)->set_assigned_device_name(host_device_name); + graph->AddEdge(configuration_node, 0, *host_configuration_node, 0); + return Status::OK(); +} + +Status AddWaitNode(const string& configuration_device_name, + const std::vector& host_configuration_nodes, + Graph* graph, Node** wait_node) { + NodeDef wait_def; + wait_def.set_name(graph->NewName("wait_for_distributed_tpu_system")); + wait_def.set_op(kWaitOp); + wait_def.set_device(configuration_device_name); + AddNodeAttr("N", static_cast(host_configuration_nodes.size()), + &wait_def); + AddNodeAttr("startup_timeout_sec", kDefaultStartupTimeout, &wait_def); + if (!host_configuration_nodes.empty()) { + MergeDebugInfo(NodeDebugInfo(host_configuration_nodes[0]->def()), + &wait_def); + } + + Status status; + *wait_node = graph->AddNode(wait_def, &status); + if (!status.ok()) { + return status; + } + (*wait_node)->set_assigned_device_name(configuration_device_name); + // Get the inputs from the host configuration nodes. + for (int i = 0; i < host_configuration_nodes.size(); ++i) { + graph->AddEdge(host_configuration_nodes[i], 0, *wait_node, i); + } + return Status::OK(); +} + +Status AddGlobalTPUArrayNode(const string& host_device_name, Node* wait_node, + Graph* graph, Node** global_tpu_array_node) { + NodeDef global_tpu_array_def; + global_tpu_array_def.set_name(graph->NewName("set_global_tpu_array")); + global_tpu_array_def.set_op(kGlobalTPUArrayOp); + global_tpu_array_def.set_device(host_device_name); + MergeDebugInfo(NodeDebugInfo(wait_node->def()), &global_tpu_array_def); + + Status status; + *global_tpu_array_node = graph->AddNode(global_tpu_array_def, &status); + if (!status.ok()) { + return status; + } + (*global_tpu_array_node)->set_assigned_device_name(host_device_name); + graph->AddEdge(wait_node, 0, *global_tpu_array_node, 0); + return Status::OK(); +} + +Status AddSynchronizationNode( + const NodeDef& sync_node_def, const string& device_name, + const std::vector& global_array_id_nodes, Node* wait_node, + const std::vector& + output_dependencies, + Graph* graph) { + NodeDef sync_def; + sync_def.set_name(sync_node_def.name()); + sync_def.set_op(kIdentityOp); + sync_def.set_device(device_name); + AddNodeAttr("T", DT_STRING, &sync_def); + MergeDebugInfo(NodeDebugInfo(sync_node_def), &sync_def); + + Status status; + Node* sync_node = graph->AddNode(sync_def, &status); + if (!status.ok()) { + return status; + } + sync_node->set_assigned_device_name(device_name); + // Add control edges from the global array id nodes. 
+ for (auto node : global_array_id_nodes) { + graph->AddControlEdge(node, sync_node); + } + // Forward the data from the wait node. + graph->AddEdge(wait_node, 0, sync_node, 0); + // Replace the output edges. + for (const DistributedTPURewriteHelpers::OutputDependency& dep : + output_dependencies) { + if (dep.dst_input == Graph::kControlSlot) { + graph->AddControlEdge(sync_node, dep.dst); + } else { + graph->AddEdge(sync_node, dep.src_output, dep.dst, dep.dst_input); + } + } + return Status::OK(); +} + + +Status AddShutdownNode( + const NodeDef& shutdown_node_def, const string& shutdown_device_name, + const std::vector& + output_dependencies, + Graph* graph, Node** shutdown_node) { + NodeDef shutdown_def; + shutdown_def.set_name(shutdown_node_def.name()); + shutdown_def.set_op(kInternalShutdownOp); + shutdown_def.set_device(shutdown_device_name); + MergeDebugInfo(NodeDebugInfo(shutdown_node_def), &shutdown_def); + + Status status; + *shutdown_node = graph->AddNode(shutdown_def, &status); + if (!status.ok()) { + return status; + } + (*shutdown_node)->set_assigned_device_name(shutdown_device_name); + // Replace the output control edges. + for (const DistributedTPURewriteHelpers::OutputDependency& dep : + output_dependencies) { + if (dep.dst_input != Graph::kControlSlot) { + return errors::Internal("Shutdown node had non-control edge output"); + } + graph->AddControlEdge(*shutdown_node, dep.dst); + } + return Status::OK(); +} + +Status AddHostDisconnectNode(const string& host_device_name, + const std::vector& input_dependencies, + Node* post_disconnect_node, int output_index, + Graph* graph) { + NodeDef host_disconnect_def; + host_disconnect_def.set_name(graph->NewName("disconnect_tpu_host")); + host_disconnect_def.set_op(kHostDisconnectOp); + host_disconnect_def.set_device(host_device_name); + MergeDebugInfo(NodeDebugInfo(post_disconnect_node->def()), + &host_disconnect_def); + + Status status; + Node* host_disconnect_node = graph->AddNode(host_disconnect_def, &status); + if (!status.ok()) { + return status; + } + host_disconnect_node->set_assigned_device_name(host_device_name); + // Replace the input control edges. + for (Node* src_node : input_dependencies) { + graph->AddControlEdge(src_node, host_disconnect_node); + } + if (output_index == -1) { + graph->AddControlEdge(host_disconnect_node, post_disconnect_node); + } else { + graph->AddEdge(host_disconnect_node, 0, post_disconnect_node, output_index); + } + return Status::OK(); +} + +} // namespace + +Status DistributedTPUConfigurationRewritePass::Run( + const GraphOptimizationPassOptions& options) { + VLOG(1) << "DistributedTPUConfigurationRewritePass::Run"; + + Graph* graph = options.graph->get(); + + if (VLOG_IS_ON(1)) { + DumpGraphToFile("distributed_tpu_configuration_before", *graph, + options.flib_def); + } + + // This pass can only run in the session master, which should fill + // in the device_set field to the options. 
+ TF_RET_CHECK(options.device_set != nullptr); + + TF_RETURN_IF_ERROR( + DistributedTPURewriteHelpers::ForConfigurationNodeMatchingType( + kConfigureOp, graph, *options.device_set, + [](const NodeDef& configuration_node_def, + const string& configuration_device_name, + const std::vector& host_devices, + const std::vector& input_dependencies, + const std::vector& + output_dependencies, + Graph* graph) -> Status { + const std::string& embedding_attr_string = GetNodeAttrString( + AttrSlice(configuration_node_def), kEmbeddingConfigurationAttr); + + if (!embedding_attr_string.empty()) { + return errors::InvalidArgument("embedding_config must be empty."); + } + + bool is_global_init = false; + bool enable_whole_mesh_compilations = false; + TF_RETURN_IF_ERROR(GetNodeAttr(configuration_node_def, + "is_global_init", &is_global_init)); + TryGetNodeAttr(configuration_node_def, + "enable_whole_mesh_compilations", + &enable_whole_mesh_compilations); + TF_RETURN_IF_ERROR(SetTPUInitMode( + is_global_init ? TPUInitMode::kGlobal : TPUInitMode::kRegular)); + + bool compilation_failure_closes_chips; + TF_RETURN_IF_ERROR(GetNodeAttr(configuration_node_def, + "compilation_failure_closes_chips", + &compilation_failure_closes_chips)); + internal::SetTpuCompilationFailureClosesChips( + compilation_failure_closes_chips); + + // Add the global TPU system configuration node. + Node* configuration_node; + TF_RETURN_IF_ERROR(AddConfigurationNode( + configuration_device_name, host_devices.size(), graph, + enable_whole_mesh_compilations, &configuration_node)); + + // Add the host disconnect nodes. + for (int i = 0; i < host_devices.size(); ++i) { + const auto host_device = host_devices[i]; + TF_RETURN_IF_ERROR( + AddHostDisconnectNode(host_device->name(), input_dependencies, + configuration_node, i, graph)); + } + + // Add the host configuration nodes. + std::vector host_configuration_nodes; + for (const auto host_device : host_devices) { + Node* host_configuration_node; + TF_RETURN_IF_ERROR(AddHostConfigNode( + host_device->name(), configuration_node, graph, + enable_whole_mesh_compilations, &host_configuration_node)); + host_configuration_nodes.push_back(host_configuration_node); + } + + // Add the node to wait for the system configuration to + // stabilize. Use the name of the original dummy Op in case it was + // the target of a Session::Run call. + Node* wait_node; + TF_RETURN_IF_ERROR(AddWaitNode(configuration_device_name, + host_configuration_nodes, graph, + &wait_node)); + + // Add the nodes to set the global TPU ids at each host. 
+ std::vector global_array_id_nodes; + for (const auto host_device : host_devices) { + Node* global_array_id_node; + TF_RETURN_IF_ERROR(AddGlobalTPUArrayNode(host_device->name(), + wait_node, graph, + &global_array_id_node)); + global_array_id_nodes.push_back(global_array_id_node); + } + + if (host_devices.empty()) { + return errors::InvalidArgument("TPU job contains no CPU devices"); + } + TF_RET_CHECK(!host_devices.empty()); + + TF_RETURN_IF_ERROR(AddSynchronizationNode( + configuration_node_def, host_devices.front()->name(), + global_array_id_nodes, wait_node, output_dependencies, graph)); + + return Status::OK(); + })); + + if (VLOG_IS_ON(1)) { + DumpGraphToFile("distributed_tpu_configuration_after", *graph, + options.flib_def); + } + + VLOG(1) << "DistributedTPUConfigurationRewritePass::Run() finished"; + return Status::OK(); +} + +Status DistributedTPUShutdownRewritePass::Run( + const GraphOptimizationPassOptions& options) { + VLOG(1) << "DistributedTPUShutdownRewritePass::Run"; + + Graph* graph = options.graph->get(); + + if (VLOG_IS_ON(1)) { + DumpGraphToFile("distributed_tpu_shutdown_before", *graph, + options.flib_def); + } + + // This pass can only run in the session master, which should fill + // in the device_set field to the options. + TF_RET_CHECK(options.device_set != nullptr); + + TF_RETURN_IF_ERROR( + DistributedTPURewriteHelpers::ForConfigurationNodeMatchingType( + kShutdownOp, graph, *options.device_set, + [](const NodeDef& shutdown_node_def, + const string& shutdown_device_name, + const std::vector& host_devices, + const std::vector& input_dependencies, + const std::vector& + output_dependencies, + Graph* graph) -> Status { + Node* shutdown_node; + TF_RETURN_IF_ERROR( + AddShutdownNode(shutdown_node_def, shutdown_device_name, + output_dependencies, graph, &shutdown_node)); + + // Add the host disconnect nodes. + for (const auto host_device : host_devices) { + TF_RETURN_IF_ERROR( + AddHostDisconnectNode(host_device->name(), input_dependencies, + shutdown_node, -1, graph)); + } + + return Status::OK(); + })); + + if (VLOG_IS_ON(1)) { + DumpGraphToFile("distributed_tpu_shutdown_after", *graph, options.flib_def); + } + + VLOG(1) << "DistributedTPUShutdownRewritePass::Run() finished"; + return Status::OK(); +} + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_configuration_rewrite_pass.h b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_configuration_rewrite_pass.h new file mode 100644 index 00000000000..191f32f9505 --- /dev/null +++ b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_configuration_rewrite_pass.h @@ -0,0 +1,51 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Rewrites ConfigureDistributedTPU Op into a graph that configures each host. 
+// +// See the comment at the top of +// third_party/tensorflow/core/ops/tpu_configuration_ops.cc to see the +// sequence of Ops used to configure a distributed TPU system. + +#ifndef TENSORFLOW_CORE_TPU_GRAPH_REWRITE_DISTRIBUTED_TPU_CONFIGURATION_REWRITE_PASS_H_ +#define TENSORFLOW_CORE_TPU_GRAPH_REWRITE_DISTRIBUTED_TPU_CONFIGURATION_REWRITE_PASS_H_ + +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/platform/env.h" + +namespace tensorflow { + +// Replaces dummy ConfigureDistributedTPU Ops assigned to TPU_SYSTEM +// devices with _ConfigureDistributedTPU and _WaitForDistributedTPU +// Ops on TPU_SYSTEM, and _InitializeHostForDistributedTPU on the CPU +// device of each host in the same job as the given TPU_SYSTEM device. +class DistributedTPUConfigurationRewritePass : public GraphOptimizationPass { + public: + Status Run(const GraphOptimizationPassOptions& options) override; +}; + +// Replaces dummy ShutdownDistributedTPU Ops assigned to TPU_SYSTEM +// devices with _ShutdownDistributedTPU Ops on TPU_SYSTEM and +// _DisconnectHostFromDistributedTPUSystem on the CPU device of each +// host in the same job as the given TPU_SYSTEM device. +class DistributedTPUShutdownRewritePass : public GraphOptimizationPass { + public: + Status Run(const GraphOptimizationPassOptions& options) override; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_GRAPH_REWRITE_DISTRIBUTED_TPU_CONFIGURATION_REWRITE_PASS_H_ diff --git a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_configuration_rewrite_registration.cc b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_configuration_rewrite_registration.cc new file mode 100644 index 00000000000..db2b3a53f20 --- /dev/null +++ b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_configuration_rewrite_registration.cc @@ -0,0 +1,29 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/common_runtime/optimization_registry.h" +#include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_configuration_rewrite_pass.h" + +namespace tensorflow { +namespace { + +// This pass removes the TPUEmbeddingConfiguration in ConfigureDistributedTPU. +REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 20, + DistributedTPUConfigurationRewritePass); +REGISTER_OPTIMIZATION(OptimizationPassRegistry::PRE_PLACEMENT, 20, + DistributedTPUShutdownRewritePass); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_helpers.cc b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_helpers.cc new file mode 100644 index 00000000000..965a17481cb --- /dev/null +++ b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_helpers.cc @@ -0,0 +1,255 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Helper functions for TPU rewrite passes. + +#include "tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_helpers.h" + +#include + +#include "tensorflow/core/common_runtime/device_set.h" +#include "tensorflow/core/tpu/tpu_defs.h" +#include "tensorflow/core/util/device_name_utils.h" + +namespace tensorflow { + +// LINT.IfChange +Status DistributedTPURewriteHelpers::GetSystemDevice( + const string& system_spec_string, const DeviceSet& device_set, + DeviceNameUtils::ParsedName* system_spec, Device** system_device) { + if (!DeviceNameUtils::ParseFullName(system_spec_string, system_spec)) { + system_spec->Clear(); + } + + // Callers may have relied on an Op only being registered on TPU_SYSTEM + // devices to ensure the Op is placed there. Augment the device spec to make + // the device type explicit. + if (!system_spec->has_type || system_spec->type != DEVICE_TPU_SYSTEM) { + system_spec->type = DEVICE_TPU_SYSTEM; + system_spec->has_type = true; + system_spec->id = 0; + system_spec->has_id = true; + } + + std::vector system_devices; + device_set.FindMatchingDevices(*system_spec, &system_devices); + if (system_devices.empty()) { + if (system_spec_string.empty()) { + return errors::InvalidArgument( + "No TPU_SYSTEM device found. Please ensure that you're connected to " + "a host with a TPU_SYSTEM device."); + } + return errors::InvalidArgument("No matching devices found for '", + system_spec_string, "'"); + } else if (system_devices.size() > 1) { + // Validate that all system devices are part of the same job. + std::unordered_set job_names; + for (auto device : system_devices) { + const auto& parsed_name = device->parsed_name(); + TF_RET_CHECK(parsed_name.has_job); + job_names.insert(parsed_name.job); + } + if (job_names.size() > 1) { + return errors::InvalidArgument( + "System devices cannot be part " + "of multiple different jobs. Found: ", + str_util::Join(job_names, ",")); + } + + // Identify the lexicographically first device from the list of + // valid TPU SYSTEM devices, so that every process in the same + // 'cluster' definition uses the same system device. 
+ std::sort(system_devices.begin(), system_devices.end(), + [](Device* i, Device* j) { + auto i_name = i->parsed_name(); + auto j_name = j->parsed_name(); + if (i_name.replica != j_name.replica) { + return i_name.replica < j_name.replica; + } + return i_name.task < j_name.task; + }); + } + + *system_device = system_devices[0]; + if (!DeviceNameUtils::ParseFullName((*system_device)->name(), system_spec)) { + return errors::InvalidArgument("Unable to re-parse system device name ", + (*system_device)->name(), + " as a device spec."); + } + return Status::OK(); +} +// LINT.ThenChange(//tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.cc) + +// LINT.IfChange +Status DistributedTPURewriteHelpers::GetHostSystemDevices( + const DeviceNameUtils::ParsedName& system_spec, const DeviceSet& device_set, + std::vector* host_system_devices) { + DeviceNameUtils::ParsedName host_spec; + if (system_spec.has_job) { + // The system Op has been explicitly assigned to a job, so we want + // all the hosts in that job. + CHECK(DeviceNameUtils::ParseFullName( + strings::StrCat("/job:", system_spec.job, "/device:", DEVICE_TPU_SYSTEM, + ":0"), + &host_spec)); + } else { + // The system Op has not been explicitly assigned to a + // job, so take all hosts in the system. There will be a runtime + // error if some of those hosts don't contain TPU devices. + CHECK(DeviceNameUtils::ParseFullName( + strings::StrCat("/device:", DEVICE_TPU_SYSTEM, ":0"), &host_spec)); + } + device_set.FindMatchingDevices(host_spec, host_system_devices); + + TF_RET_CHECK(!host_system_devices->empty()) + << "No hosts found matching device spec " + << DeviceNameUtils::ParsedNameToString(host_spec); + + // Check that all the devices belong to the same job. + TF_RET_CHECK((*host_system_devices)[0]->parsed_name().has_job); + const string& job_name = (*host_system_devices)[0]->parsed_name().job; + int replica = (*host_system_devices)[0]->parsed_name().replica; + for (const auto host_device : *host_system_devices) { + const auto& parsed_name = host_device->parsed_name(); + TF_RET_CHECK(parsed_name.has_job); + if (parsed_name.job != job_name) { + return errors::InvalidArgument( + "All TPU host devices must be in the same job"); + } + TF_RET_CHECK(parsed_name.has_replica); + if (parsed_name.replica != replica) { + return errors::InvalidArgument( + "All TPU host devices must be in the same replica"); + } + } + + // Sort the devices by replica and then task. + std::sort(host_system_devices->begin(), host_system_devices->end(), + [](Device* i, Device* j) { + auto i_name = i->parsed_name(); + auto j_name = j->parsed_name(); + return i_name.task < j_name.task; + }); + return Status::OK(); +} +// LINT.ThenChange(//tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.cc) + +// LINT.IfChange +Status DistributedTPURewriteHelpers::GetTPUDevices( + const DeviceNameUtils::ParsedName& system_spec, const DeviceSet& device_set, + int* num_tpus_per_host, std::vector>* tpu_devices) { + // GetHostSystemDevices returns the CPU device on each host that is + // going to be used for executing TPU code. + std::vector host_system_devices; + TF_RETURN_IF_ERROR(DistributedTPURewriteHelpers::GetHostSystemDevices( + system_spec, device_set, &host_system_devices)); + + // Enumerate all the physical devices. Enumerate devices on task 0, + // then task 1, etc. 
+ std::sort(host_system_devices.begin(), host_system_devices.end(), + [](Device* i, Device* j) { + return i->parsed_name().task < j->parsed_name().task; + }); + + *num_tpus_per_host = 0; + tpu_devices->clear(); + tpu_devices->reserve(host_system_devices.size()); + for (const auto device : host_system_devices) { + // Make a copy of the parsed name because we are going to change it. + DeviceNameUtils::ParsedName device_spec = device->parsed_name(); + device_spec.has_type = true; + device_spec.type = "TPU"; + // Enumerate all the available TPUs. + device_spec.has_id = false; + std::vector host_tpu_devices; + device_set.FindMatchingDevices(device_spec, &host_tpu_devices); + // Sort the devices by device id. + std::sort(host_tpu_devices.begin(), host_tpu_devices.end(), + [](Device* i, Device* j) { + return i->parsed_name().id < j->parsed_name().id; + }); + if (tpu_devices->empty()) { + // First iteration: set *num_tpus_per_host to the number of TPUs on the + // first host. + *num_tpus_per_host = host_tpu_devices.size(); + } else if (*num_tpus_per_host != host_tpu_devices.size()) { + // Subsequent iterations: check the number of TPUs match the number on + // the first host. + return errors::InvalidArgument( + "Mismatched number of TPU devices in cluster ", *num_tpus_per_host, + " vs. ", host_tpu_devices.size()); + } + tpu_devices->push_back(std::move(host_tpu_devices)); + } + return Status::OK(); +} +// LINT.ThenChange(//tensorflow/compiler/mlir/tensorflow/utils/tpu_rewrite_device_util.cc) + +Status DistributedTPURewriteHelpers::ForConfigurationNodeMatchingType( + const string& node_type, Graph* graph, const DeviceSet& device_set, + const std::function< + Status(const NodeDef& configuration_node_def, + const string& configuration_device_name, + const std::vector& host_devices, + const std::vector& input_dependencies, + const std::vector& output_dependencies, + Graph* graph)>& action) { + // Find all the matching nodes before mutating the graph. + std::vector nodes; + for (Node* node : graph->nodes()) { + if (node->type_string() == node_type) { + nodes.push_back(node); + } + } + + for (Node* node : nodes) { + string spec_string = node->requested_device(); + DeviceNameUtils::ParsedName spec; + Device* device; + TF_RETURN_IF_ERROR( + GetSystemDevice(spec_string, device_set, &spec, &device)); + const string& device_name = device->name(); + + std::vector host_devices; + TF_RETURN_IF_ERROR(GetHostSystemDevices(spec, device_set, &host_devices)); + + std::vector input_dependencies; + for (const Edge* edge : node->in_edges()) { + // Config ops have no inputs, so all edges must be control edges. + CHECK(edge->IsControlEdge()); + input_dependencies.push_back(edge->src()); + } + std::vector output_dependencies; + for (const Edge* edge : node->out_edges()) { + OutputDependency dep; + dep.src_output = edge->src_output(); + dep.dst = edge->dst(); + dep.dst_input = edge->dst_input(); + output_dependencies.push_back(dep); + } + NodeDef node_def = node->def(); + + // Remove the node now so we can insert a new node with the same + // name inside the action. 
+      graph->RemoveNode(node);
+
+      TF_RETURN_IF_ERROR(action(node_def, device_name, host_devices,
+                                input_dependencies, output_dependencies, graph));
+  }
+
+  return Status::OK();
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_helpers.h b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_helpers.h
new file mode 100644
index 00000000000..40aacceb5d5
--- /dev/null
+++ b/tensorflow/core/tpu/graph_rewrite/distributed_tpu_rewrite_helpers.h
@@ -0,0 +1,98 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Helper functions for TPU rewrite passes.
+
+#ifndef TENSORFLOW_CORE_TPU_GRAPH_REWRITE_DISTRIBUTED_TPU_REWRITE_HELPERS_H_
+#define TENSORFLOW_CORE_TPU_GRAPH_REWRITE_DISTRIBUTED_TPU_REWRITE_HELPERS_H_
+
+#include "tensorflow/compiler/xla/status_macros.h"
+#include "tensorflow/core/common_runtime/device_set.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/resource_mgr.h"
+#include "tensorflow/core/util/device_name_utils.h"
+
+namespace tensorflow {
+
+class DistributedTPURewriteHelpers {
+ public:
+  // Given a user-assigned device string, system_spec_string, parse it into
+  // system_spec. Verify that the device type is either TPU_SYSTEM or
+  // unassigned, and in the latter case set it to TPU_SYSTEM:0. Having set the
+  // type, verify that the spec matches a unique device in device_set, and
+  // return that device in system_device. The normal use case is for
+  // system_spec_string to identify the TPU_SYSTEM on replica 0, task 0 of the
+  // job that contains the TPU hardware.
+  // TODO(b/110910013): Possibly remove the tpu system device.
+  static Status GetSystemDevice(const string& system_spec_string,
+                                const DeviceSet& device_set,
+                                DeviceNameUtils::ParsedName* system_spec,
+                                Device** system_device);
+
+  // Given a parsed system spec (e.g., the one returned above from
+  // GetSystemDevice), return in host_devices the TPU_SYSTEM:0 device on
+  // every host in the spec's job. If the spec does not include an explicit job,
+  // "localhost" is used. Returns an error if system_spec matches devices from
+  // multiple jobs or replicas.
+  static Status GetHostSystemDevices(
+      const DeviceNameUtils::ParsedName& system_spec,
+      const DeviceSet& device_set, std::vector<Device*>* host_system_devices);
+
+  // Given a parsed system spec (e.g., the one returned above from
+  // GetSystemDevice), sets `*tpu_devices` to a per-host vector of the TPU
+  // devices on every host in the spec's job. If the spec does not include an
+  // explicit job, "localhost" is used. Sets `*num_tpus_per_host` to the number
+  // of TPU devices in each host, and verifies that each host in the job has
+  // the same number of TPU devices.
+  // Returns an error if system_spec matches devices from multiple jobs or
+  // replicas.
+ static Status GetTPUDevices(const DeviceNameUtils::ParsedName& system_spec, + const DeviceSet& device_set, + int* num_tpus_per_host, + std::vector>* tpu_devices); + + // Perform 'action' on every node in 'graph' of type + // 'node_type'. This function is designed for use with configuration + // Ops that have no inputs or outputs. The arguments passed to 'action' are: + // 'configuration_node_name': the name of the node that matched + // 'configuration_device_name': the name of the device that the + // matching node is placed on + // 'host_devices': the set of TPU_SYSTEM devices on hosts with TPUs that are + // in the same system as the node that matched. + // 'input_dependencies': the set of nodes that have control edges to + // the matching node. + // 'output_dependencies': the set of output port, destination node, input port + // triples that have edges from the matching node. Input port is + // Graph::kControlSlot for a control edge. + // 'graph': the graph being mutated. + struct OutputDependency { + int src_output; + Node* dst; + int dst_input; + }; + static Status ForConfigurationNodeMatchingType( + const string& node_type, Graph* graph, const DeviceSet& device_set, + const std::function< + Status(const NodeDef& configuration_node_def, + const string& configuration_device_name, + const std::vector& host_devices, + const std::vector& input_dependencies, + const std::vector& output_dependencies, + Graph* graph)>& action); +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_GRAPH_REWRITE_DISTRIBUTED_TPU_REWRITE_HELPERS_H_ diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD new file mode 100644 index 00000000000..0e5a91c961c --- /dev/null +++ b/tensorflow/core/tpu/kernels/BUILD @@ -0,0 +1,288 @@ +# TPU Kernel Implementations +load( + "//tensorflow/core/platform:build_config.bzl", + "tf_proto_library_cc", +) + +package( + default_visibility = [ + "//tensorflow/core/tpu:__subpackages__", + "//tensorflow/stream_executor/tpu:__subpackages__", + ], + licenses = ["notice"], # Apache 2.0 +) + +cc_library( + name = "tpu_compile_op_options", + srcs = ["tpu_compile_op_options.cc"], + hdrs = ["tpu_compile_op_options.h"], +) + +cc_library( + name = "tpu_configuration_ops", + srcs = ["tpu_configuration_ops.cc"], + hdrs = ["tpu_configuration_ops.h"], + deps = [ + ":tpu_mesh_state_interface", + "//tensorflow/c:tf_status", + "//tensorflow/c:tf_status_helper", + "//tensorflow/compiler/xla:util", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/platform:refcount", + "//tensorflow/core/tpu:tpu_config_c_api", + "//tensorflow/core/tpu:tpu_configuration", + "//tensorflow/core/tpu:tpu_defs", + "//tensorflow/core/tpu:tpu_library_loader", + "//tensorflow/stream_executor/tpu:proto_helper", + ], + alwayslink = 1, +) + +cc_library( + name = "tpu_compile_c_api_hdrs", + hdrs = ["tpu_compile_c_api.h"], + deps = [ + ":tpu_mesh_state_c_api", + "//tensorflow/c:tf_datatype", + "//tensorflow/stream_executor/tpu:proto_helper", + "//tensorflow/stream_executor/tpu:tpu_executor_c_api_hdrs", + ], +) + +tf_proto_library_cc( + name = "tpu_executable_info_proto", + srcs = ["tpu_executable_info.proto"], + cc_api_version = 2, + protodeps = [ + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service:hlo_proto", + "//tensorflow/core:protos_all", + ], +) + +tf_proto_library_cc( + name = "tpu_compile_proto", + srcs = ["tpu_compile.proto"], + cc_api_version = 2, + protodeps = [ + ":tpu_executable_info_proto", + 
"//tensorflow/compiler/tf2xla:host_compute_metadata_proto", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service:hlo_proto", + "//tensorflow/core:protos_all", + "//tensorflow/core/protobuf/tpu:compile_metadata_proto", + ], +) + +cc_library( + name = "tpu_compilation_cache_key", + srcs = [], + hdrs = [ + "tpu_compilation_cache_key.h", + ], + deps = ["@com_google_absl//absl/types:optional"], +) + +cc_library( + name = "tpu_compile_op_support", + srcs = ["tpu_compile_op_support.cc"], + hdrs = ["tpu_compile_op_support.h"], + deps = [ + ":tpu_compilation_cache_key", + ":tpu_compile_c_api_hdrs", + ":tpu_compile_proto_cc", + ":tpu_executable_info_proto_cc", + "//tensorflow/cc:ops", + "//tensorflow/compiler/tf2xla:common", + "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/xla:debug_options_flags", + "//tensorflow/compiler/xla:shape_tree", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:xla_data_proto_cc", + "//tensorflow/compiler/xla/client:compile_only_client", + "//tensorflow/compiler/xla/service:computation_layout", + "//tensorflow/compiler/xla/service:dump", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_module_config", + "//tensorflow/compiler/xla/service:hlo_module_group", + "//tensorflow/core:framework", + "//tensorflow/core/framework:protos_all_cc", + "//tensorflow/core/protobuf/tpu:compile_metadata_proto_cc", + "//tensorflow/stream_executor/tpu:proto_helper", + "//tensorflow/stream_executor/tpu:status_helper", + "@com_google_absl//absl/types:optional", + "@com_google_absl//absl/types:span", + ], +) + +cc_library( + name = "tpu_compilation_cache_entry", + hdrs = [ + "tpu_compilation_cache_entry.h", + ], + deps = [ + ":tpu_executable_info_proto_cc", + ":tpu_program", + "//tensorflow/compiler/xla/service:hlo_proto_cc", + "//tensorflow/core/lib/core:refcount", + ], +) + +cc_library( + name = "tpu_compilation_cache_lookup", + srcs = ["tpu_compilation_cache_lookup.cc"], + hdrs = [ + "tpu_compilation_cache_lookup.h", + ], + deps = [ + ":tpu_compilation_cache_entry", + ":tpu_compilation_cache_external", + ":tpu_compilation_cache_proto_cc", + "//tensorflow/core/lib/core:refcount", + "//tensorflow/core/platform:status", + "//tensorflow/core/profiler/lib:traceme", + ], +) + +cc_library( + name = "tpu_mesh_state_c_api", + hdrs = ["tpu_mesh_state_c_api.h"], +) + +cc_library( + name = "tpu_mesh_state_interface", + srcs = [], + hdrs = ["tpu_mesh_state_interface.h"], + deps = [ + ":tpu_compile_c_api_hdrs", + ":tpu_mesh_state_c_api", + "//tensorflow/compiler/xla/service", + "//tensorflow/core:framework", + "//tensorflow/core/platform:errors", + "//tensorflow/core/protobuf/tpu:compile_metadata_proto_cc", + "//tensorflow/core/tpu:tpu_config_c_api", + ], +) + +cc_library( + name = "tpu_program", + srcs = ["tpu_program.cc"], + hdrs = ["tpu_program.h"], + deps = [ + ":tpu_compile_c_api_hdrs", + ":tpu_compile_op_support", + ":tpu_compile_proto_cc", + ":tpu_executable_info_proto_cc", + "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/xla:xla_proto_cc", + "//tensorflow/compiler/xla/client:compile_only_client", + "//tensorflow/compiler/xla/service:computation_placer", + "//tensorflow/compiler/xla/service:hlo_module_group", + "//tensorflow/compiler/xla/service:hlo_proto_cc", + "//tensorflow/core:lib", + "//tensorflow/core/protobuf/tpu:compile_metadata_proto_cc", + "//tensorflow/stream_executor/tpu:proto_helper", + 
"//tensorflow/stream_executor/tpu:status_helper", + "//tensorflow/stream_executor/tpu:tpu_platform_interface", + "@com_google_absl//absl/types:optional", + ], +) + +cc_library( + name = "tpu_compilation_cache_external", + srcs = ["tpu_compilation_cache_external.cc"], + hdrs = [ + "tpu_compilation_cache_external.h", + ], + deps = [ + ":tpu_compilation_cache_entry", + ":tpu_compilation_cache_key", + ":tpu_compilation_cache_metrics", # buildcleaner: keep + ":tpu_compilation_cache_metrics_hdrs", + ":tpu_compilation_cache_proto_cc", + ":tpu_compile_c_api_hdrs", + ":tpu_compile_op_support", + ":tpu_mesh_state_interface", + ":tpu_program", + ":tpu_util", + ":trace_util_hdrs", + "//tensorflow/compiler/xla/service", + "//tensorflow/compiler/xla/service:hlo_proto_cc", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/platform:refcount", + "//tensorflow/core/profiler/lib:traceme", + "//tensorflow/core/protobuf/tpu:compile_metadata_proto_cc", + "@com_google_absl//absl/container:node_hash_map", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/types:span", + ], +) + +cc_library( + name = "tpu_compilation_cache_metrics_hdrs", + hdrs = ["tpu_compilation_cache_metrics.h"], + deps = [ + "//tensorflow/core/platform:types", + "@com_google_absl//absl/strings", + ], +) + +cc_library( + name = "tpu_compilation_cache_metrics", + srcs = ["tpu_compilation_cache_metrics.cc"], + deps = [ + ":tpu_compilation_cache_metrics_hdrs", + ], +) + +cc_library( + name = "trace_util_hdrs", + srcs = [], + hdrs = ["trace_util.h"], + deps = [ + "//tensorflow/core:protos_all_cc", + "@com_google_absl//absl/strings", + ], +) + +cc_library( + name = "tpu_util_hdrs", + srcs = [], + hdrs = ["tpu_util.h"], + deps = [ + ":tpu_compilation_cache_key", + "//tensorflow/cc:ops", + "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla/client:compile_only_client", + "//tensorflow/core:protos_all_cc", + "@com_google_absl//absl/strings", + ], +) + +cc_library( + name = "tpu_util", + srcs = ["tpu_util.cc"], + hdrs = ["tpu_util.h"], + deps = [ + ":tpu_compilation_cache_key", + "//tensorflow/cc:ops", + "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla/client:compile_only_client", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "@com_google_absl//absl/strings", + ], + alwayslink = 1, +) + +tf_proto_library_cc( + name = "tpu_compilation_cache_proto", + srcs = ["tpu_compilation_cache.proto"], + cc_api_version = 2, +) diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache.proto b/tensorflow/core/tpu/kernels/tpu_compilation_cache.proto new file mode 100644 index 00000000000..8308cba128e --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache.proto @@ -0,0 +1,25 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +syntax = "proto3"; + +package tensorflow.tpu; + +// Target type for compilation cache fetch operation. +enum CompilationCacheFetchTarget { + INVALID = 0; + MAIN = 1; + SHARDING = 2; + UNSHARDING = 3; +} diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h new file mode 100644 index 00000000000..d16b2d521f6 --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h @@ -0,0 +1,84 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef EXPERIMENTAL_BRAIN_TPU_1VM_MINIEXECUTOR_TPU_COMPILATION_CACHE_ENTRY_H_ +#define EXPERIMENTAL_BRAIN_TPU_1VM_MINIEXECUTOR_TPU_COMPILATION_CACHE_ENTRY_H_ + +#include "tensorflow/compiler/xla/service/hlo.pb.h" +#include "tensorflow/core/lib/core/refcount.h" +#include "tensorflow/core/tpu/kernels/tpu_executable_info.pb.h" +#include "tensorflow/core/tpu/kernels/tpu_program.h" + +namespace tensorflow { +namespace tpu { + +class CompilationCacheEntry { + public: + explicit CompilationCacheEntry( + std::unique_ptr tpu_program) + : tpu_program_(std::move(tpu_program)) {} + + // Constructor for an empty entry. + CompilationCacheEntry() + : tpu_program_(nullptr) {} + + const TPUExecutableInfoProto* get_executable_info() const { + return &tpu_program_->executable_info(); + } + + const TPUHostTransferInfoProto* get_host_transfer_info() const { + return &tpu_program_->host_transfer_info(); + } + + const xla::HloProto* get_hlo_metadata() const { + return &tpu_program_->hlo_metadata(); + } + + // TODO(henrytan,jiawenhao): When should we expect more than one + // XLA_TpuProgram* per TpuProgram? Remove the program_count CHECK below then. + const XLA_TpuProgram* get_tpu_program() const { + CHECK_EQ(tpu_program_->program_count(), 1); + return tpu_program_->tpu_programs()[0]; + } + + private: + std::unique_ptr tpu_program_; +}; + +// Base class for a reference to a cached proto. A unique_ptr to a +// CompilationCacheEntryRef is returned by all the cache Lookup methods below, +// and ensures the underlying proto is not garbage-collected until the client +// discards the ptr. +class CompilationCacheEntryRef { + public: + virtual ~CompilationCacheEntryRef() = default; + + // Returns a CompilationCacheEntry that should not be used beyond the lifetime + // of the CompilationCacheEntryRef. + virtual CompilationCacheEntry get() = 0; +}; + +// Base class that holds references to compiled protos so that the protos are +// not garbage-collected before being used by execute ops. Use +// TpuCompilationCache::MakePerStepRefHolder to create an instance of a concrete +// ref holder object. 
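+// A rough usage sketch (hypothetical per-step wiring, for illustration only):
+// the holder is a ResourceBase, so a caller would typically park it in the
+// step's resource manager and let the step container clean it up, e.g.
+//
+//   auto* holder = cache->MakePerStepRefHolder();
+//   TF_RETURN_IF_ERROR(step_resource_manager->Create(
+//       "tpu_compilation_cache", "ref_holder", holder));
+//   // Cleanup of the step container Unrefs the holder, which in turn discards
+//   // the per-step references to cache entries.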
+class CompilationRefHolder : public ResourceBase { + public: + ~CompilationRefHolder() override = default; +}; + +} // namespace tpu +} // namespace tensorflow + +#endif // EXPERIMENTAL_BRAIN_TPU_1VM_MINIEXECUTOR_TPU_COMPILATION_CACHE_ENTRY_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc b/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc new file mode 100644 index 00000000000..8dbf60803cc --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc @@ -0,0 +1,791 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h" + +#include + +#include "absl/strings/str_cat.h" +#include "tensorflow/compiler/xla/service/hlo.pb.h" +#include "tensorflow/core/lib/gtl/cleanup.h" +#include "tensorflow/core/platform/random.h" +#include "tensorflow/core/profiler/lib/traceme.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_metrics.h" +#include "tensorflow/core/tpu/kernels/tpu_compile_c_api.h" +#include "tensorflow/core/tpu/kernels/tpu_compile_op_support.h" +#include "tensorflow/core/tpu/kernels/tpu_program.h" +#include "tensorflow/core/tpu/kernels/tpu_util.h" +#include "tensorflow/core/tpu/kernels/trace_util.h" + +namespace tensorflow { +namespace tpu { + +namespace { + +using CompilationEntry = TpuCompilationCacheInterface::CompilationEntry; + +int64 get_uid() { + uint64 unsigned_rand = random::New64() & INT64_MAX; + return static_cast(unsigned_rand); +} + +void PopulateEntry(const std::string& key, CompilationEntry* entry, + std::unique_ptr tpu_program) { + // Make the unique keys for each cached proto. + for (int i = 0; i < tpu_program->program_count(); ++i) { + entry->proto_key.push_back(ProtoKeyForComputation(key, i)); + } + + entry->tpu_program = std::move(tpu_program); + entry->initialized = true; +} + +std::string ConstructCompilationCacheKey(const TpuCompilationCacheKey& key) { + if (!key.has_guaranteed_const) { + return key.prefix; + } + return absl::StrCat(key.prefix, "|", key.session_handle, "|", + key.guaranteed_const_fingerprint()); +} + +// Return fingerprint_in_metadata if it's not empty; otherwise read input tensor +// data to compute the fingerprint. 
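+// For example (illustrative values): with an empty `fingerprint_in_metadata`
+// and two guaranteed constants c0 and c1, the result is the chained hash
+//
+//   uint64_t f = TpuCompile_CreateGuaranteedConstFingerprint(
+//       0, c0.tensor_data().data(), c0.tensor_data().size());
+//   f = TpuCompile_CreateGuaranteedConstFingerprint(
+//       f, c1.tensor_data().data(), c1.tensor_data().size());
+//
+// rendered as a decimal string via std::to_string(f).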
+std::string GuaranteedConstFingerprint( + const string& fingerprint_in_metadata, + const OpInputList& guaranteed_constants) { + if (fingerprint_in_metadata.empty()) { + uint64_t fingerprint = 0; + for (const auto& constant : guaranteed_constants) { + fingerprint = TpuCompile_CreateGuaranteedConstFingerprint( + fingerprint, constant.tensor_data().data(), + constant.tensor_data().size()); + } + return std::to_string(fingerprint); + } else { + return fingerprint_in_metadata; + } +} + +std::string CreateShapePrefix( + const std::vector& dynamic_shapes) { + std::string shapes_prefix; + for (const TensorShape& shape : dynamic_shapes) { + for (int64 size : shape.dim_sizes()) { + absl::StrAppend(&shapes_prefix, size, ","); + } + absl::StrAppend(&shapes_prefix, ";"); + } + return shapes_prefix; +} + +// Include compilation configurations of the arguments that are not captured +// by the called graph. +std::string CreateConfigPrefix(const TPUCompileMetadataProto& metadata) { + std::string config_prefix; + for (const auto& arg : metadata.args()) { + if (arg.is_same_data_across_replicas()) { + absl::StrAppend(&config_prefix, ":s"); + // Same. + } else { + // Different. + absl::StrAppend(&config_prefix, ":"); + } + if (arg.enable_xla_sharding() == + tpu::TPUCompileMetadataProto::Arg::ALLOWED) { + // Enabled. + absl::StrAppend(&config_prefix, "e"); + } + if (arg.unrestricted_layout()) { + // Unrestricted. + absl::StrAppend(&config_prefix, ":u"); + } + absl::StrAppend(&config_prefix, ",type(", arg.dtype(), ")"); + if (arg.has_shape()) { + absl::StrAppend(&config_prefix, ",shape("); + for (const auto& dim : arg.shape().dim()) { + absl::StrAppend(&config_prefix, dim.size(), ","); + } + absl::StrAppend(&config_prefix, ")"); + } + } + return config_prefix; +} + +} // namespace + +TpuCompilationCacheInterface::TpuCompilationCacheInterface( + int64_t max_cache_size) + : max_cache_size_(max_cache_size) { + if (max_cache_size < 0) { + LOG(FATAL) << "`max_cache_size` value must be greater than equal to 0"; + } + VLOG(1) << "Created compilation cache size " << max_cache_size_ << " bytes."; +} + +TpuCompilationCacheInterface::~TpuCompilationCacheInterface() { + VLOG(1) << "TpuCompilationCacheInterface::~TpuCompilationCacheInterface()"; + // A buggy client may be holding onto a reference, or a client might have + // crashed while holding onto a reference. In either case, discard all + // outstanding client references to avoid leaking storage. + for (const auto& entry : entries_by_uid_) { + while (entry.second->external_references > 0) { + TF_CHECK_OK(Release(entry.first)); + } + } + while (!entries_by_last_use_.empty()) { + UnloadAndDestroy(MarkOldestEntryForEviction()); + } + // By the time the cache is deleted all reference holders should have already + // been deleted, since they were holding references to the cache. So all + // entries should be gone at this point. 
+  CHECK_EQ(cache_store_.size(), 0);
+  CHECK_EQ(entries_by_uid_.size(), 0);
+  CHECK_EQ(entries_by_proto_key_.size(), 0);
+  CHECK_EQ(cache_size_, 0);
+  CHECK_EQ(marked_for_eviction_size_, 0);
+}
+
+std::string TpuCompilationCacheInterface::FindCacheKey(
+    const TpuCompilationCacheKey& subgraph_key) const {
+  if (!subgraph_key.has_guaranteed_const) {
+    return subgraph_key.prefix;
+  }
+  auto iter = session_key_map_.find(
+      strings::StrCat(subgraph_key.prefix, subgraph_key.session_handle));
+  if (iter != session_key_map_.end()) {
+    return iter->second;
+  }
+  iter = fingerprint_key_map_.find(strings::StrCat(
+      subgraph_key.prefix, subgraph_key.guaranteed_const_fingerprint()));
+  if (iter != fingerprint_key_map_.end()) {
+    return iter->second;
+  }
+  VLOG(1) << "No matching cache key found for key "
+          << ConstructCompilationCacheKey(subgraph_key);
+  return "";
+}
+
+void TpuCompilationCacheInterface::InsertEntry(
+    const std::string& cache_key, const TpuCompilationCacheKey& subgraph_key,
+    CompilationEntry* entry) {
+  entry->parent = this;
+  entry->subgraph_key = cache_key;
+  entry->uid = get_uid();
+  TpuCompilationCacheMetrics::SetCacheEntryCount(cache_store_.size());
+  entry->cache_entry_debug_string = subgraph_key.prefix;
+  VLOG(1) << "Cache Initializing Entry Session Debug "
+          << entry->cache_entry_debug_string;
+
+  if (!subgraph_key.has_guaranteed_const) {
+    return;
+  }
+  session_key_map_.insert(std::make_pair(
+      strings::StrCat(subgraph_key.prefix, subgraph_key.session_handle),
+      cache_key));
+  fingerprint_key_map_.insert(std::make_pair(
+      strings::StrCat(subgraph_key.prefix,
+                      subgraph_key.guaranteed_const_fingerprint()),
+      cache_key));
+}
+
+CompilationEntry* TpuCompilationCacheInterface::InitializeEntry(
+    const string& key,
+    const std::function<Status(TpuProgram*)>& initialize_program,
+    const TpuCompilationCacheKey& subgraph_key) {
+  CompilationEntry* main_entry = new CompilationEntry();
+
+  // Add the entry to the cache, with size zero since there are no compiled
+  // programs in it. Once the subgraph has been compiled,
+  // UpdateEntryAfterCompilation will be called to potentially mark old entries
+  // that don't fit any more for eviction.
+  //
+  // At this point there is one reference to entry, which is owned by the caller
+  // who created the entry. A second reference, owned by the cache, will be
+  // added below since we leave the entry in the 'marked for eviction' state
+  // here.
+  InsertEntry(key, subgraph_key, main_entry);
+
+  // Initialize the programs outside the lock so that other cache operations
+  // can proceed during the (potentially lengthy) initialization.
+  Status initialization_status;
+
+  auto tpu_program = absl::make_unique<TpuProgram>();
+  {
+    mu_.Unlock();
+    {
+      profiler::TraceMe compile_programs_traceme(
+          "TPU compilation cache compile",
+          /*level=*/2);
+      initialization_status = initialize_program(tpu_program.get());
+    }
+    mu_.Lock();
+  }
+
+  main_entry->initialization_status = initialization_status;
+
+  // Add the entry to the uid index.
+  auto uid_inserted = entries_by_uid_.insert(
+      std::pair<int64, CompilationEntry*>(main_entry->uid, main_entry));
+  CHECK(uid_inserted.second);
+
+  if (initialization_status.ok()) {
+    // Compute the entry's total size once all members are initialized.
+    main_entry->total_size = tpu_program->program_size();
+  }
+
+  // TODO(henrytan): handle sharding/unsharding.
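+  // For example (illustrative key): if `key` is "K" and the compiled program
+  // holds two per-core protos, PopulateEntry below sets
+  //   main_entry->proto_key == {ProtoKeyForComputation("K", 0),
+  //                             ProtoKeyForComputation("K", 1)}
+  // and marks the entry as initialized.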
+ PopulateEntry(key, main_entry, std::move(tpu_program)); + + for (int64 i = 0; i < main_entry->proto_key.size(); ++i) { + auto entry_inserted = entries_by_proto_key_.insert( + std::pair>( + main_entry->proto_key[i], std::make_pair(main_entry, i))); + CHECK(entry_inserted.second); + } + + // Add the size to marked_for_eviction_size_ since it will be adjusted down + // again when the newly-created entry gets unmarked. + marked_for_eviction_size_ += main_entry->total_size; + return main_entry; +} + +/*static*/ TpuCompilationCacheKey +TpuCompilationCacheInterface::CreateCompilationCacheKey( + absl::string_view function_name, uint64 function_library_fingerprint, + absl::string_view mlir_module, + const tensorflow::OpInputList& guaranteed_constants, + const std::vector& dynamic_shapes, + const tensorflow::tpu::TPUCompileMetadataProto& metadata, + const TpuMeshStateInterface& mesh_state) { + VLOG(1) << "FunctionLibraryFingerprint:" << function_library_fingerprint; + std::string shapes_prefix = CreateShapePrefix(dynamic_shapes); + VLOG(1) << "shapes_prefix = " << shapes_prefix; + std::string config_prefix = CreateConfigPrefix(metadata); + VLOG(1) << "config_prefix = " << config_prefix; + std::vector flattened_device_ids; + if (metadata.has_device_assignment()) { + for (const auto& device : + metadata.device_assignment().computation_devices()) { + flattened_device_ids.insert(flattened_device_ids.end(), + device.replica_device_ids().begin(), + device.replica_device_ids().end()); + } + } + // TODO(henrytan): return the debug_string. + const char* prefix = + TpuCompile_CreateCompilationCacheKey(CompilationCacheKeyProperty{ + config_prefix.data(), + shapes_prefix.data(), + function_name.data(), + mlir_module.data(), + flattened_device_ids.data(), + flattened_device_ids.size(), + guaranteed_constants.size(), + function_library_fingerprint, + metadata.num_cores_per_replica(), + metadata.num_replicas(), + mesh_state.data(), + }); + auto buffer_cleanup = gtl::MakeCleanup([prefix]() { delete[] prefix; }); + TpuCompilationCacheKey key; + key.prefix = prefix; + + // Guaranteed constants can be different across sessions. Use session_handle + // and guaranteed_const fingerprint to guarantee no collision. + if (guaranteed_constants.size() > 0) { + key.has_guaranteed_const = true; + key.session_handle = metadata.session_handle(); + // Both `metadata` and `guaranteed_constants` lifetime are captured by + // reference based on the assumption that these variables lifetime is + // managed through the `TPUCompileOpKernelImpl` that outlives the + // lifetime of the compilation cache lookups. + string fingerprint; + key.guaranteed_const_fingerprint = [&metadata, &guaranteed_constants, + fingerprint]() mutable { + if (fingerprint.empty()) { + fingerprint = GuaranteedConstFingerprint( + metadata.guaranteed_const_fingerprint(), guaranteed_constants); + } + return fingerprint; + }; + } + return key; +} + +TpuCompilationRefHolder* TpuCompilationCacheInterface::MakePerStepRefHolder() { + return new RefHolder(this); +} + +Status TpuCompilationCacheInterface::MarkEntryForEviction(int64 subgraph_uid) { + profiler::TraceMe key_release_traceme( + "TPU compilation cache possibly evict uid", + /*level=*/2); + CompilationEntry* deleted_entry = nullptr; + { + absl::MutexLock lock(&mu_); + auto iter = entries_by_uid_.find(subgraph_uid); + if (iter == entries_by_uid_.end()) { + // If already evicted, return ok. + return Status::OK(); + } + + // Mark entry for eviction. 
+ CompilationEntry* subgraph_to_evict = iter->second; + // If there are external references, should not use this API. + if (subgraph_to_evict->external_references != 0) { + return errors::Internal("Subgraph ", subgraph_to_evict->subgraph_key, + " external_references greater than zero. Should " + "use TpuCompilationCache::Release."); + } + + VLOG(1) << "Marking " << subgraph_to_evict->subgraph_key << " for eviction"; + entries_by_last_use_.erase(subgraph_to_evict->last_use); + cache_size_ -= subgraph_to_evict->total_size; + marked_for_eviction_size_ += subgraph_to_evict->total_size; + + // Evict if refcount exactly one, otherwise only discard cache's reference + // to the entry while the actual eviction will happen when refholder's + // references go away. + deleted_entry = DiscardEntryRef(subgraph_to_evict); + + VLOG(1) << "After possibly evicting entry " << subgraph_uid + << " refs cache is " << cache_store_.size() << " entries (" + << cache_size_ + marked_for_eviction_size_ + << " bytes), marked for eviction " + << (cache_store_.size() - entries_by_last_use_.size()) + << " entries (" << marked_for_eviction_size_ << " bytes)."; + } + + // Unload from device cache if entry is evicted from host cache. + UnloadAndDestroy(deleted_entry); + return Status::OK(); +} + +Status TpuCompilationCacheInterface::Release(int64 subgraph_uid) { + profiler::TraceMe key_release_traceme("TPU compilation cache release uid", + /*level=*/2); + + CompilationEntry* deleted_entry = nullptr; + { + absl::MutexLock lock(&mu_); + auto iter = entries_by_uid_.find(subgraph_uid); + + if (iter == entries_by_uid_.end()) { + return errors::NotFound("No cache entry found for uid ", subgraph_uid); + } + + CHECK_GT(iter->second->external_references, 0); + --iter->second->external_references; + + deleted_entry = DiscardEntryRef(iter->second); + + VLOG(1) << "After releasing entry " << subgraph_uid << " refs cache is " + << cache_store_.size() << " entries (" + << cache_size_ + marked_for_eviction_size_ + << " bytes), marked for eviction " + << (cache_store_.size() - entries_by_last_use_.size()) + << " entries (" << marked_for_eviction_size_ << " bytes)."; + } + UnloadAndDestroy(deleted_entry); + return Status::OK(); +} + +void TpuCompilationCacheInterface::UnloadAndDestroy(CompilationEntry* entry) { + if (!entry) return; + + CHECK(entry->RefCountIsOne()); + entry->tpu_program->UnloadAndDestroyPrograms(); + entry->Unref(); +} + +size_t TpuCompilationCacheInterface::RemoveEntry(const string& key) { + auto erased = cache_store_.erase(key); + TpuCompilationCacheMetrics::SetCacheEntryCount(cache_store_.size()); + auto parsed_key_or_status = ParseCompilationCacheKey(key); + CHECK(parsed_key_or_status.status().ok()); + const TpuCompilationCacheKey parsed_key = + parsed_key_or_status.ConsumeValueOrDie(); + if (!parsed_key.has_guaranteed_const) { + return erased; + } + session_key_map_.erase( + strings::StrCat(parsed_key.prefix, parsed_key.session_handle)); + fingerprint_key_map_.erase(strings::StrCat( + parsed_key.prefix, parsed_key.guaranteed_const_fingerprint())); + return erased; +} + +ABSL_MUST_USE_RESULT CompilationEntry* +TpuCompilationCacheInterface::DiscardEntryRef(CompilationEntry* entry) { + if (entry->RefCountIsOne()) { + // The last reference to this entry is going away, so really delete it from + // the cache in such a way that it can't be restored by being looked up + // again. + + // Sanity-check that it has been marked for eviction. 
+ CHECK(entries_by_last_use_.find(entry->last_use) == + entries_by_last_use_.end()); + // Update the counter tracking how much space is taken up by entries that + // are marked for eviction. + marked_for_eviction_size_ -= entry->total_size; + + // Remove the entry from the cache. + auto erased = RemoveEntry(entry->subgraph_key); + + if (erased == 0) { + LOG(FATAL) << "Tried to discard nonexistent cache entry"; + } + erased = entries_by_uid_.erase(entry->uid); + CHECK_EQ(erased, 1); + for (const string& key : entry->proto_key) { + erased = entries_by_proto_key_.erase(key); + CHECK_EQ(erased, 1); + } + // The actual deletion will happen outside the lock in UnloadAndDestroy(). + return entry; + } + entry->Unref(); + return nullptr; +} + +void TpuCompilationCacheInterface::DiscardEntryRefs( + gtl::ArraySlice entries) { + std::vector removed_entries; + { + absl::MutexLock lock(&mu_); + + for (auto entry : entries) { + removed_entries.push_back(DiscardEntryRef(entry)); + } + + VLOG(1) << "After discarding entry refs cache is " << cache_store_.size() + << " entries (" << cache_size_ + marked_for_eviction_size_ + << " bytes), marked for eviction " + << (cache_store_.size() - entries_by_last_use_.size()) + << " entries (" << marked_for_eviction_size_ << " bytes)."; + } + for (auto removed_entry : removed_entries) { + UnloadAndDestroy(removed_entry); + } +} + +ABSL_MUST_USE_RESULT CompilationEntry* +TpuCompilationCacheInterface::MarkOldestEntryForEviction() { + CompilationEntry* entry_to_mark = entries_by_last_use_.begin()->second; + VLOG(1) << "Marking " << entry_to_mark->subgraph_key << " for eviction"; + entries_by_last_use_.erase(entry_to_mark->last_use); + cache_size_ -= entry_to_mark->total_size; + marked_for_eviction_size_ += entry_to_mark->total_size; + // Discard the cache's reference to entry. If steps are holding onto + // references to entry it won't be deleted until the last step holding it + // completes. It stays in the cache in the meantime and can be resurrected + // by a call to CompileIfKeyAbsent if that occurs before the last reference + // expires. + return DiscardEntryRef(entry_to_mark); +} + +void TpuCompilationCacheInterface::LookupEntryMarkedForEviction( + CompilationEntry* entry, std::vector* removed_entries) { + // The entry was previously marked for eviction (or is newly created) so + // unmark it. Add a reference (owned by the cache), update the cache size, and + // mark something old for eviction if necessary. + entry->Ref(); + marked_for_eviction_size_ -= entry->total_size; + cache_size_ += entry->total_size; + + // Mark the least-recently-used non-marked entry for eviction. Never mark the + // most-recently used entry (i.e., do nothing if entries_by_last_use_ == 1 + // which means there's only one entry not already marked for eviction), so + // that an entry persists in the cache even if it is larger than the allocated + // cache size. 
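+  // Worked example (hypothetical sizes): with max_cache_size_ == 100MB and two
+  // unmarked entries of 80MB (least recently used) and 120MB (just looked up),
+  // cache_size_ == 200MB, so the loop below marks the 80MB entry for eviction
+  // and then stops because only one unmarked entry remains; the 120MB entry
+  // stays resident even though it alone exceeds the limit.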
+ while (entries_by_last_use_.size() > 1 && cache_size_ > max_cache_size_) { + if (auto entry_to_evict = MarkOldestEntryForEviction()) { + removed_entries->push_back(entry_to_evict); + } + } +} + +Status TpuCompilationCacheInterface::ToSubEntryRef( + CompilationCacheEntryRef* entry, + CompilationCacheFetchTarget fetch_target) const { + return static_cast(entry)->ToSubEntryRef(fetch_target); +} + +TpuCompilationCacheInterface::EntryRefImpl::EntryRefImpl( + TpuCompilationCacheInterface* parent, CompilationEntry* entry, int index) + : parent_(parent), entry_(entry), index_(index) { + if (entry_ == nullptr) { + return; + } + if (entry_->main_entry == nullptr) { + entry_->Ref(); + } else { + // This is a sharding/unsharding entry nested in a main entry. Only refcount + // the main entry. + entry_->main_entry->Ref(); + } +} + +TpuCompilationCacheInterface::EntryRefImpl::~EntryRefImpl() { + if (entry_ == nullptr) { + return; + } + if (entry_->main_entry == nullptr) { + parent_->DiscardEntryRefs({entry_}); + } else { + parent_->DiscardEntryRefs({entry_->main_entry}); + } +} + +CompilationCacheEntry TpuCompilationCacheInterface::EntryRefImpl::get() { + if (entry_ == nullptr) { + // Create an empty entry if the entry is nullptr. This corresponds to + // non-existing sharding/unsharding entries. + return CompilationCacheEntry(); + } + return CompilationCacheEntry(std::move(entry_->tpu_program)); +} + +Status TpuCompilationCacheInterface::EntryRefImpl::ToSubEntryRef( + CompilationCacheFetchTarget fetch_target) { + CompilationEntry* target = nullptr; + switch (fetch_target) { + case CompilationCacheFetchTarget::MAIN: + target = entry_; + break; + case CompilationCacheFetchTarget::SHARDING: + target = entry_->sharding_entry.get(); + break; + case CompilationCacheFetchTarget::UNSHARDING: + target = entry_->unsharding_entry.get(); + break; + default: + return xla::InvalidArgument("Invalid fetch target: %d", fetch_target); + } + + if (target == nullptr) { + // Cache entry does not have an unsharding subentry. Unref and replace + // with nullptr. + parent_->DiscardEntryRefs({entry_}); + } + // Otherwise, since the refcount is always on the main entry, we don't need + // ref/unref. 
+ entry_ = target; + return Status::OK(); +} + +Status TpuCompilationCacheInterface::Lookup( + int64 uid, int proto_index, + std::unique_ptr* entry) { + entry->reset(); + + profiler::TraceMe proto_lookup_traceme( + "TPU compilation cache proto lookup by uid", + /*level=*/2); + + absl::MutexLock lock(&mu_); + const auto iter = entries_by_uid_.find(uid); + if (iter == entries_by_uid_.end()) { + return errors::NotFound("No subgraph found for uid ", uid); + } + CompilationEntry* cache_entry = iter->second; + if (proto_index < 0 || + proto_index >= cache_entry->tpu_program->program_size()) { + return errors::NotFound("No proto found for core index ", proto_index, + " in subgraph with uid ", uid); + } + *entry = std::unique_ptr( + new EntryRefImpl(this, cache_entry, proto_index)); + return Status::OK(); +} + +Status TpuCompilationCacheInterface::Lookup( + const string& proto_key, std::unique_ptr* entry) { + entry->reset(); + + profiler::TraceMe proto_lookup_traceme("TPU compilation cache proto lookup", + /*level=*/2); + + absl::MutexLock lock(&mu_); + const auto iter = entries_by_proto_key_.find(proto_key); + if (iter == entries_by_proto_key_.end()) { + return errors::NotFound("No proto found for key ", proto_key); + } + CompilationEntry* cache_entry = iter->second.first; + int proto_index = iter->second.second; + *entry = std::unique_ptr( + new EntryRefImpl(this, cache_entry, proto_index)); + return Status::OK(); +} + +Status TpuCompilationCacheInterface::CompileIfKeyAbsentHelper( + const TpuCompilationCacheKey& subgraph_key, + const SessionMetadata* session_metadata, + TpuCompilationRefHolder* per_step_ref_holder, int64* uid, + std::vector* proto_key, std::vector* may_modify_variables, + std::vector* removed_entries, + std::vector>* hlo_metadata, + const std::function& compile_function) { + profiler::TraceMe subgraph_lookup_traceme( + "TPU compilation cache subgraph lookup", + /*level=*/2); + + // NOTE: In spite of the fact that we use MutexLock, we do not hold the lock + // for the lifetime of the object, see InitializeEntry() call below. + absl::MutexLock lock(&mu_); + + std::string cache_key = FindCacheKey(subgraph_key); + auto iter = cache_store_.find(cache_key); + bool is_new_key = iter == cache_store_.end(); + + const std::string session_name = SessionNameFromMetadata(session_metadata); + + CompilationEntry* entry = nullptr; + if (is_new_key) { + cache_key = ConstructCompilationCacheKey(subgraph_key); + TpuCompilationCacheMetrics::IncrementCacheLookupCount( + /*is_cache_hit=*/false, session_name); + const string msg = + strings::StrCat("TPU host compilation cache miss: cache_key(", + cache_key, "), session_name(", session_name, ")"); + + TRACESTRING(msg); + LOG(INFO) << msg; + + // Check if caller has disabled compilation. Set using + // internal::ScopedTpuCompileDisabler. + if (!IsTpuCompilationEnabled()) { + const string error_msg = strings::StrCat( + "[TpuCompilationDisabled]: Compilation cache miss, but compilation " + "disabled, session_name(", + session_name, ") Debug String: ", subgraph_key.debug_string); + if (VLOG_IS_ON(2)) { + VLOG(2) << "Cache Missed. Current cache entries: "; + for (auto it = cache_store_.begin(); it != cache_store_.end(); ++it) { + // TODO(henrytan): add DebugKey as cache_entry_debug_string to + // TpuCompilationCacheKey. 
+ VLOG(2) << "Cache Debug Info: "; + VLOG(2) << it->second->cache_entry_debug_string; + } + } + + LOG_EVERY_N_SEC(WARNING, 30) << error_msg; + return errors::NotFound(error_msg); + } + + // The single ref on the newly-created entry is owned by the caller. + VLOG(1) << "Before adding new entry for key " << cache_key + << " with session_name( " << session_name << ");" + << "; cache is " << cache_store_.size() << " entries (" + << cache_size_ + marked_for_eviction_size_ << " bytes), " + << " marked for eviction " + << (cache_store_.size() - entries_by_last_use_.size()) + << " entries (" << marked_for_eviction_size_ << " bytes)."; + // Note that InitializeEntry() will Release/Reacquire mu_. + entry = InitializeEntry(cache_key, compile_function, subgraph_key); + TRACELITERAL("TPU host compilation cache: compilation done."); + + LOG(INFO) << strings::StrCat( + "TPU host compilation cache: compilation done for cache_key(", + cache_key, "), session_name(", session_name, ")"); + // If session_name is present, log some additional stats related to HBM + // here, so that they can be associated directly to the session. + if (!session_name.empty()) { + entry->tpu_program->LogProgramMemorySummary(); + } + } else { + TpuCompilationCacheMetrics::IncrementCacheLookupCount(true, session_name); + const string msg = + strings::StrCat("TPU host compilation cache hit: cache_key(", cache_key, + "), session_name(", session_name, ")"); + TRACESTRING(msg); + VLOG(1) << msg; + VLOG(1) << "Before refreshing entry for key " << cache_key + << " with session_name( " << session_name << "); cache is " + << cache_store_.size() << " entries (" + << cache_size_ + marked_for_eviction_size_ << " bytes), " + << " marked for eviction " + << (cache_store_.size() - entries_by_last_use_.size()) + << " entries (" << marked_for_eviction_size_ << " bytes)."; + entry = iter->second; + // Make a new reference that is owned by the caller. + entry->Ref(); + // Block if necessary until the subgraph has been initialized. + mu_.Await(absl::Condition( + +[](CompilationEntry* e) { return e->initialized; }, entry)); + } + + // Let the caller know the uid of the entry. + *uid = entry->uid; + // Let the caller know the keys for each of the cached protos. + *proto_key = entry->proto_key; + *may_modify_variables = entry->tpu_program->may_modify_variables(); + *hlo_metadata = entry->hlo_metadata; + + // If the caller didn't supply a per_step_ref_holder then the caller is going + // to manually release the reference later via a call to Release(). + if (per_step_ref_holder == nullptr) { + ++entry->external_references; + } else { + // The caller wants its reference to be handed off to a per-step holder that + // will discard the reference when the step completes. + RefHolder* cast_ref_holder = static_cast(per_step_ref_holder); + TF_RET_CHECK(cast_ref_holder != nullptr); + cast_ref_holder->AddRef(entry); + } + + // Remove the old LRU-table entry if it wasn't already marked for eviction. + auto erased = entries_by_last_use_.erase(entry->last_use); + // Update the LRU table indicating this entry is the most recently used. + entry->last_use = use_counter_++; + entries_by_last_use_[entry->last_use] = entry; + if (erased == 0) { + // The entry had been marked for eviction, or is newly created. + LookupEntryMarkedForEviction(entry, removed_entries); + } + + // Log a little more verbosely when a key is added. + if (VLOG_IS_ON(1) || is_new_key) { + LOG(INFO) << "After " << (is_new_key ? 
"adding" : "refreshing") + << " entry for key " << cache_key << " with session_name " + << session_name << " cache is " << cache_store_.size() + << " entries (" << cache_size_ + marked_for_eviction_size_ + << " bytes), " + << " marked for eviction " + << (cache_store_.size() - entries_by_last_use_.size()) + << " entries (" << marked_for_eviction_size_ << " bytes)."; + } + return entry->initialization_status; +} + +tensorflow::Status TpuCompilationCacheInterface::CompileIfKeyAbsent( + const TpuCompilationCacheKey& cache_key, + const tensorflow::SessionMetadata* session_metadata, + TpuCompilationRefHolder* per_step_ref_holder, int64* uid, + std::vector* proto_key, std::vector* may_modify_variables, + std::vector>* hlo_metadata, + const std::function& compile_function) { + std::vector removed_entries; + auto status = CompileIfKeyAbsentHelper( + cache_key, session_metadata, per_step_ref_holder, uid, proto_key, + may_modify_variables, &removed_entries, hlo_metadata, compile_function); + for (auto entry : removed_entries) { + UnloadAndDestroy(entry); + } + return status; +} + +} // namespace tpu +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h new file mode 100644 index 00000000000..b6cdbe9fa0b --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h @@ -0,0 +1,394 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef EXPERIMENTAL_BRAIN_TPU_1VM_MINIEXECUTOR_TPU_COMPILATION_CACHE_INTERFACE_H_ +#define EXPERIMENTAL_BRAIN_TPU_1VM_MINIEXECUTOR_TPU_COMPILATION_CACHE_INTERFACE_H_ + +#include +#include +#include +#include + +#include "absl/container/node_hash_map.h" +#include "absl/strings/string_view.h" +#include "absl/synchronization/mutex.h" +#include "absl/types/span.h" +#include "tensorflow/compiler/xla/service/hlo.pb.h" +#include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/platform/refcount.h" +#include "tensorflow/core/protobuf/tpu/compile_metadata.pb.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache.pb.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_key.h" +#include "tensorflow/core/tpu/kernels/tpu_compile_c_api.h" +#include "tensorflow/core/tpu/kernels/tpu_compile_op_support.h" +#include "tensorflow/core/tpu/kernels/tpu_mesh_state_interface.h" +#include "tensorflow/core/tpu/kernels/tpu_program.h" + +namespace tensorflow { +namespace tpu { + +const char kCompilationCacheResourceName[] = "tpu_compilation_cache"; +const char kCompilationCacheUnloaderResourceName[] = + "tpu_compilation_cache_unloader"; + +// Base class that holds references to compiled protos so that the protos are +// not garbage-collected before being used by execute ops. 
Use +// TpuCompilationCache::MakePerStepRefHolder to create an instance of a concrete +// ref holder object. +class TpuCompilationRefHolder : public ResourceBase { + public: + ~TpuCompilationRefHolder() override = default; +}; + +class TpuCompilationCacheInterface : public ResourceBase { + public: + using Status = ::stream_executor::port::Status; + + // An entry in the compilation cache. The entry is deleted once it has been + // marked for eviction from the cache _and_ all steps that use it have + // completed. When the entry is first created, it is uninitialized and a + // client-supplied compilation function is run outside the cache's lock to + // generate the programs to be stored in the entry. Any other client that + // requests the entry will block until it has been initialized. Each entry has + // a last_use value that set from a monotonically-increasing counter in the + // cache whenever the entry is referenced. When the cache becomes full, + // entries are marked for eviction in LRU order. + // + // The bridge can request XLA to generate separate sharding and unsharding + // programs along with the main program; we use nested fields sharding_entry, + // unsharding_entry to store them under the main entry, and these two fields + // must be either both present or both absent. They have a back pointer + // main_entry to refer to the main program. These nested entries share the + // same cache key and the same lifetime as the main entry, so we use the + // refcount on the main entry to track the access to any of them. + // /-------------------------------\ + // v \ + // main_entry (refcount) -> sharding_entry -> main_entry + // ^ \ + // | \-> unsharding_entry -> main_entry + // \--------------------------------------/ + struct CompilationEntry : public core::RefCounted { + TpuCompilationCacheInterface* parent = nullptr; // Not owned. + bool initialized = false; + + // The Status returned by the compilation function when the entry is + // initialized. This status will be returned to any client that requests the + // entry. + Status initialization_status; + + // The uid describing this entry. + int64 uid; + std::vector proto_key; + + // Counter to keep track of LRU entries for the eviction policy. + int64 last_use = -1; + + // The unique key describing this entry. + std::string subgraph_key; + + // Entries representing the associated sharding and unsharding programs, + // which share the same life time of the owning main entry, so we always use + // the main entry's ref count. + std::unique_ptr sharding_entry; + std::unique_ptr unsharding_entry; + + // The number of 'external' client-held references to the entry. + int external_references = 0; + + std::vector> hlo_metadata; + + // The sum of the SpaceUsed of each of the elements of programs; an estimate + // of how much RAM the entry consumes, used to determine when entries must + // be marked for eviction. + int64 total_size = 0; + + // Only used for the nested sharding/unsharding entries to point to the + // owning main entry. + CompilationEntry* main_entry = nullptr; + + // Debug info in case we miss. + string cache_entry_debug_string; + + // Compiled Tpu program. 
+ std::unique_ptr tpu_program; + }; + + explicit TpuCompilationCacheInterface(int64_t max_cache_size); + ~TpuCompilationCacheInterface() override; + TpuCompilationCacheInterface(const TpuCompilationCacheInterface&) = delete; + TpuCompilationCacheInterface& operator=(const TpuCompilationCacheInterface&) + = delete; + + Status CompileIfKeyAbsent( + const TpuCompilationCacheKey& cache_key, + const SessionMetadata* session_metadata, + TpuCompilationRefHolder* per_step_ref_holder, int64* uid, + std::vector* proto_key, std::vector* may_modify_variables, + std::vector>* hlo_metadata, + const std::function& compile_function); + + static TpuCompilationCacheKey CreateCompilationCacheKey( + absl::string_view function_name, uint64 function_library_fingerprint, + absl::string_view mlir_module, + const tensorflow::OpInputList& guaranteed_constants, + const std::vector& dynamic_shapes, + const tensorflow::tpu::TPUCompileMetadataProto& metadata, + const TpuMeshStateInterface& mesh_state); + + string DebugString() const override { return "TpuCompilationCacheInterface"; } + + // Makes a reference holder for this cache, that can be stored in the per-step + // resource manager and will ensure that compiled entries persist until the + // end of a step. + TpuCompilationRefHolder* MakePerStepRefHolder(); + + // Differences between MarkEntryForEviction and Release: + // There are two modes of managing cache entries: + // 1) LRU eviction + pinning; 2) manual. + // We use mode 1) if CompilationRefHolder is provided to CompileIfKeyAbsent. + // Otherwise it is manual mode (mainly used by XRT). + // MarkEntryForEviction should only be used in mode 1) to eagerly evict cache + // entries when callers know that they do not need them anymore. + // Release should only be used in mode 2) to explicitly remove an entry. + + // Mark the entry indexed by `subgraph_uid` for eviction. This should only be + // called if per_step_ref_holder was NOT nullptr in the corresponding call to + // CompileIfKeyAbsent(subgraph_key, ...). Otherwise, use Release(int64 + // subgraph_uid). + Status MarkEntryForEviction(int64 subgraph_uid); + + // Manually discards a reference to the compiled subgraph. This should only be + // called if per_step_ref_holder was nullptr in the corresponding call to + // CompileIfKeyAbsent(subgraph_key, ...). + Status Release(int64 subgraph_uid); + + // Looks up an executable corresponding to the model-parallel core index of + // the subgraph represented by key. On success a pointer to an EntryRef + // holding the program is returned in entry. + Status Lookup(const string& proto_key, + std::unique_ptr* entry); + + // Looks up an executable corresponding to the model-parallel core index of + // the subgraph represented by uid. On success a pointer to an EntryRef + // holding the program is returned in entry. + Status Lookup(int64 uid, int proto_index, + std::unique_ptr* entry); + + // Mutates the main entry ref to point to the entry's subentry + // (for sharding/unsharding) or main entry (unchanged) representing the + // fetch target. The entry ref needs to point to the main entry before this + // call. + // + // If the requested subentry does not exist, the ref will point to a nullptr + // entry. + Status ToSubEntryRef(CompilationCacheEntryRef* entry, + CompilationCacheFetchTarget fetch_target) const; + + private: + // Wrapper for a cache entry that holds a reference to the entry until the + // wrapper is deleted. This wrapper is the concrete type of + // CompilationCacheEntryRef returned by Lookup. 
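+  // A rough client-side lookup sketch (hypothetical caller, for illustration
+  // only):
+  //
+  //   std::unique_ptr<CompilationCacheEntryRef> ref;
+  //   TF_RETURN_IF_ERROR(cache->Lookup(proto_key, &ref));
+  //   TF_RETURN_IF_ERROR(cache->ToSubEntryRef(
+  //       ref.get(), CompilationCacheFetchTarget::MAIN));
+  //   CompilationCacheEntry entry = ref->get();
+  //   // `entry` remains valid only while `ref` (an EntryRefImpl) is alive.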
+ class EntryRefImpl : public CompilationCacheEntryRef { + public: + EntryRefImpl(TpuCompilationCacheInterface* parent, CompilationEntry* entry, + int index); + ~EntryRefImpl() override; + + CompilationCacheEntry get() override; + + // Mutates this ref to point to the entry's subentry (for + // sharding/unsharding) or main entry (unchanged) as specified by + // fetch_target. The refcount is kept unchanged, since we only track the + // refcount of the main entry. The entry ref needs to point to the main + // entry before this call. + // + // If the requested subentry does not exist, the ref will point to a nullptr + // entry, and the original entry will be unref'ed. + Status ToSubEntryRef(CompilationCacheFetchTarget fetch_target); + + private: + TpuCompilationCacheInterface* parent_; // Not owned. + // A reference to entry_ is acquired in the constructor and released via + // parent->DiscardEntryRefs in the destructor. + CompilationEntry* entry_; + // The program in entry_ that is returned by the get method. + int index_; + }; + + // Private implementation of the generic CompilationRefHolder that knows about + // CompiledSubgraph entries. + class RefHolder : public TpuCompilationRefHolder { + public: + explicit RefHolder(TpuCompilationCacheInterface* parent) : parent_(parent) { + parent_->Ref(); + } + ~RefHolder() override { + // Release our reference to the parent. + parent_->Unref(); + } + + // Adds entry to the list of entries that will be released when the + // RefHolder is destroyed. Each entry is released via a call to + // parent_->DiscardEntryRefs. + void AddRef(CompilationEntry* entry) { + entries_.push_back(entry); + } + + string DebugString() const override { + return "TpuCompilationCacheInterface::RefHolder"; + } + + private: + TpuCompilationCacheInterface* parent_; // Not owned. + std::vector entries_; + }; + + // The bulk of implementation of CompileIfKeyAbsent() with the exception + // of unloading programs that corresponds to possibly removed cache + // entries. The split helps to manage locking since we prefer to perform + // unloading without holding extra locks. + Status CompileIfKeyAbsentHelper( + const TpuCompilationCacheKey& subgraph_key, + const SessionMetadata* session_metadata, + TpuCompilationRefHolder* per_step_ref_holder, int64* uid, + std::vector* proto_key, std::vector* may_modify_variables, + std::vector* removed_entries, + std::vector>* hlo_metadata, + const std::function& compile_function); + + // This is called by the cache when entry is marked for eviction; by + // a RefHolder (via DiscardEntryRefs) when a step completes; and by + // an EntryRefImpl when it is destroyed. Releases one reference to entry + // if more than 1 remains. If only one reference is left, the entry is removed + // from cache_ and is returned to the caller; which must eventually call + // UnloadAndDestroy(). We do not call UnloadAndDestroy within DiscardEntryRef + // to avoid holding the lock during program unloading. + ABSL_MUST_USE_RESULT CompilationEntry* DiscardEntryRef( + CompilationEntry* entry) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); + // Convenience method called by ~RefHolder without mu_ held. Calls + // DiscardEntryRef on every element of entries. + void DiscardEntryRefs( + gtl::ArraySlice entries); + + // Marks the oldest unmarked entry for eviction. Requires that there is at + // least one such entry. In case the evicted entry had only 1 reference it + // is removed from the cache and returned to the caller which must eventually + // call UnloadAndDestroy. 
+ CompilationEntry* MarkOldestEntryForEviction() + ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); + + // Updates datastructures to indicate that entry, which had been marked for + // eviction, has been looked up. This is called by CompileIfKeyAbsent when an + // entry is newly created, or an entry that has been marked for eviction but + // not yet evicted is looked up. + // + // First the entry is unmarked for eviction, i.e. the cache gains a reference + // to entry, entry's last_use field is set to be the most recent value of + // use_counter_ and entries_by_last_use_ is updated accordingly. + // + // Next, the size of the cache is examined to see if any other entries need to + // be marked for eviction now that entry has been unmarked. While the total + // size of unmarked cached entries is greater than max_cache_size_, entries + // are marked for eviction in LRU order. The most recently used entry is never + // marked for eviction, so an entry larger than the max cache size will remain + // in the cache until it is replaced by something else. In case some entries + // actually were removed from the cache, they are a returned to the caller via + // removed_entries. The caller must eventually delete them by calling + // UnloadAndDestroy. + void LookupEntryMarkedForEviction( + CompilationEntry* entry, std::vector* removed_entries) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); + + // Removes the entry with given key from cache. + size_t RemoveEntry(const string& key) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); + + // Inserts the given key and entry to cache. + void InsertEntry(const std::string& key, + const TpuCompilationCacheKey& subgraph_key, + CompilationEntry* entry) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); + + // Returns the cache key matching given subgraph_key. + std::string FindCacheKey(const TpuCompilationCacheKey& subgraph_key) const + ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); + + // Creates a new entry by running initialize_programs and places it in the + // cache to be looked up by key. The new entry is in the 'marked for eviction' + // state (not present in entries_by_last_use_) and the caller is expected to + // call LookupEntryMarkedForEviction after InitializeEntry. + // + // **InitializeEntry releases mu_ during the call to initialize_programs.** + CompilationEntry* InitializeEntry( + const string& key, + const std::function& initialize_program, + const TpuCompilationCacheKey& subgraph_key) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); + + // Unloads the program associated with the entry from all local devices + // and deletes the entry itself. It is assumed no one else has a reference + // to it and all related keys had already been removed from the cache. + // The call can perform device IO so no locks should be held while calling it. + void UnloadAndDestroy(CompilationEntry* entry) ABSL_LOCKS_EXCLUDED(mu_); + + // The maximum size of entries that are stored in the cache before entries are + // marked for eviction. + const int64 max_cache_size_; + + mutable absl::Mutex mu_; + // The total size of entries that are stored and not marked for eviction. + int64 cache_size_ ABSL_GUARDED_BY(mu_) = 0; + + // The total size of entries that are marked for eviction. + int64 marked_for_eviction_size_ ABSL_GUARDED_BY(mu_) = 0; + + // The value to assign to the last_use field of the next entry that is looked + // up. + int64 use_counter_ ABSL_GUARDED_BY(mu_) = 0; + + // session_key_map_ and fingerprint_key_map_ are used for looking up the + // cache_ key matching a given subgraph key. 
When doing a lookup, check + // session_key_map_ first to avoid unnecessay fingerprint computation. + // Map from key prefix + session_handle to a cache_ key. + std::unordered_map session_key_map_ ABSL_GUARDED_BY(mu_); + + // Map from key prefix + fingerprint to a cache_ key. + std::unordered_map fingerprint_key_map_ ABSL_GUARDED_BY(mu_); + + // All the subgraph entries that can be looked up in the cache. An entry is + // marked for eviction iff it is present in cache_ and not in + // entries_by_last_use_. + std::unordered_map cache_store_ + ABSL_GUARDED_BY(mu_); + + // All the subgraph entries that can be looked up in the cache, indexed by + // uid. + absl::node_hash_map entries_by_uid_ + ABSL_GUARDED_BY(mu_); + + // All the protos that can be looked up in the cache, indexed by proto + // key. The value of the map is a subgraph and the index of the proto compiled + // for that subgraph. + std::unordered_map> + entries_by_proto_key_ ABSL_GUARDED_BY(mu_); + + // Map from last_use to entry, used to mark entries for eviction in LRU + // order. If an entry's last_use counter is not present as a key in + // entries_by_last_use_ then the entry has been marked for eviction. + std::map entries_by_last_use_ ABSL_GUARDED_BY(mu_); +}; + +} // namespace tpu +} // namespace tensorflow + +#endif // EXPERIMENTAL_BRAIN_TPU_1VM_MINIEXECUTOR_TPU_COMPILATION_CACHE_INTERFACE_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_key.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_key.h new file mode 100644 index 00000000000..49c2eb64944 --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_key.h @@ -0,0 +1,53 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_KEY_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_KEY_H_ + +#include +#include + +#include "absl/types/optional.h" + +namespace tensorflow { +namespace tpu { + +struct TpuCompilationCacheKey { + // Prefix of the key. + std::string prefix; + + // A boolean flag to specify if `guaranteed_const` is used. Guarantee const is + // normally used in TPU inference to avoid re-copying unchanged variables onto + // the TPU device. It promises the value is identical for every execution in + // the same session even if the actual value changes in later executions. + bool has_guaranteed_const = false; + + // Unique session identifier. It is set when `has_guaranteed_const` is true. + std::string session_handle; + + // Fingerprint of `guaranteed_const` value. It is set when the value of the + // `has_guaranteed_const` is true. Produce the value when necessary. + std::function guaranteed_const_fingerprint; + + // A more verbose key for debugging purpose. 
+ std::string debug_string; + + explicit TpuCompilationCacheKey() {} + explicit TpuCompilationCacheKey(const std::string& p) : prefix(p) {} +}; + +} // namespace tpu +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_KEY_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.cc b/tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.cc new file mode 100644 index 00000000000..f4f8dbfc80f --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.cc @@ -0,0 +1,93 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h" + +#include "tensorflow/core/profiler/lib/traceme.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h" + +namespace tensorflow { +namespace tpu { + +namespace { +class CompilationCacheFetchTargetUtility { + public: + CompilationCacheFetchTargetUtility() + : names_({"Invalid", "Main", "Sharding", "Unsharding"}) {} + + std::string name(CompilationCacheFetchTarget target) const { + return names_[static_cast(target)]; + } + + private: + const std::vector names_; +}; + +std::string GetName(CompilationCacheFetchTarget target) { + static const auto* util = new CompilationCacheFetchTargetUtility(); + return util->name(target); +} + +} // namespace + +TpuCompilationCacheLocalLookup::TpuCompilationCacheLocalLookup( + TpuCompilationCacheInterface* cache) + : cache_(cache) {} + +TpuCompilationCacheLocalLookup::~TpuCompilationCacheLocalLookup() { + cache_->Unref(); +} + +Status TpuCompilationCacheLocalLookup::Lookup( + const string& proto_key, std::unique_ptr* entry, + CompilationCacheFetchTarget fetch_target) { + profiler::TraceMe proto_lookup_traceme("Local TPU proto cache lookup", + /*level=*/2); + Status s = cache_->Lookup(proto_key, entry); + VLOG(1) << "Looked up key " << proto_key << " in local subgraph cache status " + << s; + if (!s.ok()) { + return s; + } + s = cache_->ToSubEntryRef(entry->get(), fetch_target); + + VLOG(1) << "Fetched subentry: " << GetName(fetch_target) << " with status " + << s; + return s; +} + +Status TpuCompilationCacheLocalLookup::Lookup( + int64 uid, int proto_index, + std::unique_ptr* entry, + CompilationCacheFetchTarget fetch_target) { + profiler::TraceMe proto_lookup_traceme("Local TPU proto cache lookup by uid", + /*level=*/2); + Status s = cache_->Lookup(uid, proto_index, entry); + VLOG(1) << "Looked up uid " << uid << ", index " << proto_index + << " in local subgraph cache status " << s; + if (!s.ok()) { + return s; + } + s = cache_->ToSubEntryRef(entry->get(), fetch_target); + VLOG(1) << "Fetched subentry: " << GetName(fetch_target) << " with status " + << s; + return s; +} + +string TpuCompilationCacheLocalLookup::DebugString() const { + return "TpuCompilationCacheLocalLookup"; +} + +} // namespace tpu +} // namespace tensorflow diff --git 
a/tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h new file mode 100644 index 00000000000..138777a438c --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h @@ -0,0 +1,99 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef EXPERIMENTAL_BRAIN_TPU_1VM_MINIEXECUTOR_TPU_COMPILATION_CACHE_LOOKUP_H_ +#define EXPERIMENTAL_BRAIN_TPU_1VM_MINIEXECUTOR_TPU_COMPILATION_CACHE_LOOKUP_H_ + +#include "tensorflow/core/lib/core/refcount.h" +#include "tensorflow/core/platform/status.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache.pb.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h" + +namespace tensorflow { +namespace tpu { + +// Base class allowing Execute Ops to look up ISA protos. Different subclasses +// are used when the execute Op is in the same address space as the compile Op, +// and when they need to communicate over RPC. +class TpuCompilationCacheLookup : public ResourceBase { + public: + ~TpuCompilationCacheLookup() override = default; + + // Looks up an executable corresponding to the model-parallel core index of + // the subgraph represented by key. On success a wrapper for the proto is + // returned in program. The wrapper is guaranteed to be valid only during the + // execution of the Op requesting the proto. + // + // Only one of the main, sharding, unsharding entries is fetched, as specified + // in fetch_target. + // + // If the compilation does not create sharding/unsharding programs, but the + // fetch_target requests one of them, then after this call + // (*entry)->get().get_executable() will return nullptr. + virtual Status Lookup(const string& proto_key, + std::unique_ptr* entry, + CompilationCacheFetchTarget fetch_target) = 0; + + virtual Status Lookup(const string& proto_key, + std::unique_ptr* entry) { + return Lookup(proto_key, std::move(entry), + CompilationCacheFetchTarget::MAIN); + } + + // Looks up an executable corresponding to the model-parallel core index of + // the subgraph represented by uid. On success a wrapper for the proto is + // returned in program. The wrapper is guaranteed to be valid only during the + // execution of the Op requesting the proto. + virtual Status Lookup(int64 uid, int proto_index, + std::unique_ptr* entry, + CompilationCacheFetchTarget fetch_target) = 0; + + virtual Status Lookup(int64 uid, int proto_index, + std::unique_ptr* entry) { + return Lookup(uid, proto_index, std::move(entry), + CompilationCacheFetchTarget::MAIN); + } +}; + +// Forward declaration to break cycle dependency graph. +class TpuCompilationCacheInterface; + +// Class for looking up ISA protos when the execute and compile Op are in the +// same address space. The proto is simply looked up in the compilation cache, +// without any serialization taking place. 
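+//
+// A sketch of the intended call pattern (illustrative only; where `cache` and
+// `proto_key` come from is elided):
+//
+//   TpuCompilationCacheLocalLookup lookup(cache);  // takes over one ref on cache
+//   std::unique_ptr<CompilationCacheEntryRef> entry;
+//   TF_RETURN_IF_ERROR(lookup.Lookup(proto_key, &entry,
+//                                    CompilationCacheFetchTarget::MAIN));
+//   // With a sharding/unsharding fetch target that the compilation did not
+//   // produce, entry->get().get_executable() would return nullptr.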
+class TpuCompilationCacheLocalLookup : public TpuCompilationCacheLookup { + public: + explicit TpuCompilationCacheLocalLookup(TpuCompilationCacheInterface* cache); + ~TpuCompilationCacheLocalLookup() override; + + Status Lookup(const string& proto_key, + std::unique_ptr* entry, + CompilationCacheFetchTarget fetch_target) override; + + Status Lookup(int64 uid, int proto_index, + std::unique_ptr* entry, + CompilationCacheFetchTarget fetch_target) override; + + string DebugString() const override; + + private: + // The subgraph compilation cache, in the same process address space where the + // lookups are happening. + TpuCompilationCacheInterface* cache_; +}; + +} // namespace tpu +} // namespace tensorflow + +#endif // EXPERIMENTAL_BRAIN_TPU_1VM_MINIEXECUTOR_TPU_COMPILATION_CACHE_LOOKUP_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_metrics.cc b/tensorflow/core/tpu/kernels/tpu_compilation_cache_metrics.cc new file mode 100644 index 00000000000..ba4e2ccff93 --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_metrics.cc @@ -0,0 +1,32 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_metrics.h" + +namespace tensorflow { +namespace tpu { + +/* static */ +void TpuCompilationCacheMetrics::IncrementCacheLookupCount( + bool is_cache_hit, absl::string_view session_name) { + // A placeholder for tracking metrics. +} + +/* static */ +void TpuCompilationCacheMetrics::SetCacheEntryCount(int64 count) { + // A placeholder for tracking metrics. +} + +} // namespace tpu +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_metrics.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_metrics.h new file mode 100644 index 00000000000..e30a7a4c013 --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_metrics.h @@ -0,0 +1,38 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_METRICS_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_METRICS_H_ + +#include "absl/strings/string_view.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { +namespace tpu { + +// Tracks Tpu compilation cache metrics. 
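+// Both hooks are currently no-op placeholders (see
+// tpu_compilation_cache_metrics.cc); a cache implementation is expected to
+// call them along its lookup and insert/evict paths, e.g. (hypothetical call
+// sites with hypothetical arguments):
+//
+//   TpuCompilationCacheMetrics::IncrementCacheLookupCount(is_cache_hit,
+//                                                         session_name);
+//   TpuCompilationCacheMetrics::SetCacheEntryCount(current_entry_count);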
+class TpuCompilationCacheMetrics { + public: + // Increments the number of cache lookup count. + static void IncrementCacheLookupCount(bool is_cache_hit, + absl::string_view session_name); + + // Sets the total count of cache entries. + static void SetCacheEntryCount(int64 count); +}; + +} // namespace tpu +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_METRICS_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_compile.proto b/tensorflow/core/tpu/kernels/tpu_compile.proto new file mode 100644 index 00000000000..5b70de67a05 --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compile.proto @@ -0,0 +1,144 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +syntax = "proto3"; + +package tensorflow.tpu; + +import "tensorflow/compiler/tf2xla/host_compute_metadata.proto"; +import "tensorflow/compiler/xla/service/hlo.proto"; +import "tensorflow/compiler/xla/xla_data.proto"; +import "tensorflow/core/framework/tensor.proto"; +import "tensorflow/core/framework/tensor_shape.proto"; +import "tensorflow/core/framework/types.proto"; +import "tensorflow/core/protobuf/tpu/compile_metadata.proto"; +import "tensorflow/core/tpu/kernels/tpu_executable_info.proto"; + +message PerCoreVariableIndices { + // For each resource variable output, what was the index of the corresponding + // input and was it updated? The indices are sorted by input order. + repeated TPUExecutableInfoProto.UpdateIndexPair variable_indices = 1; +} + +message PerCoreArgShapes { + // Argument shapes for each Tpu core. + repeated xla.ShapeProto shapes = 1; +} + +message PerCoreOutputShapes { + // Output shapes for each Tpu core. + repeated xla.ShapeProto shapes = 1; +} + +message OutputDescriptionProto { + // Type and shape of the output. The shape is the unflattened shape. + // When `type` is DT_RESOURCE, `shape` is the shape of the resource + // variable's value. + tensorflow.DataType type = 1; + tensorflow.TensorShapeProto shape = 2; + + // Constant output value, if known to be constant at JIT compilation time. + // 'Tensor' is in host memory. + bool is_constant = 3; + tensorflow.TensorProto constant_value = 4; + + // When this output is a resource, i.e. `type == DT_RESOURCE`, this is + // the index of the input that contains the resource. + int32 input_index = 5; + + // Whether this output is a TensorList. + bool is_tensor_list = 6; +} + +// Describes a variable write side effect of the computation. +message ResourceUpdateProto { + // Index of the input that contains the variable resource to write to. + int32 input_index = 1; + + // Type and shape of the tensor to be written back. + // The `shape` field has the same meaning as the Argument::shape field. + tensorflow.DataType type = 2; + tensorflow.TensorShapeProto shape = 3; + + // Was the value of the variable modified by the computation? + // (Always true, unless `return_updated_values_for_all_resources` is true.) 
+ bool modified = 4; + + // If the resource is a TensorArray, the set of gradients read or written. + map tensor_array_gradients_accessed = 5; +} + +// Describes the result of a XLA Compiler compilation. +message XlaCompilationResultProto { + // Vector that maps from the parameters of the XLA computation to their + // original argument positions. To handle compile-time constant inputs, the + // parameters to the XLA computation may be a subset of the original + // arguments. The relative ordering of parameters are maintained. + repeated int32 input_mappings = 1; + + // Input shapes of the computation. If we are flattening inputs, these are + // the flattened shapes. + repeated xla.ShapeProto xla_input_shapes = 2; + + // Output shape in XLA format. The output shape is always a tuple. If we + // are flattening outputs, these are the flattened shapes. + xla.ShapeProto xla_output_shape = 3; + + // TensorFlow shapes of outputs, together with the values of any + // constant arguments. Vector indexed by Tensorflow _Retval number, + // containing both constant and non-constant results. + repeated OutputDescriptionProto outputs = 4; + + // TensorFlow shapes and types of sends/recvs from HostCompute Ops to their + // matching RecvAtHost/SendFromHost Ops in the outer graph. + tf2xla.HostComputeMetadata host_compute_metadata = 5; + + // Resources whose values were updated by the computation, ordered + // by return value position (which is the same as the order the resources + // were passed as arguments). Resource updates follow the non-constant + // results in the outputs of XLA computation. + repeated ResourceUpdateProto resource_updates = 6; + + // The XLA computation built from the tensorflow subgraph. + xla.HloModuleProto computation = 7; +} + +// TpuAotCompilationRequestProto represents a compilation request for performing +// ahead-of-time (AOT) compilation of XLA Computations into XLA HLO IR. +message TpuAotCompilationRequestProto { + // A set of HLO module built to run concurrently + // across different devices. + xla.HloModuleGroupProto hlo_module_group = 1; + + // Compilation metadata. + TPUCompileMetadataProto metadata = 2; + + // DeviceAssignmentProto is a serialized form of DeviceAssignment class, which + // represents the device ids assigned to a set of replicated computations. + // See xla::DeviceAssignment class comment for more details. + xla.DeviceAssignmentProto device_assignment = 3; + + // Per TPU core program arguments shapes. + repeated PerCoreArgShapes per_core_arg_shapes = 4; + + // Per TPU core program outputs shapes. + repeated PerCoreOutputShapes per_core_output_shapes = 5; + + // Per TPU core information containing what was the index of the corresponding + // input and if whether it was updated. The indices are sorted by input order. + repeated PerCoreVariableIndices per_core_variable_indices = 6; + + // XLA compiler compilation result. + XlaCompilationResultProto compilation_result = 7; +} diff --git a/tensorflow/core/tpu/kernels/tpu_compile_c_api.h b/tensorflow/core/tpu/kernels/tpu_compile_c_api.h new file mode 100644 index 00000000000..53e79aa51b0 --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compile_c_api.h @@ -0,0 +1,119 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILE_C_API_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILE_C_API_H_ + +#include "tensorflow/core/tpu/kernels/tpu_mesh_state_c_api.h" +#include "tensorflow/stream_executor/tpu/proto_helper.h" +#include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" + +enum TpuCoreTypeEnum { + kTensorCore, + kEmbeddingV1, + kEmbeddingV2, +}; + +typedef struct XLA_TpuProgram XLA_TpuProgram; + +// Property for creating compilation cache key. +struct CompilationCacheKeyProperty { + const char* config_prefix; + const char* shapes_prefix; + const char* function_name; + const char* mlir_module; + const int32_t* device_ids; + size_t device_ids_size; + int32_t guaranteed_constants_size; + uint64_t function_library_fingerprint; + int32_t num_cores_per_replica; + int32_t num_replicas; + const XLA_TpuMeshState* mesh_state; +}; + +extern "C" { + +// Creates a new TPU program. +XLA_TpuProgram* TpuProgram_New(); + +// Destroys the `tpu_program`. +void TpuProgram_Free(XLA_TpuProgram* tpu_program); + + +// Unloads and destroys the `tpu_program`. Once the TPU program is unloaded and +// destroyed, it is in an unusable state. +void TpuProgram_UnloadAndDestroy(XLA_TpuProgram* tpu_program, + SE_Status* status); + +// Gets TPU program size in bytes from the `tpu_program`. +int64_t TpuProgram_GetProgramSize(const XLA_TpuProgram* tpu_program); + +// Logs the summary of current memory state snapshot of the `tpu_program`. +bool TpuProgram_LogProgramMemorySummary(const XLA_TpuProgram* tpu_program); + +// Gets TPU program executable info from the `tpu_program`. +void TpuProgram_GetExecutableInfo(const XLA_TpuProgram* tpu_program, + TpuSerializedProto* executable_info); + +// Gets host transfer info proto. +void TpuProgram_GetHostTransferInfo( + const XLA_TpuProgram* tpu_program, + TpuSerializedProto* host_transfer_info); + +// Gets HLO metadata proto. +void TpuProgram_GetHloMetadata(const XLA_TpuProgram* tpu_program, + TpuSerializedProto* hlo_metadata); + +// Returns the number of available TPU core count. +int TpuTopology_AvailableCoreCount(const XLA_TpuMeshState* mesh_state, + TpuCoreTypeEnum tpu_core_type); + +// Creates a unique compilation cache `key` used for `put` and `get` operations. +// Returned buffer is heap-allocated and must be owned. +const char* TpuCompile_CreateCompilationCacheKey( + CompilationCacheKeyProperty property); + +// Creates a guaranteed const fingerprint. Guarantee const is normally used in +// TPU inference to avoid re-copying unchanged variables onto the TPU device. +// It promises the value is identical for every execution in the same session +// even if the actual value changes in later executions. +uint64_t TpuCompile_CreateGuaranteedConstFingerprint(uint64_t fingerprint, + const char* data, + size_t size); + +// Checks if whether a TPU compilation is enabled. +bool TpuCompile_IsTpuCompilationEnabled(); + +// Executes the computations using XLA TPU compiler and returns TPU programs +// ready for execution. 
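+//
+// A rough call-site sketch (illustrative only; constructing the serialized
+// TpuAotCompilationRequestProto and the SE_Status object is elided, and the
+// ownership/cleanup of the returned programs is not shown):
+//
+//   XLA_TpuProgram** tpu_programs = nullptr;
+//   size_t program_count = 0;
+//   TpuCompile_CompileAheadOfTime(serialized_aot_request, &tpu_programs,
+//                                 &program_count, status);
+//   // On success, tpu_programs refers to program_count compiled programs.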
+void TpuCompile_CompileAheadOfTime( + TpuSerializedProto aot_compilation_request, + XLA_TpuProgram** tpu_programs[], + size_t* count, SE_Status* status); + +// Builds `DeviceAssignment` from `TpuCompileMetadata` serialized proto. +void TpuCompile_BuildXLADeviceAssignment( + TpuSerializedProto serialized_tpu_compile_metadata, + const XLA_TpuMeshState* mesh_state, + TpuSerializedProto* serialized_device_assignment, SE_Status* status); + +// Converts an XLA `Shape` into its equivalent TPU `Shape` representation. +void TpuCompile_ToTpuShapeRepresentation( + TpuSerializedProto serialized_xla_shape, int data_type, + bool use_fast_memory, TpuSerializedProto* serialized_tensor_shape, + SE_Status* status); + +} // extern "C" + +#endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILE_C_API_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_options.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_options.cc new file mode 100644 index 00000000000..49a2a089adf --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_options.cc @@ -0,0 +1,42 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/tpu/kernels/tpu_compile_op_options.h" + +namespace tensorflow { +namespace internal { + +namespace { +static bool tpu_compilation_cancellation_terminates_process = true; +static bool tpu_compilation_failure_closes_chips = true; +} // namespace + +void SetTpuCompilationCancellationTerminatesProcess(bool b) { + tpu_compilation_cancellation_terminates_process = b; +} + +bool TpuCompilationCancellationTerminatesProcess() { + return tpu_compilation_cancellation_terminates_process; +} + +void SetTpuCompilationFailureClosesChips(bool value) { + tpu_compilation_failure_closes_chips = value; +} + +bool TpuCompilationFailureClosesChips() { + return tpu_compilation_failure_closes_chips; +} + +} // namespace internal +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_options.h b/tensorflow/core/tpu/kernels/tpu_compile_op_options.h new file mode 100644 index 00000000000..b81fe4a3b75 --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_options.h @@ -0,0 +1,42 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILE_OP_OPTIONS_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILE_OP_OPTIONS_H_ + +#include + +namespace tensorflow { +namespace internal { + +// Setter and getter that determine how TPUCompile responds to cancelled +// compilation. By default this is true, meaning cancelled compilation will +// abort the process, since that's the only mechanism we have available. +// +// Setting this to false allows the process to remain alive, and should only be +// used in tests. +void SetTpuCompilationCancellationTerminatesProcess(bool b); +bool TpuCompilationCancellationTerminatesProcess(); + +// Setter and getter that determine whether TPU compilation failure will cause +// chips to close. By default this is true, it is suitable for training. For +// inference, we never want servers to die and thus chips will keep alive. +// See b/109873767. +void SetTpuCompilationFailureClosesChips(bool value); +bool TpuCompilationFailureClosesChips(); + +} // namespace internal +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILE_OP_OPTIONS_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc b/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc new file mode 100644 index 00000000000..d42c604fd1e --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_support.cc @@ -0,0 +1,439 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/core/tpu/kernels/tpu_compile_op_support.h" + +#include "tensorflow/compiler/xla/debug_options_flags.h" +#include "tensorflow/compiler/xla/service/computation_layout.h" +#include "tensorflow/compiler/xla/service/dump.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_key.h" +#include "tensorflow/core/tpu/kernels/tpu_compile_c_api.h" +#include "tensorflow/core/tpu/kernels/tpu_executable_info.pb.h" +#include "tensorflow/stream_executor/tpu/proto_helper.h" + +namespace tensorflow { +namespace tpu { + +using stream_executor::port::Status; +using stream_executor::port::StatusOr; +using xla::ComputationLayout; +using xla::DebugOptions; +using xla::DeviceAssignment; +using xla::HloModuleConfig; +using xla::HloSharding; +using xla::InvalidArgument; +using xla::ProgramShape; +using xla::Shape; +using xla::ShapeTree; +using xla::ShapeUtil; + +Status ValidateResultShape(const Shape& client_shape, + const Shape& result_shape) { + TF_RETURN_IF_ERROR( + xla::ShapeUtil::ValidateShapeWithOptionalLayout(client_shape)); + if (!xla::ShapeUtil::Compatible(client_shape, result_shape)) { + return InvalidArgument( + "Shape used to set computation result layout %s is not compatible " + "with result shape %s", + xla::ShapeUtil::HumanStringWithLayout(client_shape), + xla::ShapeUtil::HumanString(result_shape)); + } + return Status::OK(); +} + +StatusOr> CreateModuleConfig( + const ProgramShape& program_shape, absl::Span argument_shapes, + absl::optional result_layout, + absl::optional device_assignment, int replica_count, + int num_partitions, const DebugOptions* debug_options, const int* seed, + const int* launch_id, const bool* alias_passthrough_params, + const xla::FusionConfigCollection* fusion_config_collection, + const std::vector>* fusion_config) { + auto config = absl::make_unique(program_shape); + ComputationLayout* computation_layout = + config->mutable_entry_computation_layout(); + if (program_shape.parameters_size() != argument_shapes.size()) { + return InvalidArgument("computation takes %d parameters, but %u given", + program_shape.parameters_size(), + argument_shapes.size()); + } + for (int i = 0; i < argument_shapes.size(); ++i) { + // Verify that shape of arguments matches the shape of the arguments in the + // ProgramShape. + if (!ShapeUtil::Compatible(argument_shapes[i], + program_shape.parameters(i))) { + return InvalidArgument( + "Argument does not match shape of computation parameter %d: want " + "%s, got %s", + i, ShapeUtil::HumanString(program_shape.parameters(i)), + ShapeUtil::HumanString(argument_shapes[i])); + } + TF_RETURN_IF_ERROR( + computation_layout->mutable_parameter_layout(i)->CopyLayoutFromShape( + argument_shapes[i])); + } + + if (result_layout.has_value()) { + TF_RETURN_IF_ERROR( + ValidateResultShape(result_layout.value(), program_shape.result())); + TF_RETURN_IF_ERROR( + computation_layout->mutable_result_layout()->CopyLayoutFromShape( + result_layout.value())); + } else { + // If the result layout is not set, then choose the default. 
+ computation_layout->mutable_result_layout()->SetToDefaultLayout(); + } + + config->set_replica_count(replica_count); + config->set_num_partitions(num_partitions); + if (seed != nullptr) { + config->set_seed(*seed); + } + if (launch_id != nullptr) { + config->set_launch_id(*launch_id); + } + if (debug_options != nullptr) { + config->set_debug_options(*debug_options); + } else { + config->set_debug_options(xla::GetDebugOptionsFromFlags()); + } + + // TODO(henrytan): set intra_op_parallelism_threads. + // Reference: + // tensorflow/compiler/xla/service/service.cc?l=324. + + if (device_assignment.has_value()) { + config->set_static_device_assignment(device_assignment.value()); + } + + if (alias_passthrough_params != nullptr) { + config->set_alias_passthrough_params(*alias_passthrough_params); + } + + if (fusion_config_collection != nullptr && fusion_config != nullptr && + *fusion_config_collection != xla::FusionConfigCollection::kOff) { + config->set_fusion_config_collection(*fusion_config_collection); + *config->mutable_fusion_config() = *fusion_config; + } + + return std::move(config); +} + +StatusOr> CreateModuleConfig( + const xla::ProgramShape& program_shape, + absl::Span argument_shapes, + absl::optional result_layout, + absl::optional device_assignment, int replica_count, + int num_partitions, const DebugOptions* debug_options) { + return CreateModuleConfig(program_shape, argument_shapes, result_layout, + device_assignment, replica_count, num_partitions, + debug_options, /*seed=*/nullptr, + /*launch_id=*/nullptr, + /*alias_passthrough_params=*/nullptr, + /*fusion_config_collection=*/nullptr, + /*fusion_config=*/nullptr); +} + +ShapeTree GetSubtree( + const ShapeTree& tuple_shape_tree, int element_index) { + ShapeTree element_shape_tree( + xla::ShapeUtil::GetTupleElementShape(tuple_shape_tree.shape(), + element_index), + HloSharding::Replicate()); + + xla::ShapeIndex src_index; + src_index.push_back(element_index); + element_shape_tree.CopySubtreeFrom(tuple_shape_tree, src_index, {}); + return element_shape_tree; +} + +Shape GetPerDeviceShape(const Shape& shape, const HloSharding& sharding, + int64 device) { + if (shape.IsTuple()) { + ShapeTree tuple_shape_tree = sharding.GetAsShapeTree(shape); + std::vector arg_shapes; + for (int64 i = 0; i < xla::ShapeUtil::TupleElementCount(shape); ++i) { + Shape element_shape = xla::ShapeUtil::GetTupleElementShape(shape, i); + HloSharding element_sharding = tuple_shape_tree.element({i}); + if (element_shape.IsTuple()) { + element_sharding = HloSharding::Tuple(GetSubtree(tuple_shape_tree, i)); + } + if (element_sharding.UsesDevice(device)) { + arg_shapes.push_back( + GetPerDeviceShape(element_shape, element_sharding, device)); + } + } + return xla::ShapeUtil::MakeTupleShape(arg_shapes); + } + + if (sharding.IsTileMaximal()) { + return shape; + } + + std::vector dimensions; + std::vector offset = sharding.TileOffsetForDevice(shape, device); + std::vector limit = sharding.TileLimitForDevice(shape, device); + for (int64 i = 0; i < limit.size(); ++i) { + dimensions.push_back(limit[i] - offset[i]); + } + if (shape.has_layout()) { + return xla::ShapeUtil::MakeShapeWithLayout(shape.element_type(), dimensions, + shape.layout().minor_to_major()); + } + return xla::ShapeUtil::MakeShape(shape.element_type(), dimensions); +} + +Status AddVariableUpdatesToCores( + const TPUCompileMetadataProto& metadata, + const XlaCompiler::CompilationResult& compilation_result, + const std::vector& arg_core_mapping, + std::vector* may_modify_variables, + std::vector>* 
per_core_output_shapes, + std::vector>>* per_core_variable_indices) { + // Add all variables to the corresponding core. + may_modify_variables->resize(metadata.num_cores_per_replica(), false); + int resource_update_pos = 0; + for (int i = 0; i < metadata.args_size(); ++i) { + const tpu::TPUCompileMetadataProto::Arg& proto_arg = metadata.args(i); + if (proto_arg.kind() == tpu::TPUCompileMetadataProto::Arg::VARIABLE) { + const auto& sharding = proto_arg.sharding(); + bool updated = false; + if (resource_update_pos < compilation_result.resource_updates.size()) { + const XlaCompiler::ResourceUpdate& update = + compilation_result.resource_updates[resource_update_pos]; + if (update.input_index == i) { + updated = true; + int pos = compilation_result.outputs.size() + resource_update_pos; + xla::Shape shape = xla::ShapeUtil::GetTupleElementShape( + compilation_result.xla_output_shape, pos); + auto add_to_core = [&](int64 core, const xla::Shape& per_core_shape) { + (*per_core_output_shapes)[core].push_back(per_core_shape); + (*may_modify_variables)[core] = + (*may_modify_variables)[core] || update.modified; + }; + if (sharding.type() == xla::OpSharding::MAXIMAL) { + add_to_core(sharding.tile_assignment_devices(0), shape); + } else if (sharding.type() == xla::OpSharding::OTHER) { + auto sharding_or = + xla::HloSharding::FromProto(proto_arg.sharding()); + TF_RET_CHECK(sharding_or.ok()); + for (int64 core : proto_arg.sharding().tile_assignment_devices()) { + xla::Shape per_core_shape = + GetPerDeviceShape(shape, sharding_or.ValueOrDie(), core); + add_to_core(core, per_core_shape); + } + } else { + TF_RET_CHECK(sharding.type() == xla::OpSharding::REPLICATED); + for (int64 core = 0; core < metadata.num_cores_per_replica(); + ++core) { + add_to_core(core, shape); + } + } + ++resource_update_pos; + } + } + if (sharding.type() == xla::OpSharding::MAXIMAL) { + (*per_core_variable_indices)[sharding.tile_assignment_devices(0)] + .push_back( + std::pair(arg_core_mapping[i].indices[0], updated)); + } else if (sharding.type() == xla::OpSharding::OTHER) { + for (int core : sharding.tile_assignment_devices()) { + (*per_core_variable_indices)[core].push_back( + std::pair(arg_core_mapping[i].indices[core], updated)); + } + } else { + TF_RET_CHECK(sharding.type() == xla::OpSharding::REPLICATED); + for (int64 core = 0; core < metadata.num_cores_per_replica(); ++core) { + (*per_core_variable_indices)[core].push_back( + std::pair(arg_core_mapping[i].indices[core], updated)); + } + } + } + } + return Status::OK(); +} + +Status ComputeOutputShapesForEachCore( + const tpu::TPUCompileMetadataProto& metadata, + const XlaCompiler::CompilationResult& compilation_result, + std::vector>* per_core_output_shapes) { + for (int i = 0; i < metadata.retvals_size(); ++i) { + const tpu::TPUCompileMetadataProto::Retval& retval = metadata.retvals(i); + TF_RET_CHECK(!compilation_result.outputs[i].is_constant) + << "TPU compilation output " << i + << " has a compile-time constant value. 
" + "This should never happen."; + + xla::Shape shape = xla::ShapeUtil::GetTupleElementShape( + compilation_result.xla_output_shape, i); + auto add_shape_to_core = [&](int core, xla::Shape per_core_shape) { + (*per_core_output_shapes)[core].push_back(std::move(per_core_shape)); + }; + if (retval.sharding().type() == xla::OpSharding::MAXIMAL) { + add_shape_to_core(retval.sharding().tile_assignment_devices(0), + std::move(shape)); + } else if (retval.sharding().type() == xla::OpSharding::OTHER) { + auto sharding_or = xla::HloSharding::FromProto(retval.sharding()); + TF_RET_CHECK(sharding_or.ok()); + for (int64 core : retval.sharding().tile_assignment_devices()) { + xla::Shape per_core_shape = + GetPerDeviceShape(shape, sharding_or.ValueOrDie(), core); + add_shape_to_core(core, std::move(per_core_shape)); + } + } else { + TF_RET_CHECK(retval.sharding().type() == xla::OpSharding::REPLICATED) + << "Not all of the constant tensors were consumed."; + for (int core = 0; core < per_core_output_shapes->size(); ++core) { + add_shape_to_core(core, shape); + } + } + } + return Status::OK(); +} + +Status CreateHloModules( + const TPUCompileMetadataProto& metadata, + const tensorflow::XlaCompiler::CompilationResult& compilation_result, + const absl::optional& device_assignment, + std::vector>* hlo_modules) { + TF_RET_CHECK( + compilation_result.computation->proto().has_host_program_shape()); + + auto debug_options = xla::DebugOptions(); + debug_options.set_xla_step_marker_location(metadata.step_marker_location()); + TF_ASSIGN_OR_RETURN( + std::unique_ptr module_config, + CreateModuleConfig( + xla::ProgramShape( + compilation_result.computation->proto().host_program_shape()), + compilation_result.xla_input_shapes, + compilation_result.xla_output_shape, device_assignment, + metadata.num_replicas(), metadata.num_cores_per_replica(), + &debug_options)); + + TF_ASSIGN_OR_RETURN( + std::unique_ptr hlo_module, + xla::HloModule::CreateFromProto(compilation_result.computation->proto(), + *module_config)); + DumpHloModuleIfEnabled(*hlo_module, "before_optimizations"); + hlo_modules->push_back(std::move(hlo_module)); + + return Status::OK(); +} + +XlaCompilationResultProto SerializeCompilationResult( + const XlaCompiler::CompilationResult& compilation_result) { + XlaCompilationResultProto compilation_result_proto; + for (int input_mapping : compilation_result.input_mapping) { + compilation_result_proto.add_input_mappings(input_mapping); + } + + for (const Shape& input_shape : compilation_result.xla_input_shapes) { + *(compilation_result_proto.add_xla_input_shapes()) = input_shape.ToProto(); + } + *(compilation_result_proto.mutable_xla_output_shape()) = + compilation_result.xla_output_shape.ToProto(); + + for (const XlaCompiler::OutputDescription& output_description : + compilation_result.outputs) { + auto* new_output = compilation_result_proto.add_outputs(); + new_output->set_type(output_description.type); + output_description.shape.AsProto(new_output->mutable_shape()); + new_output->set_is_constant(output_description.is_constant); + output_description.constant_value.AsProtoField( + new_output->mutable_constant_value()); + new_output->set_input_index(output_description.input_index); + new_output->set_is_tensor_list(output_description.is_tensor_list); + } + + *compilation_result_proto.mutable_host_compute_metadata() = + compilation_result.host_compute_metadata; + + for (const XlaCompiler::ResourceUpdate& resource_update : + compilation_result.resource_updates) { + auto* new_resource_update = 
compilation_result_proto.add_resource_updates(); + new_resource_update->set_input_index(resource_update.input_index); + new_resource_update->set_type(resource_update.type); + resource_update.shape.AsProto(new_resource_update->mutable_shape()); + new_resource_update->set_modified(resource_update.modified); + for (const std::string& gradient_access : + resource_update.tensor_array_gradients_accessed) { + new_resource_update->mutable_tensor_array_gradients_accessed()->insert( + {gradient_access, true}); + } + } + + if (compilation_result.computation != nullptr) { + *compilation_result_proto.mutable_computation() = + compilation_result.computation->proto(); + } + + return compilation_result_proto; +} + +StatusOr CreateTpuAotCompilationRequest( + const xla::HloModuleGroup& module_group, + const XlaCompiler::CompilationResult& compilation_result, + const TPUCompileMetadataProto& metadata, + const std::vector>& per_core_arg_shapes, + const std::vector>& per_core_output_shapes, + const std::vector>>& + per_core_variable_indices, + const absl::optional& device_assignment) { + VLOG(1) << "CreateTpuAotCompilationRequest."; + TpuAotCompilationRequestProto aot_request; + *(aot_request.mutable_hlo_module_group()) = module_group.ToProto(); + *(aot_request.mutable_metadata()) = metadata; + if (device_assignment.has_value()) { + xla::DeviceAssignmentProto device_assignment_proto; + Status status = device_assignment->Serialize(&device_assignment_proto); + if (!status.ok()) { + return status; + } + *(aot_request.mutable_device_assignment()) = device_assignment_proto; + } + + for (const auto& arg_shapes : per_core_arg_shapes) { + auto* new_shape_list = aot_request.add_per_core_arg_shapes(); + for (const auto& arg_shape : arg_shapes) { + *new_shape_list->add_shapes() = arg_shape.ToProto(); + } + } + + for (const auto& output_shapes : per_core_output_shapes) { + auto* new_shape_list = aot_request.add_per_core_output_shapes(); + for (const auto& output_shape : output_shapes) { + *new_shape_list->add_shapes() = output_shape.ToProto(); + } + } + + for (const auto& variable_indices : per_core_variable_indices) { + auto* new_list = aot_request.add_per_core_variable_indices(); + for (const auto& variable_index : variable_indices) { + auto* core_index = new_list->add_variable_indices(); + core_index->set_index(variable_index.first); + core_index->set_updated(variable_index.second); + } + } + + XlaCompilationResultProto compilation_result_proto = + SerializeCompilationResult(compilation_result); + *aot_request.mutable_compilation_result() = compilation_result_proto; + + VLOG(1) << "TpuAotCompilationRequest:\n" << aot_request.DebugString(); + return aot_request; +} +} // namespace tpu +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compile_op_support.h b/tensorflow/core/tpu/kernels/tpu_compile_op_support.h new file mode 100644 index 00000000000..0f21e458828 --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compile_op_support.h @@ -0,0 +1,122 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILE_OP_SUPPORT_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILE_OP_SUPPORT_H_ + +#include +#include + +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_key.h" +#include "tensorflow/core/tpu/kernels/tpu_compile.pb.h" +#include "absl/types/optional.h" +#include "absl/types/span.h" +#include "tensorflow/cc/framework/ops.h" +#include "tensorflow/compiler/tf2xla/xla_compiler.h" +#include "tensorflow/compiler/xla/client/compile_only_client.h" +#include "tensorflow/compiler/xla/service/hlo_module_config.h" +#include "tensorflow/compiler/xla/service/hlo_module_group.h" +#include "tensorflow/compiler/xla/service/hlo_sharding.h" +#include "tensorflow/compiler/xla/shape.h" +#include "tensorflow/compiler/xla/shape_tree.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/protobuf/tpu/compile_metadata.pb.h" + +namespace tensorflow { +namespace tpu { + +namespace se = ::stream_executor; + +// Describes the position of an argument or return value after the computation +// has been partitioned into cores. +struct ShardingAndIndex { + // Sharding across cores. + ::xla::OpSharding sharding; + // Argument/return value number. If sharding is single-core, `indices` has a + // single element; otherwise, it has num_cores elements. + std::vector indices; +}; + +// TODO(b/158279168): Dedup with internal version. +// Return the per-device shape for a `shape` with a given `sharding`. 
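+// For example (illustrative): a shape f32[8,16] tiled across two devices along
+// dimension 0 yields the per-device shape f32[4,16] on each device, while a
+// tile-maximal (single-device) sharding returns the original f32[8,16]
+// unchanged.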
+xla::Shape GetPerDeviceShape(const xla::Shape& shape, + const xla::HloSharding& sharding, + int64 device); + +stream_executor::port::StatusOr> +CreateModuleConfig( + const xla::ProgramShape& program_shape, + absl::Span argument_shapes, + absl::optional result_layout, + absl::optional device_assignment, + int replica_count, int num_partitions, + const xla::DebugOptions* debug_options, const int* seed, + const int* launch_id, const bool* alias_passthrough_params, + const xla::FusionConfigCollection* fusion_config_collection, + const std::vector>* fusion_config); + +stream_executor::port::StatusOr> +CreateModuleConfig( + const xla::ProgramShape& program_shape, + absl::Span argument_shapes, + absl::optional result_layout, + absl::optional device_assignment, + int replica_count, + int num_partitions, const xla::DebugOptions* debug_options); + +xla::ShapeTree GetSubtree( + const xla::ShapeTree& tuple_shape_tree, + int element_index); + +xla::Shape GetPerDeviceShape(const xla::Shape& shape, + const xla::HloSharding& sharding, + int64 device); + +Status AddVariableUpdatesToCores( + const TPUCompileMetadataProto& metadata, + const XlaCompiler::CompilationResult& compilation_result, + const std::vector& arg_core_mapping, + std::vector* may_modify_variables, + std::vector>* per_core_output_shapes, + std::vector>>* per_core_variable_indices); + +se::port::Status ComputeOutputShapesForEachCore( + const tpu::TPUCompileMetadataProto& metadata, + const XlaCompiler::CompilationResult& compilation_result, + std::vector>* per_core_output_shapes); + +se::port::Status CreateHloModules( + const TPUCompileMetadataProto& metadata, + const XlaCompiler::CompilationResult& compilation_result, + const absl::optional& device_assignment, + std::vector>* hlo_modules); + +se::port::StatusOr +CreateTpuAotCompilationRequest( + const xla::HloModuleGroup& module_group, + const XlaCompiler::CompilationResult& compilation_result, + const TPUCompileMetadataProto& metadata, + const std::vector>& per_core_arg_shapes, + const std::vector>& per_core_output_shapes, + const std::vector>>& + per_core_variable_indices, + const absl::optional& device_assignment); +} // namespace tpu +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILE_OP_SUPPORT_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc b/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc new file mode 100644 index 00000000000..7fa345d735c --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_configuration_ops.cc @@ -0,0 +1,298 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/core/tpu/kernels/tpu_configuration_ops.h" + +#include + +#include "tensorflow/c/tf_status.h" +#include "tensorflow/c/tf_status_helper.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.pb.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/platform/refcount.h" +#include "tensorflow/core/tpu/kernels/tpu_mesh_state_interface.h" +#include "tensorflow/core/tpu/tpu_config_c_api.h" +#include "tensorflow/core/tpu/tpu_configuration.h" +#include "tensorflow/core/tpu/tpu_defs.h" +#include "tensorflow/stream_executor/tpu/proto_helper.h" + +namespace tensorflow { +namespace { + +Status GetTpuMeshStateInterface(const ResourceMgr* rmgr, + tpu::TpuMeshStateInterface** state) { + if (!rmgr->Lookup(rmgr->default_container(), + tpu::kTpuMeshCommonStateResourceName, state) + .ok()) { + return errors::FailedPrecondition( + "The TPU system has not been initialized."); + } + return Status::OK(); +} + +// Attempt to delete resource_name from resource_manager's default_container. +// Returns OK if the deletion succeeded, or if the resource was not found. Else +// return the deletion error. +template +Status DeleteIfExists(ResourceMgr* resource_manager, + const char* resource_name) { + VLOG(1) << "Removing resource " << resource_name << " if it exists"; + Status status = resource_manager->Delete( + resource_manager->default_container(), resource_name); + if (status.ok()) { + VLOG(1) << "Removed existing resource " << resource_name; + return Status::OK(); + } + if (status.code() == error::NOT_FOUND) { + VLOG(1) << "No resource " << resource_name << " to remove"; + return Status::OK(); + } + VLOG(1) << "Error removing resource " << resource_name << " : " << status; + return status; +} + +} // namespace + +void ConfigureDistributedTpuOp::Compute(OpKernelContext* ctx) { + VLOG(1) << "ConfigureDistributedTpuOp"; + XLA_SCOPED_LOGGING_TIMER("ConfigureDistributedTpuOp"); + + std::vector num_devices_per_host; + int chips_per_host = -1; + for (int i = 0; i < ctx->num_inputs(); ++i) { + const Tensor& input_tensor = ctx->input(i); + OP_REQUIRES( + ctx, TensorShapeUtils::IsScalar(input_tensor.shape()), + errors::InvalidArgument("Input ", i, " should be a scalar but has ", + input_tensor.dims(), " dimensions")); + if (chips_per_host == -1) { + chips_per_host = input_tensor.scalar()(); + } else { + OP_REQUIRES( + ctx, chips_per_host == input_tensor.scalar()(), + errors::Internal("Host ", i, " has ", input_tensor.scalar()(), + " TPU chips but host 0 has ", chips_per_host)); + } + num_devices_per_host.push_back(input_tensor.scalar()()); + } + + TF_Status* status = TF_NewStatus(); + size_t host_config_output_size; + char* host_config_output; + + auto* rmgr = GetTPUConfigResourceMgr(); + OP_REQUIRES_OK(ctx, DeleteIfExists( + rmgr, tpu::kTpuMeshCommonStateResourceName)); + + ConfigureDistributedTpuOp_DoWork( + num_devices_per_host.size(), num_devices_per_host.data(), + &host_config_output_size, &host_config_output, status); + + OP_REQUIRES_OK(ctx, rmgr->Create(rmgr->default_container(), + tpu::kTpuMeshCommonStateResourceName, + tpu::TpuMeshStateInterface::Create())); + + Tensor* ctx_output; + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &ctx_output)); + ctx_output->scalar()() = + std::string(host_config_output, host_config_output_size); + + OP_REQUIRES_OK(ctx, StatusFromTF_Status(status)); + 
TF_DeleteStatus(status); + TpuConfigurationApi_FreeCharArray(host_config_output); + + VLOG(1) << "ConfigureDistributedTpuOp done"; +} + +void WaitForDistributedTpuOp::Compute(OpKernelContext* ctx) { + VLOG(1) << "WaitForDistributedTpuOp"; + XLA_SCOPED_LOGGING_TIMER("WaitForDistributedTpuOp"); + + size_t num_devices_per_host = -1; + size_t num_hosts = ctx->num_inputs(); + + for (int i = 0; i < ctx->num_inputs(); ++i) { + const Tensor& host_ordinal_to_global_device_id_tensor = ctx->input(i); + OP_REQUIRES( + ctx, host_ordinal_to_global_device_id_tensor.dims() == 1, + errors::InvalidArgument("Input ", i, " should be a vector but has ", + host_ordinal_to_global_device_id_tensor.dims(), + " dimensions")); + } + + std::vector> mapping; + std::vector mapping_arg; + + mapping.resize(ctx->num_inputs()); + + for (int i = 0; i < ctx->num_inputs(); ++i) { + const Tensor& host_ordinal_to_global_device_id_tensor = ctx->input(i); + const auto host_ordinal_to_global_device_id = + host_ordinal_to_global_device_id_tensor.flat(); + if (num_devices_per_host == -1) { + num_devices_per_host = + host_ordinal_to_global_device_id_tensor.dim_size(0); + } else { + OP_REQUIRES(ctx, + num_devices_per_host == + host_ordinal_to_global_device_id_tensor.dim_size(0), + errors::Internal( + "Host ", i, " has ", + host_ordinal_to_global_device_id_tensor.dim_size(0), + " TPU devices but host 0 has ", num_devices_per_host)); + } + for (int j = 0; j < host_ordinal_to_global_device_id_tensor.dim_size(0); + ++j) { + int32_t global_device_id = host_ordinal_to_global_device_id(j); + mapping[i].push_back(global_device_id); + } + mapping_arg.push_back(mapping[i].data()); + } + + TF_Status* status = TF_NewStatus(); + size_t tpu_topology_output_size; + char* tpu_topology_output; + + tpu::TpuMeshStateInterface* mesh_state; + auto* rmgr = GetTPUConfigResourceMgr(); + OP_REQUIRES_OK(ctx, GetTpuMeshStateInterface(rmgr, &mesh_state)); + core::ScopedUnref mesh_state_unref(mesh_state); + + WaitForDistributedTpuOp_DoWork( + num_hosts, num_devices_per_host, + const_cast(mapping_arg.data()), mesh_state, + &tpu_topology_output_size, &tpu_topology_output, status); + + Tensor* ctx_output; + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &ctx_output)); + ctx_output->scalar()() = + std::string(tpu_topology_output, tpu_topology_output_size); + + OP_REQUIRES_OK(ctx, StatusFromTF_Status(status)); + TF_DeleteStatus(status); + TpuConfigurationApi_FreeCharArray(tpu_topology_output); + + VLOG(1) << "WaitForDistributedTpuOp done"; +} + +void ShutdownDistributedTpuOp::Compute(OpKernelContext* ctx) { + VLOG(1) << "ShutdownDistributedTpuOp"; + XLA_SCOPED_LOGGING_TIMER("ShutdownDistributedTpuOp"); + + TF_Status* status = TF_NewStatus(); + OP_REQUIRES_OK(ctx, DeleteIfExists( + GetTPUConfigResourceMgr(), + tpu::kTpuMeshCommonStateResourceName)); + ShutdownDistributedTpuOp_DoWork(status); + OP_REQUIRES_OK(ctx, StatusFromTF_Status(status)); + TF_DeleteStatus(status); + + VLOG(1) << "ShutdownDistributedTpuOp done"; +} + +void InitializeHostForDistributedTpuOp::Compute(OpKernelContext* ctx) { + VLOG(1) << "InitializeHostForDistributedTpuOp"; + XLA_SCOPED_LOGGING_TIMER("InitializeHostForDistributedTpuOp"); + + auto tpu_host_config = ctx->input(0).scalar()(); + + size_t device_id_output_size; + int32_t* device_id_output; + TF_Status* status = TF_NewStatus(); + + InitializeHostForDistributedTpuOp_DoWork( + tpu_host_config.size(), tpu_host_config.data(), + enable_whole_mesh_compilations_, &device_id_output_size, + &device_id_output, status); + + Tensor* 
ctx_output; + OP_REQUIRES_OK( + ctx, ctx->allocate_output( + 0, TensorShape({static_cast(device_id_output_size)}), + &ctx_output)); + + for (size_t i = 0; i < device_id_output_size; ++i) { + ctx_output->flat()(i) = device_id_output[i]; + } + + OP_REQUIRES_OK(ctx, StatusFromTF_Status(status)); + TF_DeleteStatus(status); + TpuConfigurationApi_FreeInt32Array(device_id_output); + + VLOG(1) << "InitializeHostForDistributedTpuOp done"; +} + +void SetGlobalTPUArrayOp::Compute(OpKernelContext* ctx) { + VLOG(1) << "SetGlobalTPUArrayOp"; + XLA_SCOPED_LOGGING_TIMER("SetGlobalTPUArrayOp"); + + auto tpu_topology = ctx->input(0).scalar()(); + TF_Status* status = TF_NewStatus(); + + SetGlobalTPUArrayOp_DoWork(tpu_topology.size(), tpu_topology.data(), status); + + OP_REQUIRES_OK(ctx, StatusFromTF_Status(status)); + TF_DeleteStatus(status); + + VLOG(1) << "SetGlobalTPUArrayOp done"; +} + +void DisconnectDistributedTpuChipsOp::Compute(OpKernelContext* ctx) { + VLOG(1) << "DisconnectDistributedTpuChipsOp"; + XLA_SCOPED_LOGGING_TIMER("DisconnectDistributedTpuChipsOp"); + + TF_Status* status = TF_NewStatus(); + int32_t number_of_chips_output = 0; + + DisconnectDistributedTpuChipsOp_DoWork(&number_of_chips_output, status); + + Tensor* ctx_output; + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &ctx_output)); + ctx_output->scalar()() = number_of_chips_output; + + OP_REQUIRES_OK(ctx, StatusFromTF_Status(status)); + TF_DeleteStatus(status); + + VLOG(1) << "DisconnectDistributedTpuChipsOp done"; +} + +// These ops execute on the TPU_SYSTEM device only. +REGISTER_KERNEL_BUILDER(Name("_ConfigureDistributedTPU") + .Device(DEVICE_TPU_SYSTEM) + .HostMemory("output"), + ConfigureDistributedTpuOp); +REGISTER_KERNEL_BUILDER(Name("_WaitForDistributedTPU") + .Device(DEVICE_TPU_SYSTEM) + .HostMemory("inputs") + .HostMemory("topology"), + WaitForDistributedTpuOp); +REGISTER_KERNEL_BUILDER( + Name("_ShutdownDistributedTPU").Device(DEVICE_TPU_SYSTEM), + ShutdownDistributedTpuOp); +REGISTER_KERNEL_BUILDER(Name("_InitializeHostForDistributedTPU") + .Device(DEVICE_TPU_SYSTEM) + .HostMemory("input") + .HostMemory("tpu_ids"), + InitializeHostForDistributedTpuOp); +REGISTER_KERNEL_BUILDER( + Name("_SetGlobalTPUArray").Device(DEVICE_TPU_SYSTEM).HostMemory("topology"), + SetGlobalTPUArrayOp); +REGISTER_KERNEL_BUILDER(Name("_DisconnectHostFromDistributedTPUSystem") + .Device(DEVICE_TPU_SYSTEM) + .HostMemory("number_of_tpu_chips"), + DisconnectDistributedTpuChipsOp); + +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_configuration_ops.h b/tensorflow/core/tpu/kernels/tpu_configuration_ops.h new file mode 100644 index 00000000000..f75a47e5aaf --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_configuration_ops.h @@ -0,0 +1,156 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_CONFIGURATION_OPS_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_TPU_CONFIGURATION_OPS_H_ + +#include "tensorflow/core/framework/op_kernel.h" + +namespace tensorflow { + +// The ConfigureDistributedTpu op is used to start an TPUDriver from +// TensorFlow. It should be run on a TPU_SYSTEM device and returns the +// connection host:port for the CompilationCacheServer. The +// CompilationCacheServer will remain live until the device's Resource Manager +// is cleared or a ShutdownDistributedTpuOp is run on the same device. +class ConfigureDistributedTpuOp : public OpKernel { + public: + explicit ConfigureDistributedTpuOp(OpKernelConstruction* ctx) + : OpKernel(ctx) { + OP_REQUIRES( + ctx, ctx->num_inputs() > 0, + errors::Internal("_ConfigureDistributedTPU needs at least one input")); + } + void Compute(OpKernelContext* ctx) override; + ~ConfigureDistributedTpuOp() override {} + + private: + // ConfigureDistributedTpuOp is neither copyable nor movable. + ConfigureDistributedTpuOp(const ConfigureDistributedTpuOp&) = delete; + ConfigureDistributedTpuOp& operator=(const ConfigureDistributedTpuOp&) = + delete; +}; + +// The WaitForDistributedTpuOp op is used to block execution until +// the distributed Tpu system has started up. It must be run on +// the same TPU_SYSTEM device that ConfigureDistributedTpuOp was run +// on, after all of the InitializeHostForDistributedTpuOp Ops have +// completed. +class WaitForDistributedTpuOp : public OpKernel { + public: + explicit WaitForDistributedTpuOp(OpKernelConstruction* ctx) : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, + ctx->GetAttr("startup_timeout_sec", &startup_timeout_sec_)); + OP_REQUIRES(ctx, startup_timeout_sec_ > 0, + errors::InvalidArgument("startup_timeout_sec ", + startup_timeout_sec_, " must be >0")); + } + void Compute(OpKernelContext* ctx) override; + ~WaitForDistributedTpuOp() override {} + + private: + // The time to wait for all hosts to start up. + int startup_timeout_sec_; + + // WaitForDistributedTpuOp is neither copyable nor movable. + WaitForDistributedTpuOp(const WaitForDistributedTpuOp&) = delete; + WaitForDistributedTpuOp& operator=(const WaitForDistributedTpuOp&) = delete; +}; + +// The ShutdownDistributedTpu op is used to stop a running TPUDriver from +// TensorFlow. It should be run on the TPU_SYSTEM device where +// ConfigureDistributedTpuOp was run. +class ShutdownDistributedTpuOp : public OpKernel { + public: + explicit ShutdownDistributedTpuOp(OpKernelConstruction* ctx) + : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override; + + ~ShutdownDistributedTpuOp() override {} + + private: + // ShutdownDistributedTpuOp is neither copyable nor movable. + ShutdownDistributedTpuOp(const ShutdownDistributedTpuOp&) = delete; + ShutdownDistributedTpuOp& operator=(const ShutdownDistributedTpuOp&) = delete; +}; + +// The InitializeHostForDistributedTpu op is used to initialize the +// TPUPlatform on a host in a distributed TPU system. It should be +// run on every host containing TPU devices before any other Ops that use +// TPU are run. 
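+//
+// Read together with the comments on the other kernels in this file, the
+// intended bring-up sequence is roughly as follows (a sketch of the ordering
+// implied by those comments, not a definitive contract):
+//   1. _ConfigureDistributedTPU           once, on the TPU_SYSTEM device
+//   2. _InitializeHostForDistributedTPU   on every host with TPU devices
+//   3. _WaitForDistributedTPU             once, after all of step 2 completes
+//   4. _SetGlobalTPUArray                 on every host, fed the serialized
+//                                         topology produced by step 3
+//   5. _DisconnectHostFromDistributedTPUSystem   on every host
+//   6. _ShutdownDistributedTPU            once, on the same TPU_SYSTEM device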
+class InitializeHostForDistributedTpuOp : public OpKernel { + public: + explicit InitializeHostForDistributedTpuOp(OpKernelConstruction* ctx) + : OpKernel(ctx) { + ctx->GetAttr("enable_whole_mesh_compilations", + &enable_whole_mesh_compilations_) + .IgnoreError(); + } + + void Compute(OpKernelContext* ctx) override; + + ~InitializeHostForDistributedTpuOp() override {} + + private: + // InitializeHostForDistributedTpuOp is neither copyable nor movable. + InitializeHostForDistributedTpuOp(const InitializeHostForDistributedTpuOp&) = + delete; + InitializeHostForDistributedTpuOp& operator=( + const InitializeHostForDistributedTpuOp&) = delete; + + bool enable_whole_mesh_compilations_ = false; +}; + +// The SetGlobalTPUArray op is used to initialize the TPUPlatform on a +// host in a distributed TPU system. It should be run on every host +// containing TPU devices before any other Ops that use TPU are run. +class SetGlobalTPUArrayOp : public OpKernel { + public: + explicit SetGlobalTPUArrayOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override; + + ~SetGlobalTPUArrayOp() override {} + + private: + // SetGlobalTPUArrayOp is neither copyable nor movable. + SetGlobalTPUArrayOp(const SetGlobalTPUArrayOp&) = delete; + SetGlobalTPUArrayOp& operator=(const SetGlobalTPUArrayOp&) = delete; +}; + +// The DisconnectDistributedTpuChips op is used to disconnect all the chips on a +// host from a running TPUDriver instance. It should be run on every host +// containing TPU devices before the ShutdownDistributedTpuOp is run on +// the TPU_SYSTEM. +class DisconnectDistributedTpuChipsOp : public OpKernel { + public: + explicit DisconnectDistributedTpuChipsOp(OpKernelConstruction* ctx) + : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override; + + ~DisconnectDistributedTpuChipsOp() override {} + + private: + // DisconnectDistributedTpuChipsOp is neither copyable nor movable. + DisconnectDistributedTpuChipsOp(const DisconnectDistributedTpuChipsOp&) = + delete; + DisconnectDistributedTpuChipsOp& operator=( + const DisconnectDistributedTpuChipsOp&) = delete; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_CONFIGURATION_OPS_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_executable_info.proto b/tensorflow/core/tpu/kernels/tpu_executable_info.proto new file mode 100644 index 00000000000..359dad03a72 --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_executable_info.proto @@ -0,0 +1,94 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +syntax = "proto3"; + +package tensorflow; + +import "tensorflow/compiler/xla/service/hlo.proto"; +import "tensorflow/compiler/xla/xla_data.proto"; +import "tensorflow/core/framework/tensor_shape.proto"; + +// A serialization of TPUExecutable. Only includes fields necessary to load +// and execute a program on a worker node. 
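+//
+// In this patch series the proto is produced by deserializing the buffer
+// returned from TpuProgram_GetExecutableInfo (see
+// tensorflow/core/tpu/kernels/tpu_program.cc).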
+message TPUExecutableInfoProto { + reserved 1; + + // The shapes of the inputs and outputs. + repeated xla.ShapeProto input_shapes = 2; + reserved 7; // was input_shape + xla.ShapeProto output_shape = 3; + + message UpdateIndexPair { + int32 index = 1; + bool updated = 2; + } + + message ShapeIndex { + repeated int32 index = 1; + } + + // Dynamic output indices indicate which outputs have dynamic dimensions. + repeated ShapeIndex dynamic_output_indices = 11; + + // For each resource variable output, what was the index of the corresponding + // input and was it updated? The indices are sorted by input order. + repeated UpdateIndexPair variable_indices = 10; + + // The shapes of the outputs when represented as Tensors. These may not + // match the output_shape values because we may flatten tensors to avoid + // excess padding. + repeated TensorShapeProto output_tensor_shapes = 8; + + reserved 4; + + // Optional session module for passing XLA computations between TPUCompileOp + // and TPUExecuteOp. This is needed to support the + // --xla_dump_hlo_snapshots flag. + xla.HloSnapshot session_module = 5; + + // The physical device ids assigned to the replicated cores. + xla.DeviceAssignmentProto device_assignment = 6; +} + +// Metadata for a data transfer between device and host. +message TPUHostTransferProto { + enum TransferDirection { + NONE = 0; + DEVICE_TO_HOST = 1; + HOST_TO_DEVICE = 2; + } + // Channel identifier assigned by compiler and used in host commands. + int64 channel = 1; + // Direction of the transfer operation. + TransferDirection direction = 2; + // Channel identifier prodided by XLA client. + string key = 3; + // Depth of nested loops for this transfer operation. + int64 nested_while_level = 4; + // Shape of the data to be transferred (including layout). + xla.ShapeProto shape = 5; + // Address of the device buffer in HBM (byte offset). + int64 buffer_offset = 6; + // Original data type for this host transfer before X64 rewrite. + xla.PrimitiveType original_type = 7; + // If this host transfer is a splitted X64 transfer, sepcifies whether this + // transfer is for lower bits. + bool is_lower_bits = 8; +} + +message TPUHostTransferInfoProto { + repeated TPUHostTransferProto host_transfers = 1; +} diff --git a/tensorflow/core/tpu/kernels/tpu_mesh_state_c_api.h b/tensorflow/core/tpu/kernels/tpu_mesh_state_c_api.h new file mode 100644 index 00000000000..cb6a82efabc --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_mesh_state_c_api.h @@ -0,0 +1,30 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_MESH_STATE_C_API_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_TPU_MESH_STATE_C_API_H_ + +typedef struct XLA_TpuMeshState XLA_TpuMeshState; + +// Creates a new TPU mesh state object. +XLA_TpuMeshState* TpuMeshState_Create(); + +// Deletes the given TPU `mesh_state` object. Once deleted the object is +// unusable. 
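+//
+// In this patch the handle is normally owned by tpu::TpuMeshStateInterface
+// (tpu_mesh_state_interface.h), whose destructor calls TpuMeshState_Free();
+// callers that hold a raw handle themselves are responsible for freeing it.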
+void TpuMeshState_Free(XLA_TpuMeshState* mesh_state); + +// Returns a pointer to an opaque mesh data structure used internally. +void* TpuMeshState_MeshCommonState(XLA_TpuMeshState* mesh_state); + +#endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_MESH_STATE_C_API_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_mesh_state_interface.h b/tensorflow/core/tpu/kernels/tpu_mesh_state_interface.h new file mode 100644 index 00000000000..34202a78718 --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_mesh_state_interface.h @@ -0,0 +1,78 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef EXPERIMENTAL_BRAIN_TPU_1VM_MINIEXECUTOR_TPU_MESH_STATE_INTERFACE_H_ +#define EXPERIMENTAL_BRAIN_TPU_1VM_MINIEXECUTOR_TPU_MESH_STATE_INTERFACE_H_ + +#include + +#include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/protobuf/tpu/compile_metadata.pb.h" +#include "tensorflow/core/tpu/kernels/tpu_compile_c_api.h" +#include "tensorflow/core/tpu/kernels/tpu_mesh_state_c_api.h" + +namespace tensorflow { + +class TpuMeshCommonState; + +namespace tpu { + +const char kTpuMeshCommonStateResourceName[] = "tpu_mesh_common_state"; + +class TpuMeshStateInterface : public tensorflow::ResourceBase { + public: + explicit TpuMeshStateInterface(XLA_TpuMeshState* handle) + : mesh_state_(handle) { + } + + ~TpuMeshStateInterface() override { + if (mesh_state_ != nullptr) { + TpuMeshState_Free(mesh_state_); + } + } + + static TpuMeshStateInterface* Create() { + return new TpuMeshStateInterface(TpuMeshState_Create()); + } + + const XLA_TpuMeshState* data() const { return mesh_state_; } + + tensorflow::TpuMeshCommonState* mesh_common_state() const { + return static_cast( + TpuMeshState_MeshCommonState(mesh_state_)); + } + + // Returns whether we should include the device assignment as a static field + // to the TPU program. This also determines whether we should include the + // device assignment as part of the compilation cache key. + bool NeedsStaticDeviceAssignment( + const TPUCompileMetadataProto& metadata, + TpuCoreTypeEnum tpu_core_type) const { + // Static device assignment enables XLA to perform certain optimization when + // all cores are used in the replicated computation. + return metadata.num_cores_per_replica() * metadata.num_replicas() == + TpuTopology_AvailableCoreCount(mesh_state_, + tpu_core_type); + } + + string DebugString() const override { return "TpuMeshStateInterface"; } + + private: + XLA_TpuMeshState* mesh_state_; +}; + +} // namespace tpu +} // namespace tensorflow + +#endif // EXPERIMENTAL_BRAIN_TPU_1VM_MINIEXECUTOR_TPU_MESH_STATE_INTERFACE_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_program.cc b/tensorflow/core/tpu/kernels/tpu_program.cc new file mode 100644 index 00000000000..7d89ad15ae9 --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_program.cc @@ -0,0 +1,201 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/tpu/kernels/tpu_program.h" + +#include "tensorflow/compiler/xla/service/hlo_module_group.h" +#include "tensorflow/compiler/xla/xla.pb.h" +#include "tensorflow/core/lib/gtl/cleanup.h" +#include "tensorflow/core/protobuf/tpu/compile_metadata.pb.h" +#include "tensorflow/core/tpu/kernels/tpu_compile.pb.h" +#include "tensorflow/core/tpu/kernels/tpu_compile_c_api.h" +#include "tensorflow/core/tpu/kernels/tpu_compile_op_support.h" +#include "tensorflow/stream_executor/tpu/proto_helper.h" +#include "tensorflow/stream_executor/tpu/status_helper.h" + +namespace tensorflow { +namespace tpu { + +namespace { + +namespace se_tpu = ::stream_executor::tpu; + +using stream_executor::port::StatusOr; +using xla::Shape; + +StatusOr> CompileAheadOfTime( + std::unique_ptr module_group, + const XlaCompiler::CompilationResult& compilation_result, + const TPUCompileMetadataProto& metadata, + const std::vector>& per_core_arg_shapes, + const std::vector>& per_core_output_shapes, + const std::vector>>& + per_core_variable_indices, + const absl::optional& device_assignment) { + VLOG(1) << "Run CompileAheadOfTime."; + TF_ASSIGN_OR_RETURN(TpuAotCompilationRequestProto aot_request, + CreateTpuAotCompilationRequest( + *module_group, compilation_result, metadata, + per_core_arg_shapes, per_core_output_shapes, + per_core_variable_indices, device_assignment)); + se_tpu::SerializedProto serialized_aot_request = + se_tpu::SerializeProto(aot_request); + auto cleanup = gtl::MakeCleanup([serialized_aot_request] { + se_tpu::SerializedProto_Free(serialized_aot_request); + }); + + XLA_TpuProgram** xla_tpu_programs = nullptr; + size_t count = 0; + StatusHelper status; + VLOG(1) << "Run TpuCompile_CompileAheadOfTime."; + TpuCompile_CompileAheadOfTime(serialized_aot_request, &xla_tpu_programs, + &count, status.c_status); + VLOG(1) << "Run CompileAheadOfTime completed."; + if (!status.status().ok()) { + return status.status(); + } + std::vector tpu_programs(count, nullptr); + for (size_t i = 0; i < count; ++i) { + tpu_programs[i] = xla_tpu_programs[i]; + } + delete[] xla_tpu_programs; + return tpu_programs; + return Status::OK(); +} + +StatusOr> CompileAheadOfTime( + const TPUCompileMetadataProto& metadata, + const XlaCompiler::CompilationResult& compilation_result, + const std::vector>& per_core_arg_shapes, + const std::vector>& per_core_output_shapes, + const std::vector>>& + per_core_variable_indices, + const absl::optional& device_assignment) { + VLOG(1) << "Compile Tpu programs."; + std::vector> hlo_modules; + auto status = CreateHloModules(metadata, compilation_result, + device_assignment, &hlo_modules); + if (!status.ok()) { + return status; + } + + return CompileAheadOfTime( + absl::make_unique(hlo_modules[0]->name(), + absl::MakeSpan(hlo_modules)), + compilation_result, metadata, per_core_arg_shapes, per_core_output_shapes, + per_core_variable_indices, device_assignment); +} + +} // namespace + 
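+// Note on ownership: TpuCompile_CompileAheadOfTime hands back a heap-allocated
+// array of XLA_TpuProgram pointers. CompileAheadOfTime() above copies those
+// pointers into a std::vector and deletes only the array; the programs
+// themselves remain alive until UnloadAndDestroyPrograms() releases them
+// through TpuProgram_UnloadAndDestroy.
+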
+int64_t TpuProgram::program_size() const { + int64_t total_size = 0; + for (XLA_TpuProgram* tpu_program : tpu_programs_) { + total_size += TpuProgram_GetProgramSize(tpu_program); + } + return total_size; +} + +bool TpuProgram::LogProgramMemorySummary() { + bool success = true; + for (const XLA_TpuProgram* tpu_program : tpu_programs_) { + success &= TpuProgram_LogProgramMemorySummary(tpu_program); + } + return success; +} + +void TpuProgram::UnloadAndDestroyPrograms() { + for (XLA_TpuProgram* tpu_program : tpu_programs_) { + StatusHelper status; + TpuProgram_UnloadAndDestroy(tpu_program, status.c_status); + auto s = status.status(); + if (!s.ok()) { + LOG(ERROR) << "TpuProgram::UnloadPrograms(): " << s.ToString(); + } + } + tpu_programs_.clear(); +} + +/*static*/ Status TpuProgram::Build( + const TPUCompileMetadataProto& metadata, + const tensorflow::XlaCompiler::CompilationResult& compilation_result, + const std::vector& arg_core_mapping, + const std::vector>& per_core_arg_shapes, + const absl::optional& xla_device_assignment, + TpuProgram* tpu_program) { + std::vector> per_core_output_shapes( + metadata.num_cores_per_replica()); + TF_RETURN_IF_ERROR(ComputeOutputShapesForEachCore( + metadata, compilation_result, &per_core_output_shapes)); + + std::vector>> per_core_variable_indices( + metadata.num_cores_per_replica()); + std::vector may_modify_variables; + TF_RETURN_IF_ERROR(AddVariableUpdatesToCores( + metadata, compilation_result, arg_core_mapping, &may_modify_variables, + &per_core_output_shapes, &per_core_variable_indices)); + TF_RET_CHECK(per_core_arg_shapes.size() == metadata.num_cores_per_replica()); + TF_RET_CHECK(per_core_output_shapes.size() == per_core_arg_shapes.size()); + TF_RET_CHECK(per_core_output_shapes.size() == + per_core_variable_indices.size()); + tpu_program->set_may_modify_variables(may_modify_variables); + + // With shardable input/output pairs, XLA could generate separate + // sharding/unsharding programs along with the main program. The + // sharding/unsharding programs will be in nested entries of the AOT + // compilation result. + auto status_or = CompileAheadOfTime( + metadata, compilation_result, per_core_arg_shapes, per_core_output_shapes, + per_core_variable_indices, xla_device_assignment); + + TF_ASSIGN_OR_RETURN(std::vector xla_tpu_programs, + std::move(status_or)); + // SPMD could return 1 result for all partitions. + TF_RET_CHECK(xla_tpu_programs.size() == 1 || + xla_tpu_programs.size() == metadata.num_cores_per_replica()); + tpu_program->set_tpu_programs(xla_tpu_programs); + + // TODO(jiawenhao): Handle the case of xla_tpu_programs.size() > 1. 
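+  // The three blocks below follow one pattern: the C API fills a
+  // TpuSerializedProto, which is deserialized into the corresponding C++
+  // proto and then freed with StreamExecutor_Tpu_FreeSerializedProto (the
+  // host transfer block additionally guards against an empty buffer). A small
+  // helper could fold the round trip into one call; a minimal sketch, where
+  // `GetAndDeserialize` is a hypothetical name and not part of this change:
+  //
+  //   template <typename Proto, typename Getter>
+  //   Proto GetAndDeserialize(Getter getter, XLA_TpuProgram* program) {
+  //     TpuSerializedProto serialized;
+  //     getter(program, &serialized);
+  //     Proto proto = se_tpu::DeserializeProto<Proto>(serialized);
+  //     StreamExecutor_Tpu_FreeSerializedProto(&serialized);
+  //     return proto;
+  //   }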
+ TpuSerializedProto serialized_executable_info; + TpuProgram_GetExecutableInfo(xla_tpu_programs[0], + &serialized_executable_info); + TPUExecutableInfoProto executable_info = + se_tpu::DeserializeProto( + serialized_executable_info); + tpu_program->set_executable_info(executable_info); + StreamExecutor_Tpu_FreeSerializedProto(&serialized_executable_info); + + TPUHostTransferInfoProto host_transfer_info; + TpuSerializedProto serialized_host_transfer_info; + TpuProgram_GetHostTransferInfo(xla_tpu_programs[0], + &serialized_host_transfer_info); + if (serialized_host_transfer_info.size > 0) { + host_transfer_info = se_tpu::DeserializeProto( + serialized_host_transfer_info); + StreamExecutor_Tpu_FreeSerializedProto(&serialized_host_transfer_info); + } + tpu_program->set_host_transfer_info(host_transfer_info); + + TpuSerializedProto serialized_hlo_metadata; + TpuProgram_GetHloMetadata(xla_tpu_programs[0], &serialized_hlo_metadata); + xla::HloProto hlo_metadata = + se_tpu::DeserializeProto(serialized_hlo_metadata); + tpu_program->set_hlo_metadata(hlo_metadata); + StreamExecutor_Tpu_FreeSerializedProto(&serialized_hlo_metadata); + + return Status::OK(); +} + +} // namespace tpu +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_program.h b/tensorflow/core/tpu/kernels/tpu_program.h new file mode 100644 index 00000000000..aee55bd2f48 --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_program.h @@ -0,0 +1,161 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef EXPERIMENTAL_BRAIN_TPU_1VM_MINIEXECUTOR_TPU_PROGRAM_H_ +#define EXPERIMENTAL_BRAIN_TPU_1VM_MINIEXECUTOR_TPU_PROGRAM_H_ + +#include + +#include "absl/types/optional.h" +#include "tensorflow/compiler/tf2xla/xla_compiler.h" +#include "tensorflow/compiler/xla/client/compile_only_client.h" +#include "tensorflow/compiler/xla/service/computation_placer.h" +#include "tensorflow/compiler/xla/service/hlo.pb.h" +#include "tensorflow/core/tpu/kernels/tpu_compile_c_api.h" +#include "tensorflow/core/tpu/kernels/tpu_compile_op_support.h" +#include "tensorflow/core/tpu/kernels/tpu_executable_info.pb.h" +#include "tensorflow/stream_executor/tpu/tpu_platform_interface.h" + +namespace tensorflow { +namespace tpu { + +class TpuAotCompilationOptions : public xla::AotCompilationOptions { + public: + explicit TpuAotCompilationOptions(int64 replica_count) + : num_cores_(0), replica_count_(replica_count) {} + + // Returns the ID of the platform to which these options apply. 
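+  // Not supported for TPU AOT compilation; calling this is a fatal error.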
+ se::Platform::Id PlatformId() const override { + LOG(FATAL) << "Not implemented."; + return nullptr; + }; + + void set_num_cores(int64 tpu_cores) { num_cores_ = tpu_cores; } + int64 replica_count() const override { return replica_count_; } + int64 num_cores() const override { return num_cores_; } + + void set_allow_separate_sharding_programs(bool allow) { + allow_separate_sharding_programs_ = allow; + } + bool allow_separate_sharding_programs() const { + return allow_separate_sharding_programs_; + } + + const std::vector + shardable_value_update_pairs() const { + return shardable_value_update_pairs_; + } + void set_shardable_value_update_pairs( + std::vector pairs) { + shardable_value_update_pairs_ = std::move(pairs); + } + + private: + int64 num_cores_; + int64 replica_count_; + + // Whether to allow the compiler to create separte sharding and unsharding + // programs, and modify the original program's input/output sharded size. This + // is used for XLA-chosen sharding on parameters without an on-device loop: + // the caller can invoke sharding first, then (repeatedly) invoke the sharded + // main program, and finally invoke the unsharding program when it needs the + // full output. + bool allow_separate_sharding_programs_ = false; + + // The list of input/output pairs in the main program that could be sharded. + std::vector + shardable_value_update_pairs_; +}; + +// An executable capable of being fed to a TPU device. +class TpuProgram { + public: + using Status = ::stream_executor::port::Status; + + virtual ~TpuProgram() = default; + + static Status Build( + const TPUCompileMetadataProto& metadata, + const tensorflow::XlaCompiler::CompilationResult& compilation_result, + const std::vector& arg_core_mapping, + const std::vector>& per_core_arg_shapes, + const absl::optional& xla_device_assignment, + TpuProgram* tpu_program); + + size_t program_count() const { + return tpu_programs_.size(); + } + + int64_t program_size() const; + + bool LogProgramMemorySummary(); + + void UnloadAndDestroyPrograms(); + + const std::vector& may_modify_variables() const { + return may_modify_variables_; + } + void set_may_modify_variables(const std::vector& may_modify_variables) { + may_modify_variables_ = may_modify_variables; + } + + const tf2xla::HostComputeMetadata& host_compute_metadata() const { + return host_compute_metadata_; + } + void set_host_compute_metadata( + const tf2xla::HostComputeMetadata& host_compute_metadata) { + host_compute_metadata_ = host_compute_metadata; + } + + const std::vector& tpu_programs() const { + return tpu_programs_; + } + void set_tpu_programs(std::vector tpu_programs) { + tpu_programs_ = tpu_programs; + } + + const TPUExecutableInfoProto& executable_info() const { + return executable_info_; + } + void set_executable_info(const TPUExecutableInfoProto& executable_info) { + executable_info_ = executable_info; + } + + const TPUHostTransferInfoProto& host_transfer_info() const { + return host_transfer_info_; + } + void set_host_transfer_info( + const TPUHostTransferInfoProto& host_transfer_info) { + host_transfer_info_ = host_transfer_info; + } + + const xla::HloProto& hlo_metadata() const { return hlo_metadata_; } + void set_hlo_metadata(const xla::HloProto& hlo_metadata) { + hlo_metadata_ = hlo_metadata; + } + + private: + std::vector may_modify_variables_; + tf2xla::HostComputeMetadata host_compute_metadata_; + + std::vector tpu_programs_; // Not owned. 
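+  // The XLA_TpuProgram handles above are released explicitly through
+  // UnloadAndDestroyPrograms() rather than in the destructor.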
+ TPUExecutableInfoProto executable_info_; + TPUHostTransferInfoProto host_transfer_info_; + xla::HloProto hlo_metadata_; +}; + +} // namespace tpu +} // namespace tensorflow + +#endif // EXPERIMENTAL_BRAIN_TPU_1VM_MINIEXECUTOR_TPU_PROGRAM_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_util.cc b/tensorflow/core/tpu/kernels/tpu_util.cc new file mode 100644 index 00000000000..5c286de7672 --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_util.cc @@ -0,0 +1,100 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/tpu/kernels/tpu_util.h" + +#include "absl/strings/str_split.h" +#include "tensorflow/core/platform/random.h" + +namespace tensorflow { +namespace tpu { + +std::string SessionNameFromMetadata(const SessionMetadata* session_metadata) { + return session_metadata ? session_metadata->name() : ""; +} + +std::string ProtoKeyForComputation(const std::string& key, int core) { + return absl::StrCat(key, ":", core); +} + +xla::StatusOr ParseCompilationCacheKey( + const std::string& key) { + const std::vector splits = absl::StrSplit(key, '|'); + if (splits.size() == 1) { + // No guaranteed_const. + return TpuCompilationCacheKey(key); + } else if (splits.size() != 3) { + return errors::InvalidArgument("Invalid TPU compilation cache key:", key); + } + + TpuCompilationCacheKey parsed_key(splits.at(0)); + parsed_key.has_guaranteed_const = true; + parsed_key.session_handle = splits.at(1); + const string fingerprint = splits.at(2); + parsed_key.guaranteed_const_fingerprint = [fingerprint] { + return fingerprint; + }; + return parsed_key; +} + +xla::CompileOnlyClient::AotXlaComputationInstance +BuildAotXlaComputationInstance( + const XlaCompiler::CompilationResult& compilation_result) { + xla::CompileOnlyClient::AotXlaComputationInstance instance; + instance.computation = compilation_result.computation.get(); + for (const xla::Shape& shape : compilation_result.xla_input_shapes) { + instance.argument_layouts.push_back(&shape); + } + instance.result_layout = &compilation_result.xla_output_shape; + return instance; +} + +Status ShapeTensorToTensorShape(const Tensor& tensor, TensorShape* shape) { + if (tensor.dtype() != DT_INT64 || + !TensorShapeUtils::IsVector(tensor.shape())) { + return errors::InvalidArgument("Shape tensor must be an int64 vector."); + } + const int64 rank = tensor.NumElements(); + auto tensor_dims = tensor.flat(); + std::vector dims(rank); + for (int64 i = 0; i < rank; ++i) { + dims[i] = tensor_dims(i); + } + return TensorShapeUtils::MakeShape(dims, shape); +} + +Status DynamicShapesToTensorShapes(const OpInputList& dynamic_shapes, + std::vector* shapes) { + shapes->resize(dynamic_shapes.size()); + for (int i = 0; i < dynamic_shapes.size(); ++i) { + TF_RETURN_IF_ERROR( + ShapeTensorToTensorShape(dynamic_shapes[i], &(*shapes)[i])); + } + return Status::OK(); +} + +Status DynamicShapesToTensorShapes(const InputList& dynamic_shapes, + 
std::vector* shapes) { + shapes->resize(dynamic_shapes.end() - dynamic_shapes.begin()); + size_t i = 0; + for (auto& dynamic_shape : dynamic_shapes) { + TF_RETURN_IF_ERROR( + ShapeTensorToTensorShape(dynamic_shape.tensor(), &(*shapes)[i])); + ++i; + } + return Status::OK(); +} + +} // namespace tpu +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_util.h b/tensorflow/core/tpu/kernels/tpu_util.h new file mode 100644 index 00000000000..0ca94d0af59 --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_util.h @@ -0,0 +1,67 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_UTIL_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_TPU_UTIL_H_ + +#include +#include + +#include "absl/strings/str_cat.h" +#include "tensorflow/cc/framework/ops.h" +#include "tensorflow/compiler/tf2xla/xla_compiler.h" +#include "tensorflow/compiler/xla/client/compile_only_client.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/core/protobuf/config.pb.h" +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_key.h" + +namespace tensorflow { +namespace tpu { + +// Utility to get session_name from `SessionMetadata`. `SessionMetadata` may +// be null. +std::string SessionNameFromMetadata(const SessionMetadata* session_metadata); + +// Generates cache proto key for a given computation on a TPU core. +std::string ProtoKeyForComputation(const std::string& key, int core); + +// Returns a TpuCompilationCacheKey parsed from given key or an error. +xla::StatusOr ParseCompilationCacheKey( + const std::string& key); + +xla::CompileOnlyClient::AotXlaComputationInstance +BuildAotXlaComputationInstance( + const XlaCompiler::CompilationResult& compilation_result); + +// Returns true if TPU compilation is enabled. +bool IsTpuCompilationEnabled(); + +// Converts an int64 host memory `tensor` to a `shape`. +Status ShapeTensorToTensorShape(const Tensor& tensor, TensorShape* shape); + +Status DynamicShapesToTensorShapes(const OpInputList& dynamic_shapes, + std::vector* shapes); +Status DynamicShapesToTensorShapes(const InputList& dynamic_shapes, + std::vector* shapes); + +// Given a tensor of `shape` and `type`, as what shape should it be stored on +// the TPU device? This function tranposes or flattens the excessively-padded +// tensors to rank 1, but leaves other tensor shapes alone. +xla::StatusOr TpuShapeRepresentation(const TensorShape& shape, + DataType type, + bool use_fast_memory); +} // namespace tpu +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_UTIL_H_ diff --git a/tensorflow/core/tpu/kernels/trace_util.h b/tensorflow/core/tpu/kernels/trace_util.h new file mode 100644 index 00000000000..4e0b7c96892 --- /dev/null +++ b/tensorflow/core/tpu/kernels/trace_util.h @@ -0,0 +1,27 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_TPU_KERNELS_TRACE_UTIL_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_TRACE_UTIL_H_ + +#ifdef PLATFORM_GOOGLE +#include "base/tracer.h" +#else +#undef TRACESTRING +#define TRACESTRING(x) +#undef TRACELITERAL +#define TRACELITERAL(x) +#endif + +#endif // TENSORFLOW_CORE_TPU_KERNELS_TRACE_UTIL_H_ diff --git a/tensorflow/core/tpu/tpu_library_loader.cc b/tensorflow/core/tpu/tpu_library_loader.cc index 3bc835c9c7f..c89de142a9f 100644 --- a/tensorflow/core/tpu/tpu_library_loader.cc +++ b/tensorflow/core/tpu/tpu_library_loader.cc @@ -64,13 +64,20 @@ TfTpu_ConfigApiFn* ConfigApiFn() { } Status InitializeTpuLibrary(void* library_handle) { + bool shared_object_loaded = true; if (library_handle == nullptr) { library_handle = dlopen(nullptr, RTLD_LAZY); + shared_object_loaded = false; } TF_RETURN_IF_ERROR(SetTpuInitializeStructFns(library_handle)); TF_RETURN_IF_ERROR(SetTpuConfigStructFns(library_handle)); + if (shared_object_loaded) { + // Initialize TPU platform when the platform code is loaded from a library. + InitializeApiFn()->TfTpu_InitializeFn(); + } + return Status::OK(); } diff --git a/tensorflow/stream_executor/tpu/BUILD b/tensorflow/stream_executor/tpu/BUILD new file mode 100644 index 00000000000..52ca40f8d3f --- /dev/null +++ b/tensorflow/stream_executor/tpu/BUILD @@ -0,0 +1,234 @@ +# Description: StreamExecutor Interface for TPUs + +package( + default_visibility = ["//tensorflow/core/tpu:__subpackages__"], + licenses = ["notice"], # Apache 2.0 +) + +cc_library( + name = "tpu_executor_c_api_hdrs", + hdrs = ["tpu_executor_c_api.h"], + deps = [ + "//tensorflow/c:tf_attrtype", + "//tensorflow/c:tf_datatype", + "//tensorflow/c:tf_status", + ], +) + +cc_library( + name = "tpu_node_context_c_api_hdrs", + hdrs = ["tpu_node_context_c_api.h"], + deps = [ + ":tpu_executor_c_api_hdrs", + ], +) + +cc_library( + name = "status_helper", + hdrs = ["status_helper.h"], + deps = [ + ":tpu_executor_c_api_hdrs", + "//tensorflow/core/platform:status", + ], +) + +cc_library( + name = "c_api_conversions", + hdrs = ["c_api_conversions.h"], + deps = [ + ":tpu_executor_c_api_hdrs", + "//tensorflow/compiler/xla:literal", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:xla_data_proto_cc", + "//tensorflow/compiler/xla/service:shaped_buffer", + "//tensorflow/stream_executor:stream", + "@com_google_absl//absl/container:inlined_vector", + ], +) + +cc_library( + name = "proto_helper", + srcs = ["proto_helper.cc"], + hdrs = ["proto_helper.h"], + deps = ["//tensorflow/core:lib"], +) + +cc_library( + name = "tpu_stream", + hdrs = ["tpu_stream.h"], + deps = [ + ":tpu_executor_c_api_hdrs", + "//tensorflow/stream_executor:stream", + "//tensorflow/stream_executor/lib", + ], +) + +cc_library( + name = "tpu_timer", + hdrs = ["tpu_timer.h"], + deps = [ + ":tpu_executor_c_api_hdrs", + "//tensorflow/core/platform:types", + "//tensorflow/stream_executor:stream", + ], +) + +cc_library( + 
name = "tpu_executor", + srcs = ["tpu_executor.cc"], + hdrs = ["tpu_executor.h"], + deps = [ + ":c_api_conversions", + ":status_helper", + ":tpu_executor_c_api_hdrs", + ":tpu_executor_interface", + ":tpu_platform", + ":tpu_platform_interface", + ":tpu_stream", + ":tpu_timer", + "//tensorflow/c:tf_status", + "//tensorflow/core:lib", + "//tensorflow/stream_executor:stream", + "//tensorflow/stream_executor/lib", + "@com_google_absl//absl/container:flat_hash_map", + ], +) + +cc_library( + name = "tpu_executor_hdrs", + hdrs = ["tpu_executor.h"], + deps = [ + ":tpu_executor_c_api_hdrs", + ":tpu_executor_interface", + ":tpu_platform_hdrs", + ":tpu_platform_interface", + "//tensorflow/core/platform:types", + "//tensorflow/stream_executor:stream_header", + "//tensorflow/stream_executor/lib", + "@com_google_absl//absl/container:flat_hash_map", + ], +) + +cc_library( + name = "tpu_platform_hdrs", + hdrs = ["tpu_platform.h"], + deps = [ + ":tpu_executor_c_api_hdrs", + ":tpu_platform_interface", + "//tensorflow/core/platform:types", + "//tensorflow/stream_executor:stream_header", + "@com_google_absl//absl/container:flat_hash_map", + ], +) + +cc_library( + name = "tpu_node_context", + srcs = ["tpu_node_context.cc"], + hdrs = ["tpu_node_context.h"], + deps = [ + ":status_helper", + ":tpu_executor_c_api_hdrs", + ":tpu_node_context_c_api_hdrs", + ":tpu_platform_interface", + ":tpu_transfer_manager", + "//tensorflow/compiler/xla/service", + "//tensorflow/compiler/xla/service:backend", + "//tensorflow/compiler/xla/service:platform_util", + "//tensorflow/compiler/xla/service:stream_pool", + "//tensorflow/compiler/xla/service:transfer_manager", + "//tensorflow/core:framework", + "//tensorflow/stream_executor:device_memory_allocator", + "//tensorflow/stream_executor/lib", + "@com_google_absl//absl/memory", + ], +) + +cc_library( + name = "tpu_platform", + srcs = ["tpu_platform.cc"], + hdrs = ["tpu_platform.h"], + deps = [ + ":status_helper", + ":tpu_executor_c_api_hdrs", + ":tpu_executor_hdrs", + ":tpu_platform_interface", + "//tensorflow/c:tf_status", + "//tensorflow/core/platform:types", + "//tensorflow/stream_executor:stream", + "@com_google_absl//absl/container:flat_hash_map", + ], + alwayslink = True, +) + +cc_library( + name = "tpu_transfer_manager", + srcs = ["tpu_transfer_manager_registration.cc"], + deps = [ + ":tpu_platform", + ":tpu_transfer_manager_base", + "//tensorflow/compiler/xla/service:transfer_manager", + ], +) + +cc_library( + name = "tpu_transfer_manager_base", + srcs = ["tpu_transfer_manager.cc"], + hdrs = ["tpu_transfer_manager.h"], + deps = [ + ":c_api_conversions", + ":proto_helper", + ":status_helper", + ":tpu_executor_c_api_hdrs", + ":tpu_platform", + "//tensorflow/compiler/xla:literal", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:xla_data_proto_cc", + "//tensorflow/compiler/xla/service:shaped_buffer", + "//tensorflow/compiler/xla/service:transfer_manager", + "//tensorflow/stream_executor:stream", + ], +) + +cc_library( + name = "tpu_computation_placer", + srcs = ["tpu_computation_placer.cc"], + hdrs = ["tpu_computation_placer.h"], + deps = [ + ":tpu_executor_c_api_hdrs", + ":tpu_platform", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla/service:computation_placer", + ], + alwayslink = True, +) + +cc_library( + name = "tpu_platform_interface", + srcs = ["tpu_platform_interface.cc"], + hdrs = ["tpu_platform_interface.h"], + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/core/platform:types", + 
"//tensorflow/stream_executor:multi_platform_manager", + "//tensorflow/stream_executor:stream_executor_headers", + ], +) + +cc_library( + name = "tpu_stream_interface", + hdrs = ["tpu_stream_interface.h"], + visibility = ["//visibility:public"], + deps = ["//tensorflow/stream_executor:stream_executor_internal"], +) + +cc_library( + name = "tpu_executor_interface", + hdrs = ["tpu_executor_interface.h"], + visibility = ["//visibility:public"], + deps = [ + ":tpu_platform_interface", + "//tensorflow/core/platform:errors", + "//tensorflow/stream_executor:stream_executor_internal", + "//tensorflow/stream_executor:stream_header", + ], +) diff --git a/tensorflow/stream_executor/tpu/c_api_conversions.h b/tensorflow/stream_executor/tpu/c_api_conversions.h new file mode 100644 index 00000000000..1bb9ecee688 --- /dev/null +++ b/tensorflow/stream_executor/tpu/c_api_conversions.h @@ -0,0 +1,115 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_C_API_CONVERSIONS_H_ +#define TENSORFLOW_STREAM_EXECUTOR_TPU_C_API_CONVERSIONS_H_ + +#include "absl/container/inlined_vector.h" +#include "tensorflow/compiler/xla/literal.h" +#include "tensorflow/compiler/xla/service/shaped_buffer.h" +#include "tensorflow/compiler/xla/shape.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/stream_executor/device_memory.h" +#include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" + +class TpuConversions { + public: + static stream_executor::DeviceMemoryBase + SE_DeviceMemoryBaseToDeviceMemoryBase(SE_DeviceMemoryBase se_base) { + stream_executor::DeviceMemoryBase base(se_base.opaque, se_base.size); + base.SetPayload(se_base.payload); + return base; + } + + static SE_DeviceMemoryBase DeviceMemoryBaseToSE_DeviceMemoryBase( + const stream_executor::DeviceMemoryBase& base) { + SE_DeviceMemoryBase se_base; + se_base.opaque = const_cast(base.opaque()); + se_base.payload = base.payload(); + se_base.size = base.size(); + return se_base; + } + + static xla::Shape CShapeToXlaShape(XLA_Shape* shape) { + xla::ShapeProto p; + p.ParseFromArray(shape->bytes, shape->size); + return xla::Shape(p); + } + + static void XlaShapeToCShape(const xla::Shape& xla_shape, + XLA_Shape* c_shape) { + xla::ShapeProto p = xla_shape.ToProto(); + std::string p_str = p.SerializeAsString(); + c_shape->bytes = new char[p_str.size()]; + c_shape->size = p_str.size(); + memcpy(c_shape->bytes, p_str.data(), p_str.size()); + } + + static void XLAShapedBufferToCShapedBuffer( + const xla::ShapedBuffer& buffer, XLA_ShapedBuffer* c_device_buffer) { + XlaShapeToCShape(buffer.on_host_shape(), &c_device_buffer->on_host_shape); + XlaShapeToCShape(buffer.on_device_shape(), + &c_device_buffer->on_device_shape); + c_device_buffer->device_ordinal = buffer.device_ordinal(); + absl::InlinedVector bases; + for (auto& pair : buffer.buffers()) { + 
bases.push_back(DeviceMemoryBaseToSE_DeviceMemoryBase(pair.second)); + } + c_device_buffer->count = bases.size(); + c_device_buffer->bases = new SE_DeviceMemoryBase[bases.size()]; + for (int i = 0; i < bases.size(); ++i) { + c_device_buffer->bases[i] = bases[i]; + } + } + + static void XLALiteralToCLiteral(const xla::LiteralSlice& literal, + XLA_Literal* c_literal) { + XlaShapeToCShape(literal.shape(), &c_literal->shape); + auto shapes = xla::ShapeUtil::GetLeafShapes(literal.shape()); + c_literal->buffers = new char*[shapes.size()]; + c_literal->sizes = new size_t[shapes.size()]; + c_literal->count = shapes.size(); + for (int i = 0; i < shapes.size(); ++i) { + c_literal->buffers[i] = reinterpret_cast( + const_cast(literal.untyped_data(shapes[i].index))); + c_literal->sizes[i] = literal.size_bytes(shapes[i].index); + } + } + + static xla::MutableBorrowingLiteral CLiteralToXLALiteral( + XLA_Literal* c_literal) { + xla::Shape shape = CShapeToXlaShape(&c_literal->shape); + LOG(INFO) << "Shape: " << shape.DebugString(); + return xla::MutableBorrowingLiteral( + absl::MakeSpan(c_literal->buffers, c_literal->count), shape); + } + + static void CShapeCleanup(XLA_Shape* c_shape) { delete[] c_shape->bytes; } + + static void CLiteralCleanup(XLA_Literal* c_literal) { + delete[] c_literal->buffers; + delete[] c_literal->sizes; + CShapeCleanup(&c_literal->shape); + } + + static void CShapedBufferCleanup(XLA_ShapedBuffer* c_buffer) { + CShapeCleanup(&c_buffer->on_device_shape); + CShapeCleanup(&c_buffer->on_host_shape); + delete[] c_buffer->bases; + } +}; + +#endif // THIRD_PARTY_TENSORFLOW_STREAM_EXECUTOR_TPU_C_API_CONVERSIONS_H_ diff --git a/tensorflow/stream_executor/tpu/proto_helper.cc b/tensorflow/stream_executor/tpu/proto_helper.cc new file mode 100644 index 00000000000..db663c6671b --- /dev/null +++ b/tensorflow/stream_executor/tpu/proto_helper.cc @@ -0,0 +1,27 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/stream_executor/tpu/proto_helper.h" + +extern "C" { + +void StreamExecutor_Tpu_FreeSerializedProto(const TpuSerializedProto* proto) { + CHECK_NE(proto, nullptr); + CHECK_NE(proto->bytes, nullptr); + CHECK_GT(proto->size, 0); + delete[] proto->bytes; +} + +} // extern "C" diff --git a/tensorflow/stream_executor/tpu/proto_helper.h b/tensorflow/stream_executor/tpu/proto_helper.h new file mode 100644 index 00000000000..3bd2b09f95e --- /dev/null +++ b/tensorflow/stream_executor/tpu/proto_helper.h @@ -0,0 +1,85 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_PROTO_HELPER_H_ +#define TENSORFLOW_STREAM_EXECUTOR_TPU_PROTO_HELPER_H_ + +#include + +#include "tensorflow/core/platform/logging.h" + +extern "C" { + +typedef struct TpuSerializedProto { + const char* bytes; + size_t size; +} TpuSerializedProto; + +void StreamExecutor_Tpu_FreeSerializedProto(const TpuSerializedProto* proto); + +} // extern "C" + +namespace stream_executor { +namespace tpu { + +using SerializedProto = TpuSerializedProto; + +// Serializes a proto and put the result in the given SerializedProto* argument. +// +// Users should call SerializedProto_Free on `serialized_proto` afterwards. +template +inline void SerializeProto(const Proto& proto, + SerializedProto* serialized_proto) { + auto size = proto.ByteSizeLong(); + auto bytes = new char[size]; + CHECK(proto.SerializeToArray(bytes, size)); + serialized_proto->size = size; + serialized_proto->bytes = bytes; +} + +// Serializes a proto and return the result as a SerializedProto value. +// +// Users should call SerializedProto_Free on the return value afterwards. +template +inline SerializedProto SerializeProto(const Proto& proto) { + SerializedProto serialized_proto; + SerializeProto(proto, &serialized_proto); + return serialized_proto; +} + +// Deserializes a buffer and return the corresponding proto. If the buffer is +// empty, return an empty proto. +template +inline Proto DeserializeProto(const SerializedProto& serialized_proto) { + Proto proto; + if (serialized_proto.bytes != nullptr) { + CHECK_GT(serialized_proto.size, 0); + CHECK(proto.ParseFromArray(serialized_proto.bytes, serialized_proto.size)) + << "Invalid buffer, failed to deserialize buffer."; + } + return proto; +} + +// Releases the memory allocated for serialized protos. +inline void SerializedProto_Free(const SerializedProto& serialized_proto) { + CHECK_NE(serialized_proto.bytes, nullptr); + CHECK_GT(serialized_proto.size, 0); + delete[] serialized_proto.bytes; +} + +} // namespace tpu +} // namespace stream_executor + +#endif // TENSORFLOW_STREAM_EXECUTOR_TPU_PROTO_HELPER_H_ diff --git a/tensorflow/stream_executor/tpu/status_helper.h b/tensorflow/stream_executor/tpu/status_helper.h new file mode 100644 index 00000000000..8fcf302edac --- /dev/null +++ b/tensorflow/stream_executor/tpu/status_helper.h @@ -0,0 +1,38 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_STATUS_HELPER_H_ +#define TENSORFLOW_STREAM_EXECUTOR_TPU_STATUS_HELPER_H_ + +#include "tensorflow/core/platform/status.h" +#include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" + +struct StatusHelper { + StatusHelper() : c_status(TpuStatus_New()) {} + ~StatusHelper() { TpuStatus_Free(c_status); } + bool ok() { return TpuStatus_Code(c_status) == 0; } + tensorflow::Status status() { + if (!ok()) { + return tensorflow::Status( + tensorflow::error::Code(TpuStatus_Code(c_status)), + TpuStatus_Message(c_status)); + } else { + return tensorflow::Status::OK(); + } + } + SE_Status* c_status; +}; + +#endif // TENSORFLOW_STREAM_EXECUTOR_TPU_STATUS_HELPER_H_ diff --git a/tensorflow/stream_executor/tpu/tpu_computation_placer.cc b/tensorflow/stream_executor/tpu/tpu_computation_placer.cc new file mode 100644 index 00000000000..660b446d953 --- /dev/null +++ b/tensorflow/stream_executor/tpu/tpu_computation_placer.cc @@ -0,0 +1,51 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/stream_executor/tpu/tpu_computation_placer.h" + +#include "tensorflow/stream_executor/tpu/tpu_platform.h" + +template +using StatusOr = TpuComputationPlacer::StatusOr; + +TpuComputationPlacer::TpuComputationPlacer() { + placer_ = TpuComputationPlacer_New(); +} + +TpuComputationPlacer::~TpuComputationPlacer() { + TpuComputationPlacer_Free(placer_); +} + +StatusOr TpuComputationPlacer::DeviceId(int replica, int computation, + int replica_count, + int computation_count) { + LOG(FATAL) << "Unimplemented."; +} + +StatusOr TpuComputationPlacer::AssignDevices( + int replica_count, int computation_count) { + LOG(FATAL) << "Unimplemented."; +} + +static std::unique_ptr CreateTpuComputationPlacer() { + return std::make_unique(); +} + +static bool InitModule() { + xla::ComputationPlacer::RegisterComputationPlacer( + tensorflow::TpuPlatform::kId, CreateTpuComputationPlacer); + return true; +} +static bool module_initialized = InitModule(); diff --git a/tensorflow/stream_executor/tpu/tpu_computation_placer.h b/tensorflow/stream_executor/tpu/tpu_computation_placer.h new file mode 100644 index 00000000000..c8f4c9e3888 --- /dev/null +++ b/tensorflow/stream_executor/tpu/tpu_computation_placer.h @@ -0,0 +1,41 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_COMPUTATION_PLACER_H_
+#define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_COMPUTATION_PLACER_H_
+
+#include "tensorflow/compiler/xla/service/computation_placer.h"
+#include "tensorflow/compiler/xla/statusor.h"
+#include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h"
+
+class TpuComputationPlacer : public xla::ComputationPlacer {
+ public:
+  template <typename T>
+  using StatusOr = xla::StatusOr<T>;
+
+  TpuComputationPlacer();
+  ~TpuComputationPlacer() override;
+
+  StatusOr<int> DeviceId(int replica, int computation, int replica_count,
+                         int computation_count) override;
+
+  StatusOr<xla::DeviceAssignment> AssignDevices(int replica_count,
+                                                int computation_count) override;
+
+ private:
+  XLA_ComputationPlacer* placer_;
+};
+
+#endif  // TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_COMPUTATION_PLACER_H_
diff --git a/tensorflow/stream_executor/tpu/tpu_executor.cc b/tensorflow/stream_executor/tpu/tpu_executor.cc
new file mode 100644
index 00000000000..92808936467
--- /dev/null
+++ b/tensorflow/stream_executor/tpu/tpu_executor.cc
@@ -0,0 +1,355 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/ + +#include "tensorflow/stream_executor/tpu/tpu_executor.h" + +#include "tensorflow/c/tf_status.h" +#include "tensorflow/core/lib/gtl/cleanup.h" +#include "tensorflow/stream_executor/device_memory.h" +#include "tensorflow/stream_executor/lib/status.h" +#include "tensorflow/stream_executor/tpu/c_api_conversions.h" +#include "tensorflow/stream_executor/tpu/status_helper.h" +#include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" +#include "tensorflow/stream_executor/tpu/tpu_stream.h" +#include "tensorflow/stream_executor/tpu/tpu_timer.h" + +using stream_executor::DeviceMemoryBase; + +namespace tensorflow { + +namespace { +using ::stream_executor::port::Status; +} // namespace + +TpuExecutor::~TpuExecutor() { TpuExecutor_Free(executor_); } + +Status TpuExecutor::Init(int device_ordinal, + ::stream_executor::DeviceOptions device_options) { + StatusHelper status; + SE_DeviceOptions* options = + TpuExecutor_NewDeviceOptions(device_options.flags()); + TpuExecutor_Init(executor_, device_ordinal, options, status.c_status); + TpuExecutor_FreeDeviceOptions(options); + return status.status(); +} + +int TpuExecutor::PlatformDeviceCount() { + return TpuExecutor_PlatformDeviceCount(executor_); +} + +void TpuExecutor::SyncAndForgetFailedStreams() { + TpuExecutor_SyncAndForgetFailedStreams(executor_); +} + +bool TpuExecutor::SynchronizeAllActivity() { + return TpuExecutor_SynchronizeAllActivity(executor_); +} + +Status TpuExecutor::BlockHostUntilDone(Stream* stream) { + StatusHelper status; + TpuExecutor_BlockHostUntilDone( + executor_, stream_map().at(stream->implementation()), status.c_status); + return status.status(); +} + +Status TpuExecutor::BlockUntilDoneOrFailed() { + StatusHelper status; + TpuExecutor_BlockUntilDoneOrFailed(executor_, status.c_status); + return status.status(); +} + +Status TpuExecutor::GetStatus(Stream* stream) { + StatusHelper status; + TpuExecutor_GetStatus(executor_, stream_map().at(stream->implementation()), + status.c_status); + return status.status(); +} + +bool TpuExecutor::AllocateStream(Stream* stream) { + return TpuExecutor_AllocateStream(executor_, + stream_map().at(stream->implementation())); +} + +void TpuExecutor::DeallocateStream(Stream* stream) { + TpuExecutor_DeallocateStream(executor_, + stream_map().at(stream->implementation())); + stream_map().erase(stream->implementation()); +} + +bool TpuExecutor::CreateStreamDependency(Stream* dependent, Stream* other) { + return TpuExecutor_CreateStreamDependency( + executor_, stream_map().at(dependent->implementation()), + stream_map().at(other->implementation())); +} + +Status TpuExecutor::AllocateEvent(Event* event) { return Status::OK(); } + +Status TpuExecutor::DeallocateEvent(Event* event) { return Status::OK(); } + +// AllocateTimer/DeallocateTimer have no specialization. 
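+// StartTimer/StopTimer below forward to the TPU C API; the SE_Timer handle is
+// looked up in timer_map_, which GetTimerImplementation() populates when the
+// wrapper Timer is created.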
+bool TpuExecutor::AllocateTimer(Timer* timer) { return true; } + +void TpuExecutor::DeallocateTimer(Timer* timer) {} + +bool TpuExecutor::StartTimer(Stream* stream, ::stream_executor::Timer* timer) { + return TpuExecutor_StartTimer(executor_, + stream_map().at(stream->implementation()), + timer_map_.at(timer->implementation())); +} + +bool TpuExecutor::StopTimer(Stream* stream, ::stream_executor::Timer* timer) { + return TpuExecutor_StopTimer(executor_, + stream_map().at(stream->implementation()), + timer_map_.at(timer->implementation())); +} + +stream_executor::Event::Status TpuExecutor::PollForEventStatus( + stream_executor::Event* event) { + return stream_executor::Event::Status(TpuExecutor_PollForEventStatus( + executor_, event_map_.at(event->implementation()))); +} + +Status TpuExecutor::RecordEvent(Stream* stream, + ::stream_executor::Event* event) { + StatusHelper status; + TpuExecutor_RecordEvent(executor_, stream_map().at(stream->implementation()), + event_map_.at(event->implementation()), + status.c_status); + return status.status(); +} + +Status TpuExecutor::WaitForEvent(Stream* stream, + ::stream_executor::Event* event) { + StatusHelper status; + TpuExecutor_WaitForEvent(executor_, stream_map().at(stream->implementation()), + event_map_.at(event->implementation()), + status.c_status); + return status.status(); +} + +// Implementations for Timer, Stream, Event +// We need to map these implementations to internal equivalents -- thus we +// allocate the internal Timer, Stream and Event operations here, and map +// the implementations to the internal values. The "wrapper" interfaces are +// responsible for deallocating the internal value when they are destroyed. + +// Called by Timer::Timer +std::unique_ptr<::stream_executor::internal::TimerInterface> +TpuExecutor::GetTimerImplementation() { + SE_Timer* tpu_timer = TpuTimer_New(executor_); + auto ptr = absl::make_unique(tpu_timer); + timer_map_[ptr.get()] = tpu_timer; + return ptr; +} + +// Called by Stream::Stream +std::unique_ptr<::stream_executor::internal::StreamInterface> +TpuExecutor::GetStreamImplementation() { + SE_Stream* tpu_stream = TpuStream_New(executor_); + auto ptr = absl::make_unique(tpu_stream); + stream_map()[ptr.get()] = tpu_stream; + return ptr; +} + +// Called by Event::Event +std::unique_ptr<::stream_executor::internal::EventInterface> +TpuExecutor::CreateEventImplementation() { + SE_Event* tpu_event = TpuEvent_New(executor_); + auto ptr = absl::make_unique(tpu_event); + event_map_[ptr.get()] = tpu_event; + return ptr; +} + +DeviceMemoryBase TpuExecutor::Allocate(uint64 size, int64 memory_space) { + SE_DeviceMemoryBase se_base = + TpuExecutor_Allocate(executor_, size, memory_space); + return TpuConversions::SE_DeviceMemoryBaseToDeviceMemoryBase(se_base); +} + +void TpuExecutor::Deallocate(const DeviceMemoryBase& memory) { + SE_DeviceMemoryBase se_base = + TpuConversions::DeviceMemoryBaseToSE_DeviceMemoryBase(memory); + TpuExecutor_Deallocate(executor_, &se_base); +} + +void TpuExecutor::Deallocate(DeviceMemoryBase* memory) { + SE_DeviceMemoryBase se_base = + TpuConversions::DeviceMemoryBaseToSE_DeviceMemoryBase(*memory); + TpuExecutor_Deallocate(executor_, &se_base); +} + +bool TpuExecutor::DeviceMemoryUsage(int64* free, int64* total) const { + int64_t _free; + int64_t _total; + if (TpuExecutor_DeviceMemoryUsage(executor_, &_free, &_total)) { + *free = _free; + *total = _total; + return true; + } + return false; +} + +absl::optional +TpuExecutor::GetAllocatorStats() { + SE_AllocatorStats c_stats; + if 
(TpuExecutor_GetAllocatorStats(executor_, &c_stats)) { + ::stream_executor::AllocatorStats stats; + stats.num_allocs = c_stats.num_allocs; + stats.bytes_in_use = c_stats.bytes_in_use; + stats.peak_bytes_in_use = c_stats.peak_bytes_in_use; + stats.largest_alloc_size = c_stats.largest_alloc_size; + if (c_stats.has_bytes_limit) { + stats.bytes_limit = c_stats.bytes_limit; + } + stats.bytes_reserved = c_stats.bytes_reserved; + stats.peak_bytes_reserved = c_stats.peak_bytes_reserved; + if (c_stats.has_bytes_reservable_limit) { + stats.bytes_reservable_limit = c_stats.bytes_reservable_limit; + } + stats.largest_free_block_bytes = c_stats.largest_free_block_bytes; + return stats; + } + return {}; +} + +Status TpuExecutor::WaitForInfeedReady(int32 infeed_queue_index) { + StatusHelper status; + TpuExecutor_WaitForInfeedReady(executor_, infeed_queue_index, + status.c_status); + return status.status(); +} + +Status TpuExecutor::WaitForOutfeedReady(int32 outfeed_queue_index) { + StatusHelper status; + TpuExecutor_WaitForOutfeedReady(executor_, outfeed_queue_index, + status.c_status); + return status.status(); +} + +void TpuExecutor::DequeueOutfeed(int32 outfeed_queue_index, + absl::Span bytes, StatusCallback done) { + StatusHelper status; + TpuExecutor_DequeueOutfeed(executor_, outfeed_queue_index, bytes.data(), + bytes.size(), status.c_status); + done(status.status()); +} + +Status TpuExecutor::EnqueueInfeed(int32 infeed_queue_index, + absl::Span bytes) { + StatusHelper status; + TpuExecutor_EnqueueInfeed(executor_, infeed_queue_index, bytes.data(), + bytes.size(), status.c_status); + return status.status(); +} + +bool TpuExecutor::Memcpy(Stream* stream, void* host_dst, + const ::stream_executor::DeviceMemoryBase& device_src, + uint64 size) { + SE_DeviceMemoryBase se_base = + TpuConversions::DeviceMemoryBaseToSE_DeviceMemoryBase(device_src); + return TpuExecutor_MemcpyToHost(executor_, + stream_map().at(stream->implementation()), + host_dst, &se_base, size); +} + +bool TpuExecutor::Memcpy(Stream* stream, + ::stream_executor::DeviceMemoryBase* device_dst, + const void* host_src, uint64 size) { + SE_DeviceMemoryBase se_base = + TpuConversions::DeviceMemoryBaseToSE_DeviceMemoryBase(*device_dst); + return TpuExecutor_MemcpyFromHost(executor_, + stream_map().at(stream->implementation()), + &se_base, host_src, size); +} + +Status TpuExecutor::SynchronousMemcpy( + ::stream_executor::DeviceMemoryBase* device_dst, const void* host_src, + uint64 size) { + StatusHelper status; + SE_DeviceMemoryBase se_base = + TpuConversions::DeviceMemoryBaseToSE_DeviceMemoryBase(*device_dst); + TpuExecutor_SynchronousMemcpyFromHost(executor_, &se_base, host_src, size, + status.c_status); + return status.status(); +} + +Status TpuExecutor::SynchronousMemcpy( + void* host_dst, const ::stream_executor::DeviceMemoryBase& device_src, + uint64 size) { + StatusHelper status; + SE_DeviceMemoryBase se_base = + TpuConversions::DeviceMemoryBaseToSE_DeviceMemoryBase(device_src); + TpuExecutor_SynchronousMemcpyToHost(executor_, host_dst, &se_base, size, + status.c_status); + return status.status(); +} + +Status TpuExecutor::SynchronousMemcpyDeviceToDevice( + ::stream_executor::DeviceMemoryBase* device_dst, + const ::stream_executor::DeviceMemoryBase& device_src, uint64 size) { + return ::stream_executor::port::UnimplementedError( + "This operation not supported on TPU"); +} + +bool TpuExecutor::MemcpyDeviceToDevice( + Stream* stream, ::stream_executor::DeviceMemoryBase* gpu_dst, + const ::stream_executor::DeviceMemoryBase& host_src, uint64 
size) { + LOG(FATAL) << __func__ << " not supported on TpuExecutor"; +} + +struct HostCallbackContext { + std::function callback; +}; + +SE_Status* HostCallbackTrampoline(void* ctx) { + HostCallbackContext* host_ctx = reinterpret_cast(ctx); + Status status = host_ctx->callback(); + SE_Status* c_status = + TpuStatus_Create(status.code(), status.error_message().c_str()); + delete host_ctx; + return c_status; +} + +bool TpuExecutor::HostCallback(Stream* stream, + std::function callback) { + HostCallbackContext* ctx = new HostCallbackContext{callback}; + return TpuExecutor_HostCallback(executor_, + stream_map().at(stream->implementation()), + &HostCallbackTrampoline, ctx); +} + +TpuExecutor::StatusOr> +TpuExecutor::CreateDeviceDescription() const { + StatusHelper status; + SE_DeviceDescription* description = TpuDeviceDescription_New(); + auto cleanup = tensorflow::gtl::MakeCleanup( + [description]() { TpuDeviceDescription_Free(description); }); + TpuExecutor_CreateDeviceDescription(executor_, description, status.c_status); + if (status.status().ok()) { + stream_executor::internal::DeviceDescriptionBuilder builder; + CHECK_NE(description->device_vendor, nullptr); + builder.set_device_vendor(description->device_vendor); + builder.set_name(description->name); + builder.set_clock_rate_ghz(description->clock_rate_ghz); + builder.set_core_count(description->core_count); + builder.set_ecc_enabled(description->ecc_enabled); + builder.set_device_memory_size(description->device_memory_size); + builder.set_platform_version(description->platform_version); + return builder.Build(); + } + return status.status(); +} + +} // namespace tensorflow diff --git a/tensorflow/stream_executor/tpu/tpu_executor.h b/tensorflow/stream_executor/tpu/tpu_executor.h new file mode 100644 index 00000000000..5f366421c4c --- /dev/null +++ b/tensorflow/stream_executor/tpu/tpu_executor.h @@ -0,0 +1,241 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_H_ +#define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_H_ + +#include "absl/container/flat_hash_map.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/stream_executor/device_memory.h" +#include "tensorflow/stream_executor/device_options.h" +#include "tensorflow/stream_executor/event.h" +#include "tensorflow/stream_executor/lib/statusor.h" +#include "tensorflow/stream_executor/stream.h" +#include "tensorflow/stream_executor/stream_executor.h" +#include "tensorflow/stream_executor/stream_executor_internal.h" +#include "tensorflow/stream_executor/temporary_device_memory.h" +#include "tensorflow/stream_executor/timer.h" +#include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" +#include "tensorflow/stream_executor/tpu/tpu_executor_interface.h" +#include "tensorflow/stream_executor/tpu/tpu_platform.h" +#include "tensorflow/stream_executor/tpu/tpu_platform_interface.h" + +namespace tensorflow { + +class TpuExecutor : public tensorflow::tpu::TpuExecutorInterface { + public: + using Status = ::stream_executor::port::Status; + template + using StatusOr = ::stream_executor::port::StatusOr; + using StatusCallback = std::function; + using Stream = ::stream_executor::Stream; + using Event = ::stream_executor::Event; + using Timer = ::stream_executor::Timer; + using DeviceMemoryBase = ::stream_executor::DeviceMemoryBase; + using StreamInterface = ::stream_executor::internal::StreamInterface; + using StreamExecutorInterface = + ::stream_executor::internal::StreamExecutorInterface; + + using EventMap = + absl::flat_hash_map; + using TimerMap = + absl::flat_hash_map; + + explicit TpuExecutor(::tensorflow::tpu::TpuPlatformInterface* platform, + SE_StreamExecutor* executor) + : platform_(platform), executor_(executor) {} + + ~TpuExecutor() override; + + Status Init(int device_ordinal, + ::stream_executor::DeviceOptions device_options) override; + + DeviceMemoryBase Allocate(uint64 size, int64 memory_space) override; + + StatusOr AllocateDeviceMemoryBase(uint64 size, + int64 memory_space); + + Status AllocateEvent(Event* event) override; + + bool AllocateStream(Stream* stream) override; + + bool AllocateTimer(Timer* timer) override; + + Status BlockHostUntilDone(::stream_executor::Stream* stream) override; + + Status BlockUntilDoneOrFailed(); + + StatusOr> + CreateDeviceDescription() const override; + + bool CreateStreamDependency(Stream* dependent, Stream* other) override; + + void DeallocateStream(Stream* stream) override; + + void Deallocate(const DeviceMemoryBase& memory); + + void Deallocate(DeviceMemoryBase* memory) override; + + Status DeallocateEvent(Event* event) override; + + void DeallocateTimer(Timer* timer) override; + + bool DeviceMemoryUsage(int64* free, int64* total) const override; + + void DequeueOutfeed(int32 outfeed_queue_index, absl::Span bytes, + StatusCallback done); + + Status EnqueueInfeed(int32 infeed_queue_index, + absl::Span bytes); + + absl::optional GetAllocatorStats() override; + + Status GetStatus(Stream* stream) override; + + std::unique_ptr<::stream_executor::internal::StreamInterface> + GetStreamImplementation() override; + + std::unique_ptr<::stream_executor::internal::TimerInterface> + GetTimerImplementation() override; + + std::unique_ptr<::stream_executor::internal::EventInterface> + CreateEventImplementation() override; + + bool HostCallback(Stream* stream, std::function callback) override; + + bool 
Memcpy(Stream* stream, void* host_dst, + const ::stream_executor::DeviceMemoryBase& device_src, + uint64 size) override; + + bool Memcpy(Stream* stream, ::stream_executor::DeviceMemoryBase* device_dst, + const void* host_src, uint64 size) override; + + bool MemcpyDeviceToDevice(Stream* stream, + ::stream_executor::DeviceMemoryBase* gpu_dst, + const ::stream_executor::DeviceMemoryBase& host_src, + uint64 size) override; + + void SyncAndForgetFailedStreams(); + bool SynchronizeAllActivity() override; + + Status SynchronousMemcpy(::stream_executor::DeviceMemoryBase* device_dst, + const void* host_src, uint64 size) override; + Status SynchronousMemcpy( + void* host_dst, const ::stream_executor::DeviceMemoryBase& device_src, + uint64 size) override; + Status SynchronousMemcpyDeviceToDevice( + ::stream_executor::DeviceMemoryBase* device_dst, + const ::stream_executor::DeviceMemoryBase& device_src, + uint64 size) override; + + int PlatformDeviceCount() override; + + Event::Status PollForEventStatus(Event* event) override; + Status RecordEvent(Stream* stream, ::stream_executor::Event* event) override; + Status WaitForEvent(Stream* stream, ::stream_executor::Event* event) override; + + bool StartTimer(Stream* stream, ::stream_executor::Timer* timer) override; + bool StopTimer(Stream* stream, ::stream_executor::Timer* timer) override; + + Status WaitForInfeedReady(int32 infeed_queue_index); + + Status WaitForOutfeedReady(int32 outfeed_queue_index); + + const ::tensorflow::tpu::TpuPlatformInterface& platform() const override { + return *platform_; + } + + ::tensorflow::tpu::TpuPlatformInterface& platform() override { + return *platform_; + } + + // TODO(henrytan): convert this to override once the base interface is changed + // to TpuExecutorInterface. + StatusOr> + CreateTemporaryDeviceMemory(int64 memory_space, int64 byte_offset, + int64 size) override { + LOG(FATAL) << "Unimplemented."; + } + + // -- Unimplemented (stubbed out) methods. 
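+  // Each of the following methods LOG(FATAL)s when called; the TPU C API in
+  // tpu_executor_c_api.h currently has no corresponding entry points.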
+ std::unique_ptr + CreateKernelImplementation() override { + LOG(FATAL) << "Not yet implemented"; + } + + stream_executor::SharedMemoryConfig GetDeviceSharedMemoryConfig() override { + LOG(FATAL) << "not yet implemented"; + } + + void* GetSubBuffer(DeviceMemoryBase* parent, uint64 offset, + uint64 size) override { + LOG(FATAL) << "not yet implemented"; + } + Status MemZero(Stream* stream, DeviceMemoryBase* location, + uint64 size) override { + LOG(FATAL) << "not yet implemented"; + } + Status Memset32(Stream* stream, DeviceMemoryBase* location, uint32 pattern, + uint64 size) override { + LOG(FATAL) << "not yet implemented"; + } + Status EnablePeerAccessTo(StreamExecutorInterface* other) override { + LOG(FATAL) << "not yet implemented"; + } + bool CanEnablePeerAccessTo(StreamExecutorInterface* other) override { + LOG(FATAL) << "not yet implemented"; + } + Status SetDeviceSharedMemoryConfig( + stream_executor::SharedMemoryConfig config) override { + LOG(FATAL) << "not yet implemented"; + } + void* HostMemoryAllocate(uint64 size) override { + LOG(FATAL) << "not yet implemented"; + } + void HostMemoryDeallocate(void* mem) override { + LOG(FATAL) << "not yet implemented"; + } + bool HostMemoryRegister(void* mem, uint64 size) override { + LOG(FATAL) << "not yet implemented"; + } + bool HostMemoryUnregister(void* mem) override { + LOG(FATAL) << "not yet implemented"; + } + Status SynchronousMemZero(DeviceMemoryBase* location, uint64 size) override { + LOG(FATAL) << "not yet implemented"; + } + Status SynchronousMemSet(DeviceMemoryBase* location, int value, + uint64 size) override { + LOG(FATAL) << "not yet implemented"; + } + + private: + EventMap event_map_; + TimerMap timer_map_; + + TpuPlatform::StreamMap& stream_map() { + return *(static_cast(platform_)->stream_map()); + } + + ::tensorflow::tpu::TpuPlatformInterface* platform_; + SE_StreamExecutor* executor_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_H_ diff --git a/tensorflow/stream_executor/tpu/tpu_executor_c_api.h b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h new file mode 100644 index 00000000000..8bf2ecbc938 --- /dev/null +++ b/tensorflow/stream_executor/tpu/tpu_executor_c_api.h @@ -0,0 +1,293 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_C_API_H_ +#define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_C_API_H_ + +#include +#include + +#include "tensorflow/c/tf_attrtype.h" +#include "tensorflow/c/tf_datatype.h" +#include "tensorflow/c/tf_status.h" + +typedef struct SE_Platform SE_Platform; +typedef struct SE_StreamExecutor SE_StreamExecutor; +typedef struct SE_Stream SE_Stream; +typedef struct SE_Event SE_Event; +typedef struct SE_Timer SE_Timer; +typedef struct SE_Status SE_Status; + +typedef struct SE_PlatformId { + void* id; // aka stream_executor::Platform::Id +} SE_PlatformId; +typedef struct SE_StreamExecutorConfig SE_StreamExecutorConfig; +typedef struct SE_DeviceOptions SE_DeviceOptions; +typedef SE_Status* (*SE_StatusCallbackFn)(void*); + +typedef struct SE_DeviceMemoryBase { + void* opaque; + uint64_t size; + uint64_t payload; +} SE_DeviceMemoryBase; + +typedef struct SE_AllocatorStats { + int64_t num_allocs; + int64_t bytes_in_use; + int64_t peak_bytes_in_use; + int64_t largest_alloc_size; + + bool has_bytes_limit; + int64_t bytes_limit; + + int64_t bytes_reserved; + int64_t peak_bytes_reserved; + + bool has_bytes_reservable_limit; + int64_t bytes_reservable_limit; + + int64_t largest_free_block_bytes; +} SE_AllocatorStats; + +typedef struct SE_DeviceDescription { + char* device_vendor; + char* platform_version; + char* driver_version; + char* runtime_version; + char* pci_bus_id; + char* name; + + int64_t thread_dim_limit_x; + int64_t thread_dim_limit_y; + int64_t thread_dim_limit_z; + int64_t block_dim_limit_x; + int64_t block_dim_limit_y; + int64_t block_dim_limit_z; + + int64_t threads_per_core_limit; + int64_t threads_per_block_limit; + int64_t threads_per_warp; + + int64_t registers_per_core_limit; + int64_t registers_per_block_limit; + + int64_t device_address_bits; + int64_t device_memory_size; + int64_t memory_bandwidth; + + int64_t shared_memory_per_core; + int64_t shared_memory_per_block; + + float clock_rate_ghz; + + int cuda_compute_capability_major; + int cuda_compute_capability_minor; + + int rocm_amdgpu_isa_version; + + int numa_node; + int core_count; + bool ecc_enabled; +} SE_DeviceDescription; + +typedef struct XLA_TransferManager XLA_TransferManager; + +typedef struct XLA_ComputationPlacer XLA_ComputationPlacer; + +// Represents an XLA shape tree. +// Shapes are flattened in default traversal order. +typedef struct XLA_Shape { + char* bytes; + size_t size; +} XLA_Shape; + +// Represents a leaf node for a XLA shaped buffer. +typedef struct XLA_ShapedBuffer { + XLA_Shape on_host_shape; + XLA_Shape on_device_shape; + int device_ordinal; + + SE_DeviceMemoryBase* bases; + size_t count; +} XLA_ShapedBuffer; + +// Represents a leaf XLA literal. 
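+// buffers and sizes are parallel arrays with one entry per leaf buffer; count
+// holds the number of entries and shape describes the literal's shape.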
+typedef struct XLA_Literal { + char** buffers; + size_t* sizes; + size_t count; + XLA_Shape shape; +} XLA_Literal; + +typedef void (*XLA_CallbackFn)(void*); +typedef void (*XLA_StatusCallbackFn)(void*, SE_Status*); + +extern "C" { + +SE_Platform* TpuPlatform_New(); +void TpuPlatform_Free(SE_Platform* platform); +void TpuPlatform_Initialize(SE_Platform* platform, size_t options_size, + const char** options_key, + const char** options_value, SE_Status* status); +bool TpuPlatform_Initialized(SE_Platform* platform); +SE_StreamExecutor* TpuPlatform_GetExecutor(SE_Platform* platform, + SE_StreamExecutorConfig* config, + SE_Status* status); +SE_PlatformId TpuPlatform_Id(SE_Platform* platform); +int64_t TpuPlatform_VisibleDeviceCount(SE_Platform* platform); +int64_t TpuPlatform_TpuMemoryLimit(SE_Platform* platform); + +void TpuExecutor_Init(SE_StreamExecutor* executor, int device_ordinal, + SE_DeviceOptions* device_options, SE_Status* status); +void TpuExecutor_Free(SE_StreamExecutor* executor); + +int TpuExecutor_PlatformDeviceCount(SE_StreamExecutor* executor); + +SE_DeviceMemoryBase TpuExecutor_Allocate(SE_StreamExecutor* executor, + uint64_t size, int64_t memory_space); +void TpuExecutor_Deallocate(SE_StreamExecutor* executor, + SE_DeviceMemoryBase* memory); +bool TpuExecutor_GetAllocatorStats(SE_StreamExecutor* executor, + SE_AllocatorStats* stats); +bool TpuExecutor_DeviceMemoryUsage(SE_StreamExecutor* executor, int64_t* free, + int64_t* total); + +bool TpuExecutor_AllocateStream(SE_StreamExecutor* executor, SE_Stream* stream); +void TpuExecutor_DeallocateStream(SE_StreamExecutor* executor, + SE_Stream* stream); +bool TpuExecutor_CreateStreamDependency(SE_StreamExecutor* executor, + SE_Stream* dependent, SE_Stream* other); +void TpuExecutor_GetStatus(SE_StreamExecutor* executor, SE_Stream* stream, + SE_Status* status); + +void TpuExecutor_AllocateEvent(SE_StreamExecutor* executor, SE_Event* event, + SE_Status* status); +void TpuExecutor_DeallocateEvent(SE_StreamExecutor* executor, SE_Event* event, + SE_Status* status); +int TpuExecutor_PollForEventStatus(SE_StreamExecutor* executor, + SE_Event* event); +void TpuExecutor_RecordEvent(SE_StreamExecutor* executor, SE_Stream* stream, + SE_Event* event, SE_Status* status); +void TpuExecutor_WaitForEvent(SE_StreamExecutor* executor, SE_Stream* stream, + SE_Event* event, SE_Status* status); + +bool TpuExecutor_AllocateTimer(SE_StreamExecutor* executor, SE_Timer* timer); +void TpuExecutor_DeallocateTimer(SE_StreamExecutor* executor, SE_Timer* timer); +bool TpuExecutor_StartTimer(SE_StreamExecutor* executor, SE_Stream* stream, + SE_Timer* timer); +bool TpuExecutor_StopTimer(SE_StreamExecutor* executor, SE_Stream* stream, + SE_Timer* timer); + +void TpuExecutor_SynchronousMemcpyToHost(SE_StreamExecutor* executor, + void* host_dst, + const SE_DeviceMemoryBase* device_src, + uint64_t size, SE_Status* status); +void TpuExecutor_SynchronousMemcpyFromHost(SE_StreamExecutor* executor, + SE_DeviceMemoryBase* device_dst, + const void* host_src, uint64_t size, + SE_Status* status); +bool TpuExecutor_MemcpyToHost(SE_StreamExecutor* executor, SE_Stream* stream, + void* host_dst, + const SE_DeviceMemoryBase* device_src, + uint64_t size); + +bool TpuExecutor_MemcpyFromHost(SE_StreamExecutor* executor, SE_Stream* stream, + SE_DeviceMemoryBase* device_dst, + const void* host_src, uint64_t size); + +void TpuExecutor_EnqueueInfeed(SE_StreamExecutor* executor, + int32_t infeed_queue_index, const uint8_t* data, + int64_t size, SE_Status* status); +void 
TpuExecutor_DequeueOutfeed(SE_StreamExecutor* executor, + int32_t outfeed_queue_index, uint8_t* data, + int64_t size, SE_Status* status); +void TpuExecutor_WaitForInfeedReady(SE_StreamExecutor* executor, + int32_t infeed_queue_index, + SE_Status* status); +void TpuExecutor_WaitForOutfeedReady(SE_StreamExecutor* executor, + int32_t outfeed_queue_index, + SE_Status* status); + +void TpuExecutor_BlockHostUntilDone(SE_StreamExecutor* executor, + SE_Stream* stream, SE_Status* status); +void TpuExecutor_BlockUntilDoneOrFailed(SE_StreamExecutor* executor, + SE_Status* status); +void TpuExecutor_SyncAndForgetFailedStreams(SE_StreamExecutor* executor); +bool TpuExecutor_SynchronizeAllActivity(SE_StreamExecutor* executor); + +SE_Stream* TpuStream_New(SE_StreamExecutor* parent); +void TpuStream_Free(SE_Stream*); +void* TpuStream_Stream(SE_Stream*); +bool TpuStream_Status(SE_Stream*); + +SE_Event* TpuEvent_New(SE_StreamExecutor* parent); +void TpuEvent_Free(SE_Event*); + +SE_Timer* TpuTimer_New(SE_StreamExecutor* parent); +void TpuTimer_Free(SE_Timer*); +int64_t TpuTimer_Nanoseconds(SE_Timer*); +int64_t TpuTimer_Microseconds(SE_Timer*); + +SE_Status* TpuStatus_New(); +SE_Status* TpuStatus_Create(int32_t code, const char* msg); +void TpuStatus_Free(SE_Status* status); +const char* TpuStatus_Message(SE_Status* status); +int TpuStatus_Code(SE_Status* status); +bool TpuStatus_Ok(SE_Status* status); + +SE_StreamExecutorConfig* TpuStreamExecutorConfig_Default(); +void TpuStreamExecutorConfig_SetOrdinal(SE_StreamExecutorConfig*, int ordinal); +void TpuStreamExecutorConfig_Free(SE_StreamExecutorConfig*); + +SE_DeviceDescription* TpuDeviceDescription_New(); +void TpuDeviceDescription_Free(SE_DeviceDescription* description); +void TpuExecutor_CreateDeviceDescription(SE_StreamExecutor* executor, + SE_DeviceDescription* description, + SE_Status* status); + +SE_DeviceOptions* TpuExecutor_NewDeviceOptions(unsigned flags); +void TpuExecutor_FreeDeviceOptions(SE_DeviceOptions* options); + +bool TpuExecutor_HostCallback(SE_StreamExecutor* executor, SE_Stream* stream, + SE_StatusCallbackFn callback_fn, void* ctx); + +XLA_TransferManager* TpuTransferManager_New(); +void TpuTransferManager_Free(XLA_TransferManager* manager); +SE_PlatformId TpuTransferManager_PlatformId(XLA_TransferManager* manager); +void TpuTransferManager_HostShapeToDeviceShape(XLA_TransferManager* manager, + XLA_Shape* host_shape, + XLA_Shape* device_shape); +void TpuTransferManager_TransferLiteralToDeviceAsync( + XLA_TransferManager* manager, SE_Stream* stream, XLA_Literal* literal, + XLA_ShapedBuffer* device_buffer, SE_Status* status); +void TpuTransferManager_TransferLiteralFromDevice( + XLA_TransferManager* manager, SE_Stream* stream, + XLA_ShapedBuffer* device_buffer, XLA_Literal* literal, + XLA_StatusCallbackFn callback, void* ctx); + +int64_t TpuTransferManager_GetByteSizeRequirement(XLA_TransferManager* manager, + XLA_Shape* shape); +void TpuTransferManager_WriteSingleTupleIndexTable( + XLA_TransferManager* manager, SE_Stream* stream, + SE_DeviceMemoryBase* elements, size_t elements_len, XLA_Shape* shape, + SE_DeviceMemoryBase* region, SE_Status* status); + +XLA_ComputationPlacer* TpuComputationPlacer_New(); +void TpuComputationPlacer_Free(XLA_ComputationPlacer* placer); +} + +// extern "C" + +#endif // TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_C_API_H_ diff --git a/tensorflow/stream_executor/tpu/tpu_executor_interface.h b/tensorflow/stream_executor/tpu/tpu_executor_interface.h new file mode 100644 index 00000000000..5b00f615ca7 --- 
/dev/null +++ b/tensorflow/stream_executor/tpu/tpu_executor_interface.h @@ -0,0 +1,64 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_INTERFACE_H_ +#define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_INTERFACE_H_ + +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/stream_executor/device_memory.h" +#include "tensorflow/stream_executor/event.h" +#include "tensorflow/stream_executor/stream.h" +#include "tensorflow/stream_executor/stream_executor.h" +#include "tensorflow/stream_executor/stream_executor_internal.h" +#include "tensorflow/stream_executor/timer.h" +#include "tensorflow/stream_executor/tpu/tpu_platform_interface.h" + +namespace tpu { +class TpuCore; +} // namespace tpu + +namespace tensorflow { +namespace tpu { + +class TpuExecutorInterface + : public ::stream_executor::internal::StreamExecutorInterface { + public: + using Status = ::stream_executor::port::Status; + template + using StatusOr = ::stream_executor::port::StatusOr; + + class TemporaryDeviceMemory { + public: + virtual ~TemporaryDeviceMemory() {} + virtual stream_executor::DeviceMemoryBase AsDeviceMemoryBase() const = 0; + }; + + virtual StatusOr> + CreateTemporaryDeviceMemory(int64 memory_space, int64 byte_offset, + int64 size) { + LOG(FATAL) << "Unimplemented."; + } + + virtual const TpuPlatformInterface& platform() const { + LOG(FATAL) << "Unimplemented."; + } + + virtual TpuPlatformInterface& platform() { LOG(FATAL) << "Unimplemented."; } +}; + +} // namespace tpu +} // namespace tensorflow + +#endif // TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_INTERFACE_H_ diff --git a/tensorflow/stream_executor/tpu/tpu_node_context.cc b/tensorflow/stream_executor/tpu/tpu_node_context.cc new file mode 100644 index 00000000000..2a4954d5b08 --- /dev/null +++ b/tensorflow/stream_executor/tpu/tpu_node_context.cc @@ -0,0 +1,100 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/stream_executor/tpu/tpu_node_context.h" + +#include "tensorflow/compiler/xla/service/backend.h" +#include "tensorflow/compiler/xla/service/platform_util.h" +#include "tensorflow/compiler/xla/service/transfer_manager.h" +#include "tensorflow/stream_executor/device_memory_allocator.h" +#include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" +#include "tensorflow/stream_executor/tpu/tpu_node_context_c_api.h" + +namespace tensorflow { +namespace tpu { + +using stream_executor::port::Status; +using stream_executor::port::StatusOr; + +/*static*/ StatusOr> TpuNodeContext::Initialize( + int device_ordinal) { + StatusHelper status; + XLA_TpuNodeContext* node_context = + TpuNodeContext_Create(device_ordinal, status.c_status); + if (!status.status().ok()) { + TpuNodeContext_Free(node_context); + return status.status(); + } + return std::make_unique(device_ordinal, node_context); +} + +TpuNodeContext::~TpuNodeContext() { TpuNodeContext_Free(node_context_); } + +/* static */ +Status TpuNodeContext::StopChipHeartbeats() { + StatusHelper status; + TpuNodeContext_StopChipHeartbeats(status.c_status); + return status.status(); +} + +/* static */ +Status TpuNodeContext::CloseTpuHost() { + StatusHelper status; + TpuNodeContext_CloseTpuHost(status.c_status); + return status.status(); +} + +/* static */ +tensorflow::tpu::TpuPlatformInterface* TpuNodeContext::platform() { + return TpuPlatformInterface::GetRegisteredPlatform(); +} + +/* static */ +stream_executor::DeviceMemoryAllocator* TpuNodeContext::memory_allocator() { + static stream_executor::StreamExecutorMemoryAllocator* memory_allocator = + new stream_executor::StreamExecutorMemoryAllocator( + platform(), + xla::PlatformUtil::GetStreamExecutors(platform()).ValueOrDie()); + return memory_allocator; +} + +/* static */ +xla::Backend* TpuNodeContext::backend() { + static xla::Backend* backend = + xla::Backend::CreateBackend( + xla::BackendOptions().set_platform(platform())) + .ValueOrDie() + .release(); + return backend; +} + +/* static */ +StatusOr TpuNodeContext::BorrowStream( + int device_ordinal) { + return backend()->BorrowStream(device_ordinal); +} + +/* static */ +StatusOr TpuNodeContext::BorrowStream( + stream_executor::StreamExecutor* executor) { + return backend()->BorrowStream(executor); +} + +/* static */ +xla::TransferManager* TpuNodeContext::transfer_manager() { + return xla::TransferManager::GetForPlatform(platform()).ValueOrDie(); +} + +} // namespace tpu +} // namespace tensorflow diff --git a/tensorflow/stream_executor/tpu/tpu_node_context.h b/tensorflow/stream_executor/tpu/tpu_node_context.h new file mode 100644 index 00000000000..e1e1ffc67f7 --- /dev/null +++ b/tensorflow/stream_executor/tpu/tpu_node_context.h @@ -0,0 +1,89 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_NODE_CONTEXT_H_ +#define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_NODE_CONTEXT_H_ + +#include + +#include "absl/memory/memory.h" +#include "tensorflow/compiler/xla/service/backend.h" +#include "tensorflow/compiler/xla/service/stream_pool.h" +#include "tensorflow/compiler/xla/service/transfer_manager.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/stream_executor/device_memory_allocator.h" +#include "tensorflow/stream_executor/lib/status.h" +#include "tensorflow/stream_executor/lib/statusor.h" +#include "tensorflow/stream_executor/tpu/status_helper.h" +#include "tensorflow/stream_executor/tpu/tpu_node_context_c_api.h" +#include "tensorflow/stream_executor/tpu/tpu_platform_interface.h" + +namespace tensorflow { +namespace tpu { + +class TpuNodeContext final { + public: + using Status = stream_executor::port::Status; + template + using StatusOr = stream_executor::port::StatusOr; + + static StatusOr> Initialize( + int device_ordinal); + + explicit TpuNodeContext(int device_ordinal, XLA_TpuNodeContext* node_context) + : device_ordinal_(device_ordinal), node_context_(node_context) { + CHECK_NE(node_context, nullptr); + } + ~TpuNodeContext(); + + TpuNodeContext(const TpuNodeContext&) = delete; + TpuNodeContext& operator=(const TpuNodeContext&) = delete; + + static Status StopChipHeartbeats(); + + static Status CloseTpuHost(); + + static tensorflow::tpu::TpuPlatformInterface* platform(); + + static stream_executor::DeviceMemoryAllocator* memory_allocator(); + + static xla::TransferManager* transfer_manager(); + + static xla::Backend* backend(); + + static StatusOr BorrowStream(int device_ordinal); + + static StatusOr BorrowStream( + stream_executor::StreamExecutor* executor); + + stream_executor::StreamExecutor* stream_executor() { + LOG(FATAL) << "Not implemented yet."; + } + + std::string tensor_core_location() { LOG(FATAL) << "Not implemented yet."; } + + int index_on_host() { LOG(FATAL) << "Not implemented yet."; } + + int device_ordinal() const { return device_ordinal_; } + + private: + const int device_ordinal_; + XLA_TpuNodeContext* const node_context_; +}; + +} // namespace tpu +} // namespace tensorflow + +#endif // TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_NODE_CONTEXT_H_ diff --git a/tensorflow/stream_executor/tpu/tpu_node_context_c_api.h b/tensorflow/stream_executor/tpu/tpu_node_context_c_api.h new file mode 100644 index 00000000000..d2684e47df1 --- /dev/null +++ b/tensorflow/stream_executor/tpu/tpu_node_context_c_api.h @@ -0,0 +1,29 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_NODE_CONTEXT_C_API_H_ +#define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_NODE_CONTEXT_C_API_H_ + +#include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" + +typedef struct XLA_TpuNodeContext XLA_TpuNodeContext; + +XLA_TpuNodeContext* TpuNodeContext_Create(int device_ordinal, + SE_Status* status); +void TpuNodeContext_Free(XLA_TpuNodeContext* node_context); + +void TpuNodeContext_StopChipHeartbeats(SE_Status* status); +void TpuNodeContext_CloseTpuHost(SE_Status* status); + +#endif // TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_NODE_CONTEXT_C_API_H_ diff --git a/tensorflow/stream_executor/tpu/tpu_platform.cc b/tensorflow/stream_executor/tpu/tpu_platform.cc new file mode 100644 index 00000000000..c44926df749 --- /dev/null +++ b/tensorflow/stream_executor/tpu/tpu_platform.cc @@ -0,0 +1,125 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/stream_executor/tpu/tpu_platform.h" + +#include "tensorflow/c/tf_status.h" +#include "tensorflow/stream_executor/platform.h" +#include "tensorflow/stream_executor/tpu/status_helper.h" +#include "tensorflow/stream_executor/tpu/tpu_executor.h" +#include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" + +namespace tensorflow { + +PLATFORM_DEFINE_ID(TpuPlatform::kId); +TpuPlatform* tpu_registered_platform = nullptr; + +using Status = ::stream_executor::port::Status; +template +using StatusOr = ::stream_executor::port::StatusOr; + +TpuPlatform::TpuPlatform() { platform_ = TpuPlatform_New(); } + +TpuPlatform* TpuPlatform::GetRegisteredPlatform() { + return tpu_registered_platform; +} + +Status TpuPlatform::Initialize( + const std::map& platform_options) { + StatusHelper status; + + size_t options_size = platform_options.size(); + const char** options_key = + static_cast(malloc(sizeof(const char*) * options_size)); + const char** options_value = + static_cast(malloc(sizeof(const char*) * options_size)); + + size_t i = 0; + for (const auto& option : platform_options) { + options_key[i] = option.first.c_str(); + options_value[i] = option.second.c_str(); + i++; + } + + TpuPlatform_Initialize(platform_, options_size, options_key, options_value, + status.c_status); + + free(options_key); + free(options_value); + + return status.status(); +} + +TpuPlatform::~TpuPlatform() { TpuPlatform_Free(platform_); } + +int TpuPlatform::VisibleDeviceCount() const { + return TpuPlatform_VisibleDeviceCount(platform_); +} + +StatusOr<::stream_executor::StreamExecutor*> TpuPlatform::GetExecutor( + const ::stream_executor::StreamExecutorConfig& config) { + return executor_cache_.GetOrCreate( + config, [&]() { return GetUncachedExecutor(config); }); +} + +StatusOr> +TpuPlatform::GetUncachedExecutor( + const ::stream_executor::StreamExecutorConfig& config) { + SE_StreamExecutorConfig* c_config = 
TpuStreamExecutorConfig_Default(); + + TpuStreamExecutorConfig_SetOrdinal(c_config, config.ordinal); + + StatusHelper status; + SE_StreamExecutor* executor = + TpuPlatform_GetExecutor(platform_, c_config, status.c_status); + TpuStreamExecutorConfig_Free(c_config); + if (!status.ok()) { + return status.status(); + } + return std::make_unique( + this, absl::make_unique(this, executor), + config.ordinal); +} + +::stream_executor::Platform::Id TpuPlatform::id() const { + return TpuPlatform::kId; +} + +const std::string& TpuPlatform::Name() const { + static std::string* name = new std::string(kName); + return *name; +} + +int64 TpuPlatform::TpuMemoryLimit() { + return TpuPlatform_TpuMemoryLimit(platform_); +} + +} // namespace tensorflow + +void RegisterTpuPlatform() { + tensorflow::tpu_registered_platform = new tensorflow::TpuPlatform(); + std::unique_ptr platform( + tensorflow::tpu_registered_platform); + SE_CHECK_OK(stream_executor::MultiPlatformManager::RegisterPlatform( + std::move(platform))); +} + +REGISTER_MODULE_INITIALIZER(tpu_platform, RegisterTpuPlatform()); + +// Note that module initialization sequencing is not supported in the +// open-source project, so this will be a no-op there. +REGISTER_MODULE_INITIALIZER_SEQUENCE(tpu_platform, multi_platform_manager); +REGISTER_MODULE_INITIALIZER_SEQUENCE(multi_platform_manager_listener, + tpu_platform); diff --git a/tensorflow/stream_executor/tpu/tpu_platform.h b/tensorflow/stream_executor/tpu/tpu_platform.h new file mode 100644 index 00000000000..9a67045dec6 --- /dev/null +++ b/tensorflow/stream_executor/tpu/tpu_platform.h @@ -0,0 +1,121 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_PLATFORM_H_ +#define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_PLATFORM_H_ + +#include + +#include "absl/container/flat_hash_map.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/stream_executor/executor_cache.h" +#include "tensorflow/stream_executor/platform.h" +#include "tensorflow/stream_executor/stream_executor_internal.h" +#include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" +#include "tensorflow/stream_executor/tpu/tpu_platform_interface.h" + +namespace tensorflow { + +class TpuPlatform : public ::tensorflow::tpu::TpuPlatformInterface { + public: + using StreamMap = + absl::flat_hash_map; + + static const ::stream_executor::Platform::Id kId; + static constexpr char kName[] = "TPU"; + + using Status = ::stream_executor::port::Status; + template + using StatusOr = ::stream_executor::port::StatusOr; + + TpuPlatform(); + + ~TpuPlatform() override; + + static TpuPlatform* GetRegisteredPlatform(); + + Id id() const override; + + const std::string& Name() const override; + + int VisibleDeviceCount() const override; + + int64 TpuMemoryLimit() override; + + bool Initialized() const override { + return TpuPlatform_Initialized(platform_); + } + + Status Initialize( + const std::map& platform_options) override; + + Status Reset() override { return Reset(false); } + + Status Reset(bool only_tear_down) override { + LOG(FATAL) << "Not yet implemented"; + } + + StatusOr> + DescriptionForDevice(int ordinal) const override { + LOG(FATAL) << "Not yet implemented"; + } + + StatusOr<::stream_executor::StreamExecutor*> ExecutorForDevice( + int ordinal) override { + stream_executor::StreamExecutorConfig config; + config.ordinal = ordinal; + return GetExecutor(config); + } + + StatusOr<::stream_executor::StreamExecutor*> + ExecutorForDeviceWithPluginConfig( + int ordinal, + const ::stream_executor::PluginConfig& plugin_config) override { + stream_executor::StreamExecutorConfig config; + config.ordinal = ordinal; + config.plugin_config = plugin_config; + return GetExecutor(config); + } + + StatusOr<::stream_executor::StreamExecutor*> GetExecutor( + const ::stream_executor::StreamExecutorConfig& config) override; + + StatusOr> + GetUncachedExecutor( + const ::stream_executor::StreamExecutorConfig& config) override; + + void RegisterTraceListener( + std::unique_ptr listener) override { + LOG(FATAL) << "Not yet implemented"; + } + + void UnregisterTraceListener( + stream_executor::TraceListener* listener) override { + LOG(FATAL) << "Not yet implemented"; + } + + StreamMap* stream_map() { return &stream_map_; } + + private: + SE_Platform* platform_; + + stream_executor::ExecutorCache executor_cache_; + StreamMap stream_map_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_PLATFORM_H_ diff --git a/tensorflow/stream_executor/tpu/tpu_platform_interface.cc b/tensorflow/stream_executor/tpu/tpu_platform_interface.cc new file mode 100644 index 00000000000..c5b8ece32af --- /dev/null +++ b/tensorflow/stream_executor/tpu/tpu_platform_interface.cc @@ -0,0 +1,63 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/stream_executor/tpu/tpu_platform_interface.h"
+
+#include "tensorflow/stream_executor/multi_platform_manager.h"
+
+namespace tensorflow {
+namespace tpu {
+
+/* static */
+TpuPlatformInterface* TpuPlatformInterface::GetRegisteredPlatform() {
+  // Prefer TpuPlatform if it's registered.
+  auto status_or_tpu_platform =
+      stream_executor::MultiPlatformManager::PlatformWithName("TPU");
+  if (status_or_tpu_platform.ok()) {
+    return static_cast<TpuPlatformInterface*>(
+        status_or_tpu_platform.ValueOrDie());
+  }
+  if (status_or_tpu_platform.status().code() != error::NOT_FOUND) {
+    LOG(WARNING) << "Error when getting the TPU platform: "
+                 << status_or_tpu_platform.status();
+    return nullptr;
+  }
+
+  // Use any other registered TPU platform.
+  auto status_or_other_tpu_platforms =
+      stream_executor::MultiPlatformManager::PlatformsWithFilter(
+          [](const stream_executor::Platform* platform) {
+            return dynamic_cast<const TpuPlatformInterface*>(platform) !=
+                   nullptr;
+          });
+  if (!status_or_other_tpu_platforms.ok()) {
+    LOG(WARNING) << "Error when getting other TPU platforms: "
+                 << status_or_other_tpu_platforms.status();
+    return nullptr;
+  }
+  auto other_tpu_platforms = status_or_other_tpu_platforms.ValueOrDie();
+  if (!other_tpu_platforms.empty()) {
+    LOG(WARNING) << other_tpu_platforms.size()
+                 << " TPU platforms registered, selecting "
+                 << other_tpu_platforms[0]->Name();
+    return static_cast<TpuPlatformInterface*>(other_tpu_platforms[0]);
+  }
+
+  LOG(WARNING) << "No TPU platform registered";
+  return nullptr;
+}
+
+}  // namespace tpu
+}  // namespace tensorflow
diff --git a/tensorflow/stream_executor/tpu/tpu_platform_interface.h b/tensorflow/stream_executor/tpu/tpu_platform_interface.h
new file mode 100644
index 00000000000..5c7aa8efe94
--- /dev/null
+++ b/tensorflow/stream_executor/tpu/tpu_platform_interface.h
@@ -0,0 +1,44 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_PLATFORM_INTERFACE_H_
+#define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_PLATFORM_INTERFACE_H_
+
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/stream_executor/platform.h"
+
+namespace tensorflow {
+namespace tpu {
+
+class TpuPlatformInterface : public stream_executor::Platform {
+ public:
+  using Status = stream_executor::port::Status;
+
+  // Returns a TPU platform to be used by TPU ops. If multiple TPU platforms
+  // are registered, finds the most suitable one. Returns nullptr if no TPU
+  // platform is registered or an error occurred.
+  static TpuPlatformInterface* GetRegisteredPlatform();
+
+  virtual Status Reset() { return Reset(false); }
+
+  virtual Status Reset(bool only_tear_down) = 0;
+
+  virtual int64 TpuMemoryLimit() = 0;
+};
+
+}  // namespace tpu
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_PLATFORM_INTERFACE_H_
diff --git a/tensorflow/stream_executor/tpu/tpu_stream.h b/tensorflow/stream_executor/tpu/tpu_stream.h
new file mode 100644
index 00000000000..b8fd10df5d9
--- /dev/null
+++ b/tensorflow/stream_executor/tpu/tpu_stream.h
@@ -0,0 +1,40 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_STREAM_H_
+#define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_STREAM_H_
+
+#include "tensorflow/stream_executor/stream_executor_internal.h"
+#include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h"
+
+class TpuStream : public stream_executor::internal::StreamInterface {
+ public:
+  explicit TpuStream(SE_Stream* stream) : stream_(stream) {}
+  ~TpuStream() override { TpuStream_Free(stream_); }
+
+ private:
+  SE_Stream* stream_;
+};
+
+class TpuEvent : public ::stream_executor::internal::EventInterface {
+ public:
+  explicit TpuEvent(SE_Event* event) : event_(event) {}
+  ~TpuEvent() override { TpuEvent_Free(event_); }
+
+ private:
+  SE_Event* event_;
+};
+
+#endif  // TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_STREAM_H_
diff --git a/tensorflow/stream_executor/tpu/tpu_stream_interface.h b/tensorflow/stream_executor/tpu/tpu_stream_interface.h
new file mode 100644
index 00000000000..2e5e02ded7d
--- /dev/null
+++ b/tensorflow/stream_executor/tpu/tpu_stream_interface.h
@@ -0,0 +1,30 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/ + +#ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_STREAM_INTERFACE_H_ +#define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_STREAM_INTERFACE_H_ + +#include "tensorflow/stream_executor/stream_executor_internal.h" + +namespace tensorflow { +namespace tpu { + +class TpuStreamInterface : public ::stream_executor::internal::StreamInterface { +}; + +} // namespace tpu +} // namespace tensorflow + +#endif // TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_STREAM_INTERFACE_H_ diff --git a/tensorflow/stream_executor/tpu/tpu_timer.h b/tensorflow/stream_executor/tpu/tpu_timer.h new file mode 100644 index 00000000000..246a0b7eb32 --- /dev/null +++ b/tensorflow/stream_executor/tpu/tpu_timer.h @@ -0,0 +1,38 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_TIMER_H_ +#define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_TIMER_H_ + +#include "tensorflow/core/platform/types.h" +#include "tensorflow/stream_executor/stream_executor_internal.h" +#include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" + +namespace tensorflow { + +class TpuTimer : public ::stream_executor::internal::TimerInterface { + public: + explicit TpuTimer(SE_Timer* timer) : timer_(timer) {} + ~TpuTimer() override { TpuTimer_Free(timer_); } + uint64 Microseconds() const override { return TpuTimer_Microseconds(timer_); } + uint64 Nanoseconds() const override { return TpuTimer_Nanoseconds(timer_); } + + private: + SE_Timer* timer_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_TIMER_H_ diff --git a/tensorflow/stream_executor/tpu/tpu_transfer_manager.cc b/tensorflow/stream_executor/tpu/tpu_transfer_manager.cc new file mode 100644 index 00000000000..473585a12d1 --- /dev/null +++ b/tensorflow/stream_executor/tpu/tpu_transfer_manager.cc @@ -0,0 +1,167 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/stream_executor/tpu/tpu_transfer_manager.h" + +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/stream_executor/device_memory.h" +#include "tensorflow/stream_executor/tpu/c_api_conversions.h" +#include "tensorflow/stream_executor/tpu/proto_helper.h" +#include "tensorflow/stream_executor/tpu/status_helper.h" +#include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" +#include "tensorflow/stream_executor/tpu/tpu_platform.h" + +namespace tensorflow { + +using Status = stream_executor::port::Status; +template +using StatusOr = stream_executor::port::StatusOr; + +TpuTransferManager::TpuTransferManager() { + manager_ = TpuTransferManager_New(); +} + +TpuTransferManager::~TpuTransferManager() { TpuTransferManager_Free(manager_); } + +stream_executor::Platform::Id TpuTransferManager::PlatformId() const { + return TpuPlatform::kId; +} + +xla::Shape TpuTransferManager::HostShapeToDeviceShape( + const xla::Shape& host_shape) const { + XLA_Shape c_host_shape; + XLA_Shape c_device_shape; + + TpuConversions::XlaShapeToCShape(host_shape, &c_host_shape); + + TpuTransferManager_HostShapeToDeviceShape(manager_, &c_host_shape, + &c_device_shape); + xla::Shape device_shape = TpuConversions::CShapeToXlaShape(&c_device_shape); + TpuConversions::CShapeCleanup(&c_host_shape); + TpuConversions::CShapeCleanup(&c_device_shape); + return device_shape; +} + +Status TpuTransferManager::TransferLiteralToDeviceAsync( + stream_executor::Stream* stream, const xla::LiteralSlice& literal, + const xla::ShapedBuffer& device_buffer, + const TransferMetadata* transfer_metadata) { + StatusHelper status; + + XLA_Literal c_literal; + TpuConversions::XLALiteralToCLiteral(literal, &c_literal); + + XLA_ShapedBuffer c_device_buffer; + TpuConversions::XLAShapedBufferToCShapedBuffer(device_buffer, + &c_device_buffer); + + TpuTransferManager_TransferLiteralToDeviceAsync( + manager_, + TpuPlatform::GetRegisteredPlatform()->stream_map()->at( + stream->implementation()), + &c_literal, &c_device_buffer, status.c_status); + TpuConversions::CShapedBufferCleanup(&c_device_buffer); + TpuConversions::CLiteralCleanup(&c_literal); + return status.status(); +} + +struct TransferFromDeviceState { + std::atomic remaining_transfers; + StatusHelper status_helper; + std::function done; + + void TransferFinished(SE_Status* status) { + if (!TpuStatus_Ok(status) && TpuStatus_Ok(status_helper.c_status)) { + status_helper.c_status = status; + } else { + TpuStatus_Free(status); + } + + if (--remaining_transfers == 0) { + done(status_helper.status()); + delete this; + } + } +}; + +void TransferLiteralFromDeviceTrampoline(void* ctx, SE_Status* status) { + reinterpret_cast(ctx)->TransferFinished(status); +} + +void TpuTransferManager::TransferLiteralFromDevice( + stream_executor::Stream* stream, const xla::ShapedBuffer& device_buffer, + xla::MutableBorrowingLiteral literal, std::function done, + const TransferMetadata* transfer_metadata) { + TransferFromDeviceState* state = new TransferFromDeviceState; + state->remaining_transfers = 1; + state->done = done; + XLA_ShapedBuffer c_device_buffer; + TpuConversions::XLAShapedBufferToCShapedBuffer(device_buffer, + &c_device_buffer); + XLA_Literal c_literal; + TpuConversions::XLALiteralToCLiteral(literal, &c_literal); + + TpuTransferManager_TransferLiteralFromDevice( + manager_, + TpuPlatform::GetRegisteredPlatform()->stream_map()->at( + 
stream->implementation()), + &c_device_buffer, &c_literal, TransferLiteralFromDeviceTrampoline, state); + TpuConversions::CShapedBufferCleanup(&c_device_buffer); + TpuConversions::CLiteralCleanup(&c_literal); +} + +int64 TpuTransferManager::GetByteSizeRequirement( + const xla::Shape& shape) const { + XLA_Shape c_shape; + TpuConversions::XlaShapeToCShape(shape, &c_shape); + + int64 size_in_bytes = + TpuTransferManager_GetByteSizeRequirement(manager_, &c_shape); + + TpuConversions::CShapeCleanup(&c_shape); + return size_in_bytes; +} + +Status TpuTransferManager::WriteSingleTupleIndexTable( + stream_executor::Stream* stream, + absl::Span elements, + const xla::Shape& shape, stream_executor::DeviceMemoryBase* region) { + CHECK_GT(elements.size(), 0); + SE_DeviceMemoryBase* elements_bases = + new SE_DeviceMemoryBase[elements.size()]; + for (int i = 0; i < elements.size(); i++) { + elements_bases[i] = + SE_DeviceMemoryBase{const_cast(elements[i].opaque()), + elements[i].size(), elements[i].payload()}; + } + XLA_Shape c_shape; + TpuConversions::XlaShapeToCShape(shape, &c_shape); + SE_DeviceMemoryBase region_base{region->opaque(), region->size(), + region->payload()}; + StatusHelper status; + + TpuTransferManager_WriteSingleTupleIndexTable( + manager_, + TpuPlatform::GetRegisteredPlatform()->stream_map()->at( + stream->implementation()), + elements_bases, elements.size(), &c_shape, ®ion_base, status.c_status); + + delete[] elements_bases; + TpuConversions::CShapeCleanup(&c_shape); + return status.status(); +} + +} // namespace tensorflow diff --git a/tensorflow/stream_executor/tpu/tpu_transfer_manager.h b/tensorflow/stream_executor/tpu/tpu_transfer_manager.h new file mode 100644 index 00000000000..163ac81ea5f --- /dev/null +++ b/tensorflow/stream_executor/tpu/tpu_transfer_manager.h @@ -0,0 +1,83 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_TRANSFER_MANAGER_H_ +#define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_TRANSFER_MANAGER_H_ + +#include "tensorflow/compiler/xla/literal.h" +#include "tensorflow/compiler/xla/service/shaped_buffer.h" +#include "tensorflow/compiler/xla/service/transfer_manager.h" +#include "tensorflow/compiler/xla/shape.h" +#include "tensorflow/stream_executor/stream_executor.h" +#include "tensorflow/stream_executor/tpu/tpu_executor_c_api.h" + +namespace tensorflow { + +class TpuTransferManager : public xla::TransferManager { + public: + TpuTransferManager(); + ~TpuTransferManager() override; + + using Status = stream_executor::port::Status; + template + using StatusOr = stream_executor::port::StatusOr; + + stream_executor::Platform::Id PlatformId() const override; + + xla::Shape HostShapeToDeviceShape( + const xla::Shape& host_shape) const override; + + Status TransferLiteralToDeviceAsync( + stream_executor::Stream* stream, const xla::LiteralSlice& literal, + const xla::ShapedBuffer& device_buffer, + const TransferMetadata* transfer_metadata) override; + + void TransferLiteralFromDevice( + stream_executor::Stream* stream, const xla::ShapedBuffer& device_buffer, + xla::MutableBorrowingLiteral literal, std::function done, + const TransferMetadata* transfer_metadata) override; + + Status TransferLiteralToInfeed(stream_executor::StreamExecutor* executor, + const xla::LiteralSlice& literal) override { + LOG(FATAL) << "Not yet implemented"; + } + + Status TransferLiteralFromOutfeed( + stream_executor::StreamExecutor* executor, + const xla::Shape& literal_shape, + xla::MutableBorrowingLiteral literal) override { + LOG(FATAL) << "Not yet implemented"; + } + + Status ResetDevices( + absl::Span executor) override { + LOG(FATAL) << "Not yet implemented"; + } + + int64 GetByteSizeRequirement(const xla::Shape& shape) const override; + + Status WriteSingleTupleIndexTable( + stream_executor::Stream* stream, + absl::Span elements, + const xla::Shape& shape, + stream_executor::DeviceMemoryBase* region) override; + + private: + XLA_TransferManager* manager_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_TRANSFER_MANAGER_H_ diff --git a/tensorflow/stream_executor/tpu/tpu_transfer_manager_registration.cc b/tensorflow/stream_executor/tpu/tpu_transfer_manager_registration.cc new file mode 100644 index 00000000000..f7f0c6fbe2c --- /dev/null +++ b/tensorflow/stream_executor/tpu/tpu_transfer_manager_registration.cc @@ -0,0 +1,35 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include + +#include "tensorflow/compiler/xla/service/transfer_manager.h" +#include "tensorflow/stream_executor/tpu/tpu_platform.h" +#include "tensorflow/stream_executor/tpu/tpu_transfer_manager.h" + +namespace tensorflow { + +static std::unique_ptr CreateTpuTransferManager() { + return std::make_unique(); +} + +static bool InitModule() { + xla::TransferManager::RegisterTransferManager(TpuPlatform::kId, + CreateTpuTransferManager); + return true; +} +static bool module_initialized = InitModule(); + +} // namespace tensorflow From d570632ba89e7352a3d08acfc2c1737f9e5983af Mon Sep 17 00:00:00 2001 From: Thomas O'Malley Date: Mon, 8 Jun 2020 23:59:21 -0700 Subject: [PATCH 108/178] Do not wrap a single x input passed to Model.fit in a tuple This makes Numpy and Tensor array behavior consistent with the behavior when passing a Dataset, generator, or Keras Sequence object with a single Tensor. In all cases, the single Tensor is passed to Model.train_step directly without modification PiperOrigin-RevId: 315431108 Change-Id: I65d7a57967ffa54ae7786029d235c7f3c37da80f --- .../python/keras/engine/data_adapter.py | 18 ++++++--- .../python/keras/engine/data_adapter_test.py | 20 +++++++++- .../python/keras/engine/training_test.py | 37 +++++++++++++++++++ 3 files changed, 68 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/keras/engine/data_adapter.py b/tensorflow/python/keras/engine/data_adapter.py index bf0bbb7d994..469355dd722 100644 --- a/tensorflow/python/keras/engine/data_adapter.py +++ b/tensorflow/python/keras/engine/data_adapter.py @@ -1500,12 +1500,13 @@ def pack_x_y_sample_weight(x, y=None, sample_weight=None): >>> x = tf.ones((10, 1)) >>> data = tf.keras.utils.pack_x_y_sample_weight(x) - >>> len(data) - 1 + >>> isinstance(data, tf.Tensor) + True >>> y = tf.ones((10, 1)) >>> data = tf.keras.utils.pack_x_y_sample_weight(x, y) - >>> len(data) - 2 + >>> isinstance(data, tuple) + True + >>> x, y = data Arguments: x: Features to pass to `Model`. @@ -1516,7 +1517,14 @@ def pack_x_y_sample_weight(x, y=None, sample_weight=None): Tuple in the format used in `Model.fit`. """ if y is None: - return (x,) + # For single x-input, we do no tuple wrapping since in this case + # there is no ambiguity. 
This also makes NumPy and Dataset + # consistent in that the user does not have to wrap their Dataset + # data in an unecessary tuple + if not nest.is_sequence(x): + return x + else: + return (x,) elif sample_weight is None: return (x, y) else: diff --git a/tensorflow/python/keras/engine/data_adapter_test.py b/tensorflow/python/keras/engine/data_adapter_test.py index 3f4e6d0cb83..be9c6d79193 100644 --- a/tensorflow/python/keras/engine/data_adapter_test.py +++ b/tensorflow/python/keras/engine/data_adapter_test.py @@ -278,7 +278,7 @@ class TensorLikeDataAdapterTest(DataAdapterTestBase): def _get_epoch(ds_iter): ds_data = [] for _ in range(int(math.ceil(num_samples / batch_size))): - ds_data.append(next(ds_iter)[0].numpy()) + ds_data.append(next(ds_iter).numpy()) return np.concatenate(ds_data) ds_iter = iter(adapter.get_dataset()) @@ -507,7 +507,7 @@ class GenericArrayLikeDataAdapterTest(DataAdapterTestBase): def _get_epoch(ds_iter): ds_data = [] for _ in range(int(math.ceil(num_samples / batch_size))): - ds_data.append(next(ds_iter)[0].numpy()) + ds_data.append(next(ds_iter).numpy()) return np.concatenate(ds_data) ds_iter = iter(adapter.get_dataset()) @@ -981,6 +981,22 @@ class DataHandlerTest(keras_parameterized.TestCase): 2: 1.5 }) + @parameterized.named_parameters(('numpy', True), ('dataset', False)) + def test_single_x_input_no_tuple_wrapping(self, use_numpy): + x = np.ones((10, 1)) + + if use_numpy: + batch_size = 2 + else: + x = dataset_ops.Dataset.from_tensor_slices(x).batch(2) + batch_size = None + + data_handler = data_adapter.DataHandler(x, batch_size=batch_size) + for _, iterator in data_handler.enumerate_epochs(): + for _ in data_handler.steps(): + # Check that single x input is not wrapped in a tuple. + self.assertIsInstance(next(iterator), ops.Tensor) + class TestValidationSplit(keras_parameterized.TestCase): diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py index bc63c3acec6..523349faf58 100644 --- a/tensorflow/python/keras/engine/training_test.py +++ b/tensorflow/python/keras/engine/training_test.py @@ -1558,6 +1558,43 @@ class TrainingTest(keras_parameterized.TestCase): # assign_add not called. self.assertEqual(self.evaluate(layer.v), 1.) + @keras_parameterized.run_all_keras_modes(always_skip_v1=True) + @parameterized.named_parameters(('numpy', True), ('dataset', False)) + def test_single_input_no_tuple_wrapping(self, use_numpy): + x = np.ones((10, 1)) + + if use_numpy: + batch_size = 3 + else: + x = dataset_ops.Dataset.from_tensor_slices(x).batch(3) + batch_size = None + + test_case = self + + class MyModel(training_module.Model): + + def train_step(self, data): + # No tuple wrapping for single x input and no targets. 
+ test_case.assertIsInstance(data, ops.Tensor) + return super(MyModel, self).train_step(data) + + def test_step(self, data): + test_case.assertIsInstance(data, ops.Tensor) + return super(MyModel, self).test_step(data) + + def predict_step(self, data): + test_case.assertIsInstance(data, ops.Tensor) + return super(MyModel, self).predict_step(data) + + inputs = layers_module.Input(1) + outputs = layers_module.Dense(1)(inputs) + model = MyModel(inputs, outputs) + model.add_loss(math_ops.reduce_sum(outputs)) + model.compile('sgd', 'mse') + model.fit(x, batch_size=batch_size) + model.evaluate(x, batch_size=batch_size) + model.predict(x, batch_size=batch_size) + class TestExceptionsAndWarnings(keras_parameterized.TestCase): From 8398b862ffc988165c63ed5c76fdeae9635d3640 Mon Sep 17 00:00:00 2001 From: Pavithra Vijay Date: Tue, 9 Jun 2020 00:19:53 -0700 Subject: [PATCH 109/178] Mark layer `updates` property as deprecated as updates get applied automatically in TF2. PiperOrigin-RevId: 315433255 Change-Id: I690ec4b15f3dea228a7d26e0bd5332617c0525a2 --- tensorflow/python/keras/engine/base_layer.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py index 561355f5f57..87f306b2879 100644 --- a/tensorflow/python/keras/engine/base_layer.py +++ b/tensorflow/python/keras/engine/base_layer.py @@ -1327,10 +1327,13 @@ class Layer(module.Module, version_utils.LayerVersionSelector): return self.trainable_weights + self.non_trainable_weights @property - @doc_controls.do_not_doc_inheritable + @deprecation.deprecated( + date=None, + instructions='This property should not be used in TensorFlow 2.0, ' + 'as updates are applied automatically.') + @doc_controls.do_not_generate_docs def updates(self): - if (keras_tensor.keras_tensors_enabled() - and ops.executing_eagerly_outside_functions()): + if keras_tensor.keras_tensors_enabled(): return [] collected_updates = [] From 8dffa4de1b20769f0a6643c640185543098d47ac Mon Sep 17 00:00:00 2001 From: Pavithra Vijay Date: Tue, 9 Jun 2020 01:09:01 -0700 Subject: [PATCH 110/178] Follow up to https://github.com/tensorflow/tensorflow/commit/0e1f3de50a5c22bfb871d76d306770eb283be95d. This change enables keras tensor tests related to updates/losses. - `updates` are not relevant in V2. The original `KerasTensor` change returns an empty list for updates. This change modifies the tests checking for updates to run only in v1 mode or updates the test logic as required. - Some of the losses/add_loss tests were failing with KerasTensor because we were trying to convert KerasTensor to Tensor. This code changes/moves the conversions as required. 
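As a rough illustration of the guard these conversions now use, the pattern below is a minimal sketch rather than the exact library code; ops.convert_to_tensor_v2 and keras_tensor.KerasTensor are the real symbols touched in this change, while the helper name _maybe_convert is invented here for illustration:

    from tensorflow.python.framework import ops
    from tensorflow.python.keras.engine import keras_tensor

    def _maybe_convert(value):
      # KerasTensor is a symbolic placeholder produced while tracing a
      # functional model; trying to convert it to a concrete Tensor is what
      # made the losses/add_loss tests fail, so only concrete values
      # (NumPy arrays, Python scalars, EagerTensors) are converted.
      if isinstance(value, keras_tensor.KerasTensor):
        return value
      return ops.convert_to_tensor_v2(value)

This guard appears twice in losses_utils.compute_weighted_loss (once for `losses`, once for `sample_weight`), and the unconditional conversion is dropped from losses/util.squeeze_or_expand_dimensions so that callers such as nn_impl.compute_average_loss convert or cast `sample_weight` themselves.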
PiperOrigin-RevId: 315438845 Change-Id: Ic2a5341cc5f2684649e2efc006e34a33e7da31ee --- .../python/keras/engine/functional_test.py | 9 ++++-- .../python/keras/engine/training_test.py | 14 +++++---- .../python/keras/layers/normalization_test.py | 17 +++++------ tensorflow/python/keras/models_test.py | 12 ++++---- .../keras/tests/add_loss_correctness_test.py | 29 +++++++++---------- tensorflow/python/keras/utils/losses_utils.py | 7 ++++- .../python/keras/utils/metrics_utils.py | 1 + tensorflow/python/ops/losses/util.py | 1 - tensorflow/python/ops/nn_impl.py | 1 + 9 files changed, 53 insertions(+), 38 deletions(-) diff --git a/tensorflow/python/keras/engine/functional_test.py b/tensorflow/python/keras/engine/functional_test.py index 68b40caad9b..25b433ce582 100644 --- a/tensorflow/python/keras/engine/functional_test.py +++ b/tensorflow/python/keras/engine/functional_test.py @@ -107,7 +107,7 @@ class NetworkConstructionTest(keras_parameterized.TestCase): network.add_update(state_ops.assign_add(layer.b, x4), inputs=True) self.assertEqual(len(network.updates), 7) - @combinations.generate(combinations.combine(mode=['graph', 'eager'])) + @combinations.generate(combinations.combine(mode=['graph'])) def test_get_updates_bn(self): x1 = input_layer_lib.Input(shape=(1,)) layer = layers.BatchNormalization() @@ -1593,9 +1593,9 @@ class GraphUtilsTest(test.TestCase): tf_utils.get_reachable_from_inputs([x_3]), {x_3, x_5, x_5.op}) -@combinations.generate(combinations.combine(mode=['graph', 'eager'])) class NestedNetworkTest(keras_parameterized.TestCase): + @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def test_nested_inputs_network(self): inputs = { 'x1': input_layer_lib.Input(shape=(1,)), @@ -1620,6 +1620,7 @@ class NestedNetworkTest(keras_parameterized.TestCase): }) self.assertListEqual(output_shape.as_list(), [None, 1]) + @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def test_nested_outputs_network(self): inputs = input_layer_lib.Input(shape=(1,)) outputs = { @@ -1640,6 +1641,7 @@ class NestedNetworkTest(keras_parameterized.TestCase): self.assertListEqual(output_shape['x+x'].as_list(), [None, 1]) self.assertListEqual(output_shape['x*x'].as_list(), [None, 1]) + @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def test_nested_network_inside_network(self): inner_inputs = { 'x1': input_layer_lib.Input(shape=(1,)), @@ -1672,6 +1674,7 @@ class NestedNetworkTest(keras_parameterized.TestCase): output_shape = network.compute_output_shape([(None, 1), (None, 1)]) self.assertListEqual(output_shape.as_list(), [None, 1]) + @combinations.generate(combinations.combine(mode=['graph'])) def test_updates_with_direct_call(self): inputs = input_layer_lib.Input(shape=(10,)) x = layers.BatchNormalization()(inputs) @@ -1683,6 +1686,7 @@ class NestedNetworkTest(keras_parameterized.TestCase): self.assertLen(model.updates, 4) + @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def test_dict_mapping_input(self): class ReturnFirst(layers.Layer): @@ -1708,6 +1712,7 @@ class NestedNetworkTest(keras_parameterized.TestCase): res = reversed_model({'a': a_val, 'b': b_val}) self.assertAllClose(self.evaluate(res), self.evaluate(b_val)) + @combinations.generate(combinations.combine(mode=['graph', 'eager'])) def test_dict_mapping_single_input(self): b = input_layer_lib.Input(shape=(1,), name='b') outputs = b * 2 diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py index 523349faf58..654435d8edf 
100644 --- a/tensorflow/python/keras/engine/training_test.py +++ b/tensorflow/python/keras/engine/training_test.py @@ -690,7 +690,7 @@ class TrainingTest(keras_parameterized.TestCase): metrics=['accuracy'], run_eagerly=testing_utils.should_run_eagerly()) - @keras_parameterized.run_all_keras_modes(skip_keras_tensors=True) + @keras_parameterized.run_all_keras_modes def test_that_trainable_disables_updates(self): val_a = np.random.random((10, 4)) val_out = np.random.random((10, 4)) @@ -701,13 +701,15 @@ class TrainingTest(keras_parameterized.TestCase): model = training_module.Model(a, b) model.trainable = False - assert not model.updates + if not ops.executing_eagerly_outside_functions(): + self.assertEmpty(model.updates) model.compile( 'sgd', 'mse', run_eagerly=testing_utils.should_run_eagerly()) - assert not model.updates + if not ops.executing_eagerly_outside_functions(): + self.assertEmpty(model.updates) x1 = model.predict(val_a) model.train_on_batch(val_a, val_out) @@ -719,7 +721,8 @@ class TrainingTest(keras_parameterized.TestCase): 'sgd', 'mse', run_eagerly=testing_utils.should_run_eagerly()) - assert model.updates + if not ops.executing_eagerly_outside_functions(): + self.assertAllGreater(len(model.updates), 0) model.train_on_batch(val_a, val_out) x2 = model.predict(val_a) @@ -730,7 +733,8 @@ class TrainingTest(keras_parameterized.TestCase): 'sgd', 'mse', run_eagerly=testing_utils.should_run_eagerly()) - assert not model.updates + if not ops.executing_eagerly_outside_functions(): + self.assertEmpty(model.updates) x1 = model.predict(val_a) model.train_on_batch(val_a, val_out) diff --git a/tensorflow/python/keras/layers/normalization_test.py b/tensorflow/python/keras/layers/normalization_test.py index 4d1e3213ba7..e14977edfc4 100644 --- a/tensorflow/python/keras/layers/normalization_test.py +++ b/tensorflow/python/keras/layers/normalization_test.py @@ -311,18 +311,17 @@ class BatchNormalizationV2Test(keras_parameterized.TestCase): norm(inp) def test_updates_in_wrap_function(self): - with context.eager_mode(): - layer = keras.layers.BatchNormalization() + layer = normalization.BatchNormalization() - def my_func(): - x = array_ops.ones((10, 1)) - return layer(x, training=True) + def my_func(): + x = array_ops.ones((10, 1)) + return layer(x, training=True) - wrapped_fn = wrap_function.wrap_function(my_func, []) - wrapped_fn() + wrapped_fn = wrap_function.wrap_function(my_func, []) + wrapped_fn() - # Updates should be tracked in a `wrap_function`. - self.assertLen(layer.updates, 2) + # Updates should be tracked in a `wrap_function`. 
+ self.assertLen(layer.updates, 2) @keras_parameterized.run_all_keras_modes def test_basic_batchnorm_v2_none_shape_and_virtual_batch_size(self): diff --git a/tensorflow/python/keras/models_test.py b/tensorflow/python/keras/models_test.py index 4db01d66edb..86a2fab10ac 100644 --- a/tensorflow/python/keras/models_test.py +++ b/tensorflow/python/keras/models_test.py @@ -79,7 +79,7 @@ def _get_model(input_shape=(4,)): class TestModelCloning(keras_parameterized.TestCase): - @keras_parameterized.run_all_keras_modes(skip_keras_tensors=True) + @keras_parameterized.run_all_keras_modes @parameterized.named_parameters([ {'testcase_name': 'has_input_layer', 'input_shape': (4,), @@ -122,7 +122,7 @@ class TestModelCloning(keras_parameterized.TestCase): isinstance(new_model._layers[0], keras.layers.InputLayer), add_input_layer) self.assertEqual(new_model._is_graph_network, model._is_graph_network) - if input_shape: + if input_shape and not ops.executing_eagerly_outside_functions(): # update ops from batch norm needs to be included self.assertGreaterEqual(len(new_model.updates), 2) @@ -142,7 +142,7 @@ class TestModelCloning(keras_parameterized.TestCase): self.assertIsInstance(new_model._layers[0], keras.layers.InputLayer) self.assertTrue(new_model._is_graph_network) - @keras_parameterized.run_all_keras_modes(skip_keras_tensors=True) + @keras_parameterized.run_all_keras_modes @parameterized.named_parameters([ {'testcase_name': 'clone_weights', 'share_weights': False}, {'testcase_name': 'share_weights', 'share_weights': True}, @@ -173,7 +173,8 @@ class TestModelCloning(keras_parameterized.TestCase): # With placeholder creation new_model = clone_fn(model) - self.assertGreaterEqual(len(new_model.updates), 2) + if not ops.executing_eagerly_outside_functions(): + self.assertGreaterEqual(len(new_model.updates), 2) new_model.compile( testing_utils.get_v2_optimizer('rmsprop'), 'mse', @@ -185,7 +186,8 @@ class TestModelCloning(keras_parameterized.TestCase): input_b = keras.Input(shape=(4,), name='b') new_model = keras.models.clone_model( model, input_tensors=[input_a, input_b]) - self.assertLen(new_model.updates, 2) + if not ops.executing_eagerly_outside_functions(): + self.assertLen(new_model.updates, 2) new_model.compile( testing_utils.get_v2_optimizer('rmsprop'), 'mse', diff --git a/tensorflow/python/keras/tests/add_loss_correctness_test.py b/tensorflow/python/keras/tests/add_loss_correctness_test.py index a7708b0999d..a19eec75ffb 100644 --- a/tensorflow/python/keras/tests/add_loss_correctness_test.py +++ b/tensorflow/python/keras/tests/add_loss_correctness_test.py @@ -69,7 +69,7 @@ class TestAddLossCorrectness(keras_parameterized.TestCase): self.y = np.array([[0.5], [2.], [3.5]], dtype='float32') self.w = np.array([[1.25], [0.5], [1.25]], dtype='float32') - @keras_parameterized.run_all_keras_modes(skip_keras_tensors=True) + @keras_parameterized.run_all_keras_modes def test_loss_on_model_fit(self): inputs = Input(shape=(1,)) targets = Input(shape=(1,)) @@ -85,8 +85,7 @@ class TestAddLossCorrectness(keras_parameterized.TestCase): self.assertAllClose(history.history['loss'], [2., 1.8, 1.6, 1.4, 1.2], 1e-3) @keras_parameterized.run_with_all_model_types(exclude_models=['sequential']) - @keras_parameterized.run_all_keras_modes(skip_keras_tensors=True, - always_skip_v1=True) + @keras_parameterized.run_all_keras_modes(always_skip_v1=True) def test_loss_callable_on_model_fit(self): model = testing_utils.get_model_from_layers([testing_utils.Bias()], input_shape=(1,)) @@ -145,7 +144,7 @@ class 
TestAddLossCorrectness(keras_parameterized.TestCase): loss = [train_step(self.x, self.y) for _ in range(5)] self.assertAllClose(loss, [0., -0.05, -0.1, -0.15, -0.2], 1e-3) - @keras_parameterized.run_all_keras_modes(skip_keras_tensors=True) + @keras_parameterized.run_all_keras_modes def test_loss_with_sample_weight_on_model_fit(self): inputs = Input(shape=(1,)) targets = Input(shape=(1,)) @@ -182,7 +181,7 @@ class TestAddLossCorrectness(keras_parameterized.TestCase): loss = [train_step(self.x, self.y, self.w) for _ in range(5)] self.assertAllClose(loss, [2., 1.8, 1.6, 1.4, 1.2], 1e-3) - @keras_parameterized.run_all_keras_modes(skip_keras_tensors=True) + @keras_parameterized.run_all_keras_modes def test_loss_with_sample_weight_in_model_call(self): class MyModel(Model): @@ -210,7 +209,7 @@ class TestAddLossCorrectness(keras_parameterized.TestCase): eval_out = model.evaluate([self.x, self.y, self.w]) self.assertAlmostEqual(eval_out, 1.0, 3) - @keras_parameterized.run_all_keras_modes(skip_keras_tensors=True) + @keras_parameterized.run_all_keras_modes def test_loss_with_sample_weight_in_layer_call(self): class MyLayer(layers.Layer): @@ -245,7 +244,7 @@ class TestAddLossCorrectness(keras_parameterized.TestCase): output = model.test_on_batch([self.x, self.y, self.w]) self.assertAlmostEqual(output, 1.0, 3) - @keras_parameterized.run_all_keras_modes(skip_keras_tensors=True) + @keras_parameterized.run_all_keras_modes def test_loss_on_layer(self): class MyLayer(layers.Layer): @@ -266,7 +265,7 @@ class TestAddLossCorrectness(keras_parameterized.TestCase): loss = model.train_on_batch(np.ones((2, 3)), np.ones((2, 3))) self.assertEqual(loss, 2 * 3) - @keras_parameterized.run_all_keras_modes(skip_keras_tensors=True) + @keras_parameterized.run_all_keras_modes @keras_parameterized.run_with_all_model_types def test_activity_regularizer(self): loss = {} @@ -300,7 +299,7 @@ class TestAddLossCorrectness(keras_parameterized.TestCase): loss[reg] = model.evaluate(x, y) self.assertLess(loss[None], loss['l2']) - @keras_parameterized.run_all_keras_modes(skip_keras_tensors=True) + @keras_parameterized.run_all_keras_modes @keras_parameterized.run_with_all_model_types def test_activity_regularizer_loss_value(self): layer = layers.Dense( @@ -319,7 +318,7 @@ class TestAddLossCorrectness(keras_parameterized.TestCase): loss = model.test_on_batch(x) self.assertAlmostEqual(0.01, loss, places=4) - @keras_parameterized.run_all_keras_modes(skip_keras_tensors=True) + @keras_parameterized.run_all_keras_modes def test_activity_regularizer_batch_independent(self): inputs = layers.Input(shape=(10,)) x = layers.Dense(10, activation='relu', activity_regularizer='l2')(inputs) @@ -335,7 +334,7 @@ class TestAddLossCorrectness(keras_parameterized.TestCase): loss_big_batch = model.test_on_batch(np.ones((20, 10), 'float32')) self.assertAlmostEqual(loss_small_batch, loss_big_batch, places=4) - @keras_parameterized.run_all_keras_modes(skip_keras_tensors=True) + @keras_parameterized.run_all_keras_modes def test_with_shared_layer(self): class LayerWithLoss(layers.Layer): @@ -352,7 +351,7 @@ class TestAddLossCorrectness(keras_parameterized.TestCase): self.assertEqual(len(m2.losses), 2) self.assertAllClose(m2.losses, [6, 12]) - @keras_parameterized.run_all_keras_modes(skip_keras_tensors=True) + @keras_parameterized.run_all_keras_modes def test_with_shared_nested_layer(self): class LayerWithLoss(layers.Layer): @@ -378,7 +377,7 @@ class TestAddLossCorrectness(keras_parameterized.TestCase): self.assertEqual(len(m2.losses), 2) 
self.assertAllClose(m2.losses, [6, 12]) - @keras_parameterized.run_all_keras_modes(skip_keras_tensors=True) + @keras_parameterized.run_all_keras_modes def test_clear_losses(self): class LayerWithSharedNestedLossLayer(layers.Layer): @@ -429,7 +428,7 @@ class TestAddLossCorrectness(keras_parameterized.TestCase): self.assertEqual(len(model.get_losses_for(x4)), 2) self.assertEqual(len(model.get_losses_for(None)), 1) - @keras_parameterized.run_all_keras_modes(skip_keras_tensors=True) + @keras_parameterized.run_all_keras_modes def test_invalid_constant_input(self): with context.eager_mode(): inputs = Input(shape=(1,)) @@ -440,7 +439,7 @@ class TestAddLossCorrectness(keras_parameterized.TestCase): 'Expected a symbolic Tensors or a callable for the loss value'): model.add_loss(1.) - @keras_parameterized.run_all_keras_modes(skip_keras_tensors=True) + @keras_parameterized.run_all_keras_modes def test_invalid_variable_input(self): with context.eager_mode(): inputs = Input(shape=(1,)) diff --git a/tensorflow/python/keras/utils/losses_utils.py b/tensorflow/python/keras/utils/losses_utils.py index e81058e3b70..fecb0433256 100644 --- a/tensorflow/python/keras/utils/losses_utils.py +++ b/tensorflow/python/keras/utils/losses_utils.py @@ -21,6 +21,7 @@ from __future__ import print_function from tensorflow.python.distribute import distribution_strategy_context from tensorflow.python.framework import ops from tensorflow.python.keras import backend as K +from tensorflow.python.keras.engine import keras_tensor from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.losses import loss_reduction @@ -101,8 +102,12 @@ def compute_weighted_loss(losses, # to multiple replicas. Used only for estimator + v1 optimizer flow. ops.get_default_graph()._last_loss_reduction = reduction # pylint: disable=protected-access - losses = ops.convert_to_tensor_v2(losses) + if not isinstance(losses, keras_tensor.KerasTensor): + losses = ops.convert_to_tensor_v2(losses) input_dtype = losses.dtype + + if not isinstance(sample_weight, keras_tensor.KerasTensor): + sample_weight = ops.convert_to_tensor_v2(sample_weight) weighted_losses = tf_losses_utils.scale_losses_by_sample_weight( losses, sample_weight) # Apply reduction function to the individual weighted losses. diff --git a/tensorflow/python/keras/utils/metrics_utils.py b/tensorflow/python/keras/utils/metrics_utils.py index 5f9b57c095e..81d6a8d8923 100644 --- a/tensorflow/python/keras/utils/metrics_utils.py +++ b/tensorflow/python/keras/utils/metrics_utils.py @@ -346,6 +346,7 @@ def update_confusion_matrix_variables(variables_to_update, y_pred, y_true = tf_losses_utils.squeeze_or_expand_dimensions( y_pred, y_true) else: + sample_weight = math_ops.cast(sample_weight, dtype=variable_dtype) y_pred, y_true, sample_weight = ( tf_losses_utils.squeeze_or_expand_dimensions( y_pred, y_true, sample_weight=sample_weight)) diff --git a/tensorflow/python/ops/losses/util.py b/tensorflow/python/ops/losses/util.py index 279fe0aba13..49474833cdb 100644 --- a/tensorflow/python/ops/losses/util.py +++ b/tensorflow/python/ops/losses/util.py @@ -84,7 +84,6 @@ def squeeze_or_expand_dimensions(y_pred, y_true=None, sample_weight=None): if sample_weight is None: return y_pred, y_true - sample_weight = ops.convert_to_tensor(sample_weight) weights_shape = sample_weight.shape weights_rank = weights_shape.ndims if weights_rank == 0: # If weights is scalar, do nothing. 
diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index cb028bfe1e0..0a2dba8cf7d 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -427,6 +427,7 @@ def compute_average_loss(per_example_loss, with losses_util.check_per_example_loss_rank(per_example_loss): if sample_weight is not None: + sample_weight = ops.convert_to_tensor(sample_weight) per_example_loss = losses_util.scale_losses_by_sample_weight( per_example_loss, sample_weight) per_example_loss = math_ops.cast(per_example_loss, input_dtype) From c1df171d427b0a5badd6f3a82df2bf27360de605 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Jun 2020 02:02:30 -0700 Subject: [PATCH 111/178] Update GraphDef version to 427. PiperOrigin-RevId: 315443842 Change-Id: I801e38568d08f62fb4cb76757e0e71b42115d4b5 --- tensorflow/core/public/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 1aa8c84305e..1ac69be162d 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -108,7 +108,7 @@ limitations under the License. #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0 -#define TF_GRAPH_DEF_VERSION 426 // Updated: 2020/6/8 +#define TF_GRAPH_DEF_VERSION 427 // Updated: 2020/6/9 // Checkpoint compatibility versions (the versions field in SavedSliceMeta). // From 55e93450d7d8b03aba6bbf1e4555f6a13d733f8a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Jun 2020 02:02:33 -0700 Subject: [PATCH 112/178] compat: Update forward compatibility horizon to 2020-06-09 PiperOrigin-RevId: 315443845 Change-Id: I09a0d862d954c35b6e1290e51ddbe1767be0aa7b --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 87e91ef79b8..c552e1d086d 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export # This value changes every day with an automatic CL. It can be modified in code # via `forward_compatibility_horizon()` or with the environment variable # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date. -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 6, 8) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 6, 9) _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS" _FORWARD_COMPATIBILITY_DATE_NUMBER = None From f76e9c291518babf9b69b75b969dfb296a3a9988 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Tue, 9 Jun 2020 04:10:32 -0700 Subject: [PATCH 113/178] Use cudnn for grouped backward input convolution. Previously we didn't enable this code because it looked like we had some benchmark regressions. It turns out this was just noise. 
PiperOrigin-RevId: 315456845 Change-Id: I32404152ad35692461808e2d8d449e21e55ac95c --- .../xla/service/gpu/gpu_conv_rewriter.cc | 27 ------------------- 1 file changed, 27 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/gpu_conv_rewriter.cc b/tensorflow/compiler/xla/service/gpu/gpu_conv_rewriter.cc index 9d34bb39ba8..fb8c05798d8 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_conv_rewriter.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_conv_rewriter.cc @@ -321,38 +321,11 @@ MatchBackwardInput(HloInstruction* conv) { const auto no_match_result = std::make_tuple(false, Window(), ConvolutionDimensionNumbers(), nullptr); - // TODO: Theoretically cuDNN supports grouped convolutions also - // for the backward input convolution, but based on the cudnn's current state - // there is not much performance improvement when using the - // cudnn backward input API for grouped conv. - // This needs to be re-evaluated for future cuDNN versions. - // Note that we already have the necessary code down below, the only thing to - // enable it is to remove the following early return. - if (conv->feature_group_count() > 1) { - return no_match_result; - } - // Match instruction pattern. CHECK_EQ(HloOpcode::kConvolution, conv->opcode()); HloInstruction* reverse_filter = conv->mutable_operand(1); ConvolutionDimensionNumbers dnums = conv->convolution_dimension_numbers(); - // Match BackwardInput for a depthwise convolution and thunk it to forward - // convolution Output feature dimension and input feature dimension has been - // swapped in the bridge. Hence to get the actual input features we need to - // query the output feature dimension - auto kernel_out_feature_dim = dnums.kernel_output_feature_dimension(); - auto kernel_out_features = - reverse_filter->shape().dimensions(kernel_out_feature_dim); - - // For a depthwise convolution, the input features must be equal to the - // feature_group_count. We can leverage this property to match a depthwise - // convolution and thunk it to forward conv - if (conv->feature_group_count() > 1 && - kernel_out_features == conv->feature_group_count()) { - return no_match_result; - } - // We pattern-match to a backwards input conv if: // // - all spatial dims of the filter are reversed From c2548337173ca6d4c1949225bc3dee2dc3f1a7dc Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Tue, 9 Jun 2020 05:55:16 -0700 Subject: [PATCH 114/178] Bump the ruy repository reference, getting in particular: - Use of the cpuinfo thirdparty library to perform CPU feature detection, including detection of dot-product instructions on iOS and detection of cache sizes. Combined with the ARM64 build change below, this means that iPhone11/SE devices now use dot-product instructions. We measured 250 Gop/s on 1 Lightning core in iPhone 11 Pro. - ARM32: @lissyx's fix in google/ruy#69 - ARM64: build the kNeonDotprod path unconditionally (used to be linux-only). - x86-64: enabling AVX512 instructions in the general opensource build. 
PiperOrigin-RevId: 315467315 Change-Id: I0788ddc68da8d0aa2eee71f820fb5d8c188d35b2 --- .../lite/micro/tools/make/third_party_downloads.inc | 4 ++-- tensorflow/lite/tools/make/download_dependencies.sh | 4 ++-- tensorflow/tools/pip_package/BUILD | 2 ++ third_party/ruy/workspace.bzl | 8 ++++---- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/tensorflow/lite/micro/tools/make/third_party_downloads.inc b/tensorflow/lite/micro/tools/make/third_party_downloads.inc index 8071901d3e7..1c3eaedca67 100644 --- a/tensorflow/lite/micro/tools/make/third_party_downloads.inc +++ b/tensorflow/lite/micro/tools/make/third_party_downloads.inc @@ -56,8 +56,8 @@ SIFIVE_FE310_LIB_MD5 := "06ee24c4956f8e21670ab3395861fe64" KISSFFT_URL="https://github.com/mborgerding/kissfft/archive/v130.zip" KISSFFT_MD5="438ba1fef5783cc5f5f201395cc477ca" -RUY_URL="https://github.com/google/ruy/archive/1b313682ef8b8fc8ed08719c610d1c3503b016bf.zip" -RUY_MD5="2d54f058f8f7120dfc1ecee79dbf259e" +RUY_URL="https://github.com/google/ruy/archive/c347b02c23cfc459678db6d7c230d76fac00f76d.zip" +RUY_MD5="1a0d9c5e5217d6776ea6d9af494da527" CIFAR10_DATASET_URL="https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz" CIFAR10_DATASET_MD5="c32a1d4ab5d03f1284b67883e8d87530" diff --git a/tensorflow/lite/tools/make/download_dependencies.sh b/tensorflow/lite/tools/make/download_dependencies.sh index a7840f6dcd0..90ae2ef919e 100755 --- a/tensorflow/lite/tools/make/download_dependencies.sh +++ b/tensorflow/lite/tools/make/download_dependencies.sh @@ -37,8 +37,8 @@ EIGEN_URL="$(grep -o 'https.*gitlab.com/libeigen/eigen/-/archive/.*tar\.gz' "${B EIGEN_SHA="$(eval echo $(grep '# SHARED_EIGEN_SHA' "${BZL_FILE_PATH}" | grep -o '\".*\"'))" GEMMLOWP_URL="$(grep -o 'https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)" GEMMLOWP_SHA="$(eval echo $(grep '# SHARED_GEMMLOWP_SHA' "${BZL_FILE_PATH}" | grep -o '\".*\"'))" -RUY_URL="https://github.com/google/ruy/archive/1b313682ef8b8fc8ed08719c610d1c3503b016bf.zip" -RUY_SHA="b21524de00c63b3d5683b42557f78452e791cf77fddb2e63f9bcba1f7bd99093" +RUY_URL="https://github.com/google/ruy/archive/c347b02c23cfc459678db6d7c230d76fac00f76d.zip" +RUY_SHA="243a3d0d4283c1f8e774814a4096961288a00a2662e84b3cd564afbf500bb0ad" GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz" GOOGLETEST_SHA="58a6f4277ca2bc8565222b3bbd58a177609e9c488e8a72649359ba51450db7d8" ABSL_URL="$(grep -o 'https://github.com/abseil/abseil-cpp/.*tar.gz' "${BZL_FILE_PATH}" | head -n1)" diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 43bc04a1b60..36e20408c53 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -214,6 +214,8 @@ filegroup( "@sobol_data//:LICENSE", "@termcolor_archive//:COPYING.txt", "@zlib//:zlib.h", + "@clog//:LICENSE", + "@cpuinfo//:LICENSE", ] + select({ "//tensorflow:android": [], "//tensorflow:ios": [], diff --git a/third_party/ruy/workspace.bzl b/third_party/ruy/workspace.bzl index c4ed692df4d..10d8492a8ce 100644 --- a/third_party/ruy/workspace.bzl +++ b/third_party/ruy/workspace.bzl @@ -5,11 +5,11 @@ load("//third_party:repo.bzl", "third_party_http_archive") def repo(): third_party_http_archive( name = "ruy", - sha256 = "b21524de00c63b3d5683b42557f78452e791cf77fddb2e63f9bcba1f7bd99093", - strip_prefix = "ruy-1b313682ef8b8fc8ed08719c610d1c3503b016bf", + sha256 = "243a3d0d4283c1f8e774814a4096961288a00a2662e84b3cd564afbf500bb0ad", + strip_prefix = 
"ruy-c347b02c23cfc459678db6d7c230d76fac00f76d", urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/ruy/archive/1b313682ef8b8fc8ed08719c610d1c3503b016bf.zip", - "https://github.com/google/ruy/archive/1b313682ef8b8fc8ed08719c610d1c3503b016bf.zip", + "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/ruy/archive/c347b02c23cfc459678db6d7c230d76fac00f76d.zip", + "https://github.com/google/ruy/archive/c347b02c23cfc459678db6d7c230d76fac00f76d.zip", ], build_file = "//third_party/ruy:BUILD", ) From 7c1b0d0a37fc4946d24acaceca4df7172ba7c5f6 Mon Sep 17 00:00:00 2001 From: Taehee Jeong Date: Tue, 9 Jun 2020 06:17:36 -0700 Subject: [PATCH 115/178] Add quantization option to Metal delegate Swift API Also updated outdated GPU delegate documentation. PiperOrigin-RevId: 315469909 Change-Id: I7b524373a397763c886905e83a2e8b75226d9471 --- .../swift/Sources/MetalDelegate.swift | 5 + tensorflow/lite/g3doc/performance/gpu.md | 22 +++ .../lite/g3doc/performance/gpu_advanced.md | 138 ++++++++++++------ 3 files changed, 119 insertions(+), 46 deletions(-) diff --git a/tensorflow/lite/experimental/swift/Sources/MetalDelegate.swift b/tensorflow/lite/experimental/swift/Sources/MetalDelegate.swift index 6cde2533f95..7d7e79de0c1 100644 --- a/tensorflow/lite/experimental/swift/Sources/MetalDelegate.swift +++ b/tensorflow/lite/experimental/swift/Sources/MetalDelegate.swift @@ -35,6 +35,7 @@ public final class MetalDelegate: Delegate { var delegateOptions = TFLGpuDelegateOptions() delegateOptions.allow_precision_loss = options.allowsPrecisionLoss delegateOptions.wait_type = options.waitType.cWaitType + delegateOptions.enable_quantization = options.isQuantizationEnabled cDelegate = TFLGpuDelegateCreate(&delegateOptions) } @@ -54,6 +55,10 @@ extension MetalDelegate { /// default is `passive`. public var waitType: ThreadWaitType = .passive + /// Indicates whether the GPU delegate allows execution of an 8-bit quantized model. The default + /// is `false`. + public var isQuantizationEnabled = false + /// Creates a new instance with the default values. public init() {} } diff --git a/tensorflow/lite/g3doc/performance/gpu.md b/tensorflow/lite/g3doc/performance/gpu.md index 0a0826b24b3..b8f7c419e5b 100644 --- a/tensorflow/lite/g3doc/performance/gpu.md +++ b/tensorflow/lite/g3doc/performance/gpu.md @@ -179,6 +179,28 @@ delegate.close(); ### iOS +#### Swift + +Initialize TensorFlow Lite interpreter with the GPU delegate. + +```swift +import TensorFlowLite + +// Load model ... + +let delegate = MetalDelegate() + +if let interpreter = try Interpreter(modelPath: modelPath, + delegates: [delegate]) { + // Run inference ... +} + +``` + +#### Objective-C + +Note: For Objective-C, GPU delegate is provided via C API. 
+ In your application code, include the GPU delegate header and call the `Interpreter::ModifyGraphWithDelegate` function to register the GPU delegate to the interpreter: diff --git a/tensorflow/lite/g3doc/performance/gpu_advanced.md b/tensorflow/lite/g3doc/performance/gpu_advanced.md index c0194627392..4f6c4dea9dd 100644 --- a/tensorflow/lite/g3doc/performance/gpu_advanced.md +++ b/tensorflow/lite/g3doc/performance/gpu_advanced.md @@ -126,10 +126,28 @@ bazel build -c opt --config android_arm64 tensorflow/lite/delegates/gpu:gl_deleg bazel build -c opt --config android_arm64 tensorflow/lite/delegates/gpu:libtensorflowlite_gpu_gl.so # for dynamic library ``` -### iOS (ObjC++) +### iOS (Swift) -To use TensorFlow Lite on GPU, get the GPU delegate via `NewGpuDelegate()` and -then pass it to `Interpreter::ModifyGraphWithDelegate()` (instead of calling +Initialize TensorFlow Lite interpreter with the GPU delegate. + +```swift +import TensorFlowLite + +let delegate = MetalDelegate() +if let interpreter = try Interpreter(modelPath: modelPath, + delegates: [delegate]) { + + // Run inference ... +} + +``` + +### iOS (Objective-C) + +Note: For Objective-C, GPU delegate is provided via C API. + +To use TensorFlow Lite on GPU, get the GPU delegate via `TFLGpuDelegateCreate()` +and then pass it to `Interpreter::ModifyGraphWithDelegate()` (instead of calling `Interpreter::AllocateTensors()`). ```c++ @@ -142,12 +160,7 @@ InterpreterBuilder(*model, op_resolver)(&interpreter); // NEW: Prepare GPU delegate. -const GpuDelegateOptions options = { - .allow_precision_loss = false, - .wait_type = kGpuDelegateOptions::WaitType::Passive, -}; - -auto* delegate = NewGpuDelegate(options); +auto* delegate = TFLGpuDelegateCreate(/*default options=*/nullptr); if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) return false; // Run inference. @@ -156,7 +169,7 @@ if (interpreter->Invoke() != kTfLiteOk) return false; ReadFromOutputTensor(interpreter->typed_output_tensor(0)); // Clean up. -DeleteGpuDelegate(delegate); +TFLGpuDelegateDelete(delegate); ``` Note: When calling `Interpreter::ModifyGraphWithDelegate()` or @@ -169,7 +182,54 @@ called. ## Advanced usage -### Running quantized models (Experimental, Android only) +### Delegate Options for iOS + +`TFLGpuDelegateCreate()` accepts a `struct` of options. +([C API](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/gpu/metal_delegate.h), +[Swift API](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/experimental/swift/Sources/MetalDelegate.swift)) + +Passing `nullptr`(C API) or nothing (Swift API) to the initializer sets the +default options (which are explicated in the Basic Usage example above). 
+ +**Swift API** + +```swift + +// THIS: +var options = MetalDelegate.Options() +options.allowsPrecisionLoss = false +options.waitType = .passive +options.isQuantizationEnabled = false +let delegate = MetalDelegate(options: options) + +// IS THE SAME AS THIS: +let delegate = MetalDelegate() + +``` + +**C API (also used for Objective-C)** + +```c++ + +// THIS: +const TFLGpuDelegateOptions options = { + .allow_precision_loss = false, + .wait_type = TFLGpuDelegateWaitType::TFLGpuDelegateWaitTypePassive, + .enable_quantization = false, +}; + +auto* delegate = TFLGpuDelegateCreate(options); + +// IS THE SAME AS THIS: +auto* delegate = TFLGpuDelegateCreate(nullptr); + +``` + +While it is convenient to use `nullptr`, we recommend that you explicitly set +the options, to avoid any unexpected behavior if default values are changed in +the future. + +### Running quantized models (Experimental) The GPU delegate already supports [float16 quantized](https://www.tensorflow.org/lite/performance/post_training_float16_quant) @@ -186,6 +246,8 @@ tensors. This feature can be enabled using delegate options as follows: +#### Android + **C++ API** ```c++ @@ -206,51 +268,30 @@ GpuDelegate delegate = new GpuDelegate(new GpuDelegate.Options().setQuantizedMod Interpreter.Options options = (new Interpreter.Options()).addDelegate(delegate); ``` -### Delegate Options for iOS +#### iOS -`NewGpuDelegate()` accepts a `struct` of options. +**Swift API** -```c++ -struct GpuDelegateOptions { - // Allows to quantify tensors, downcast values, process in float16 etc. - bool allow_precision_loss; - - enum class WaitType { - // waitUntilCompleted - kPassive, - // Minimize latency. It uses active spinning instead of mutex and consumes - // additional CPU resources. - kActive, - // Useful when the output is used with GPU pipeline then or if external - // command encoder is set - kDoNotWait, - }; - WaitType wait_type; -}; +```swift +// NEW: Prepare custom options with feature enabled. +var options = MetalDelegate.Options() +options.isQuantizationEnabled = true +let delegate = MetalDelegate(options: options) ``` -Passing `nullptr` into `NewGpuDelegate()` sets the default options (which are -explicated in the Basic Usage example above). +**C API (also used for Objective-C)** -```c++ +```c // THIS: -const GpuDelegateOptions options = { - .allow_precision_loss = false, - .wait_type = kGpuDelegateOptions::WaitType::Passive, +// NEW: Prepare custom options with feature enabled. +const TFLGpuDelegateOptions options = { + .enable_quantization = true, }; -auto* delegate = NewGpuDelegate(options); - -// IS THE SAME AS THIS: -auto* delegate = NewGpuDelegate(nullptr); - +auto* delegate = TFLGpuDelegateCreate(options); ``` -While it is convenient to use `nullptr`, we recommend that you explicitly set -the options, to avoid any unexpected behavior if default values are changed in -the future. - ### Input/Output Buffers (iOS only) To do computation on the GPU, data must be made available to the GPU. This often @@ -280,8 +321,13 @@ off by calling `Interpreter::SetAllowBufferHandleOutput(true)` during initialization. ```c++ +#include "tensorflow/lite/delegates/gpu/metal_delegate.h" +#include "tensorflow/lite/delegates/gpu/metal_delegate_internal.h" + +// ... + // Prepare GPU delegate. 
-auto* delegate = NewGpuDelegate(nullptr); +auto* delegate = TFLGpuDelegateCreate(nullptr); interpreter->SetAllowBufferHandleOutput(true); // disable default gpu->cpu copy if (!TFLGpuDelegateBindMetalBufferToTensor(delegate, interpreter->inputs()[0], user_provided_input_buffer)) return false; if (!TFLGpuDelegateBindMetalBufferToTensor(delegate, interpreter->outputs()[0], user_provided_output_buffer)) return false; From 3bec74195ec5a242d8783fdfc718d45dc7210c4e Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Tue, 9 Jun 2020 07:40:08 -0700 Subject: [PATCH 116/178] Remove a defensive check that prevented adding Generic as superclass of Tensor. This is required to unblock #40132. PiperOrigin-RevId: 315481237 Change-Id: Ia56c0087ab129499fe815b96ae83564e5a49df8f --- tensorflow/python/framework/ops.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 8fee3057b8d..efb1ebbdbc2 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -183,16 +183,8 @@ def _override_helper(clazz_object, operator, func): func: the function that replaces the overridden operator. Raises: - ValueError: If operator has already been overwritten, - or if operator is not allowed to be overwritten. + ValueError: If operator is not allowed to be overwritten. """ - existing = getattr(clazz_object, operator, None) - if existing is not None: - # Check to see if this is a default method-wrapper or slot wrapper which - # will be true for the comparison operators. - if not isinstance(existing, type(object.__lt__)): - raise ValueError("operator %s cannot be overwritten again on class %s." % - (operator, clazz_object)) if operator not in Tensor.OVERLOADABLE_OPERATORS: raise ValueError("Overriding %s is disallowed" % operator) setattr(clazz_object, operator, func) From d2b02580e6f68d2360bd6ad6eda805aa1d4e9871 Mon Sep 17 00:00:00 2001 From: Jakob Buchgraber Date: Tue, 9 Jun 2020 07:59:17 -0700 Subject: [PATCH 117/178] Update TF bazel version requirements. 
PiperOrigin-RevId: 315483879 Change-Id: I4be29ffa3c425f01c28dc8c95250209a2b0bada4 --- .bazelversion | 2 +- configure.py | 2 +- tensorflow/tools/ci_build/install/install_bazel.sh | 2 +- tensorflow/tools/ci_build/install/install_bazel_from_source.sh | 2 +- tensorflow/tools/ci_build/release/common.sh | 2 +- .../tools/dockerfiles/dockerfiles/devel-cpu-jupyter.Dockerfile | 2 +- tensorflow/tools/dockerfiles/dockerfiles/devel-cpu.Dockerfile | 2 +- .../tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile | 2 +- tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile | 2 +- .../tools/dockerfiles/partials/ubuntu/bazel.partial.Dockerfile | 2 +- .../dockerfiles/partials/ubuntu/bazelbuild.partial.Dockerfile | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.bazelversion b/.bazelversion index 4a36342fcab..fd2a01863fd 100644 --- a/.bazelversion +++ b/.bazelversion @@ -1 +1 @@ -3.0.0 +3.1.0 diff --git a/configure.py b/configure.py index c2850beede6..da6af8f8cc9 100644 --- a/configure.py +++ b/configure.py @@ -49,7 +49,7 @@ _TF_BAZELRC_FILENAME = '.tf_configure.bazelrc' _TF_WORKSPACE_ROOT = '' _TF_BAZELRC = '' _TF_CURRENT_BAZEL_VERSION = None -_TF_MIN_BAZEL_VERSION = '2.0.0' +_TF_MIN_BAZEL_VERSION = '3.1.0' _TF_MAX_BAZEL_VERSION = '3.99.0' NCCL_LIB_PATHS = [ diff --git a/tensorflow/tools/ci_build/install/install_bazel.sh b/tensorflow/tools/ci_build/install/install_bazel.sh index f14740ea6f9..555eb2d935f 100755 --- a/tensorflow/tools/ci_build/install/install_bazel.sh +++ b/tensorflow/tools/ci_build/install/install_bazel.sh @@ -15,7 +15,7 @@ # ============================================================================== # Select bazel version. -BAZEL_VERSION="3.0.0" +BAZEL_VERSION="3.1.0" set +e local_bazel_ver=$(bazel version 2>&1 | grep -i label | awk '{print $3}') diff --git a/tensorflow/tools/ci_build/install/install_bazel_from_source.sh b/tensorflow/tools/ci_build/install/install_bazel_from_source.sh index 3c40f0c0f34..66761217018 100755 --- a/tensorflow/tools/ci_build/install/install_bazel_from_source.sh +++ b/tensorflow/tools/ci_build/install/install_bazel_from_source.sh @@ -18,7 +18,7 @@ # It will compile bazel from source and install it in /usr/local/bin # Select bazel version. -BAZEL_VERSION="3.0.0" +BAZEL_VERSION="3.1.0" set +e local_bazel_ver=$(bazel version 2>&1 | grep -i label | awk '{print $3}') diff --git a/tensorflow/tools/ci_build/release/common.sh b/tensorflow/tools/ci_build/release/common.sh index 0a9f6eae0b3..b533564e7a1 100644 --- a/tensorflow/tools/ci_build/release/common.sh +++ b/tensorflow/tools/ci_build/release/common.sh @@ -17,7 +17,7 @@ # Keep in sync with tensorflow_estimator and configure.py. 
# LINT.IfChange -LATEST_BAZEL_VERSION=3.0.0 +LATEST_BAZEL_VERSION=3.1.0 # LINT.ThenChange( # //tensorflow/opensource_only/configure.py, # //tensorflow_estimator/google/kokoro/common.sh, diff --git a/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu-jupyter.Dockerfile index 1024f64bfbb..a215449ef7f 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu-jupyter.Dockerfile @@ -93,7 +93,7 @@ RUN python3 -m pip --no-cache-dir install \ enum34 # Install bazel -ARG BAZEL_VERSION=3.0.0 +ARG BAZEL_VERSION=3.1.0 RUN mkdir /bazel && \ wget -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ wget -O /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ diff --git a/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu.Dockerfile index f20b4c28e3a..f7d414bc902 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu.Dockerfile @@ -93,7 +93,7 @@ RUN python3 -m pip --no-cache-dir install \ enum34 # Install bazel -ARG BAZEL_VERSION=3.0.0 +ARG BAZEL_VERSION=3.1.0 RUN mkdir /bazel && \ wget -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ wget -O /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ diff --git a/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile index 091ac0d3931..e7e717c584c 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile @@ -135,7 +135,7 @@ RUN python3 -m pip --no-cache-dir install \ enum34 # Install bazel -ARG BAZEL_VERSION=3.0.0 +ARG BAZEL_VERSION=3.1.0 RUN mkdir /bazel && \ wget -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ wget -O /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ diff --git a/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile index 9f7e67eeee3..b18af60892f 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile @@ -135,7 +135,7 @@ RUN python3 -m pip --no-cache-dir install \ enum34 # Install bazel -ARG BAZEL_VERSION=3.0.0 +ARG BAZEL_VERSION=3.1.0 RUN mkdir /bazel && \ wget -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ wget -O /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ diff --git a/tensorflow/tools/dockerfiles/partials/ubuntu/bazel.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu/bazel.partial.Dockerfile index fc6baecb9b6..54fdb2be648 100644 --- a/tensorflow/tools/dockerfiles/partials/ubuntu/bazel.partial.Dockerfile +++ b/tensorflow/tools/dockerfiles/partials/ubuntu/bazel.partial.Dockerfile @@ -23,7 +23,7 @@ RUN python3 -m pip --no-cache-dir 
install \ enum34 # Install bazel -ARG BAZEL_VERSION=3.0.0 +ARG BAZEL_VERSION=3.1.0 RUN mkdir /bazel && \ wget -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \ wget -O /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \ diff --git a/tensorflow/tools/dockerfiles/partials/ubuntu/bazelbuild.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu/bazelbuild.partial.Dockerfile index 2b4761abc39..c0327619ad9 100644 --- a/tensorflow/tools/dockerfiles/partials/ubuntu/bazelbuild.partial.Dockerfile +++ b/tensorflow/tools/dockerfiles/partials/ubuntu/bazelbuild.partial.Dockerfile @@ -21,7 +21,7 @@ RUN python3 -m pip --no-cache-dir install \ enum34 # Build and install bazel -ENV BAZEL_VERSION 3.0.0 +ENV BAZEL_VERSION 3.1.0 WORKDIR / RUN mkdir /bazel && \ cd /bazel && \ From 502a4bf641e730ab7a269384b0be1bcdec9c3f61 Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Tue, 9 Jun 2020 08:31:28 -0700 Subject: [PATCH 118/178] Remove the deps to Keras model from gradients_test.py The Mnist model was not using any Keras model functionality like compile/fit. We can just use a layer to replace it, which already include __call__(). PiperOrigin-RevId: 315488824 Change-Id: Ibbffd05b448f8d02f0211b6f4e4f40e5e065e3de --- tensorflow/python/ops/parallel_for/gradients_test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/python/ops/parallel_for/gradients_test.py b/tensorflow/python/ops/parallel_for/gradients_test.py index a6d29b646a8..fdb70c52778 100644 --- a/tensorflow/python/ops/parallel_for/gradients_test.py +++ b/tensorflow/python/ops/parallel_for/gradients_test.py @@ -30,7 +30,6 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import test_util -from tensorflow.python.keras.engine import training as keras_training from tensorflow.python.layers import layers as tf_layers from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops as tf_control_flow_ops @@ -215,7 +214,7 @@ def create_lstm_per_eg_grad(batch_size, state_size, steps, inputs_size=None): # Importing the code from tensorflow_models seems to cause errors. Hence we # duplicate the model definition here. # TODO(agarwal): Use the version in tensorflow_models/official instead. -class Mnist(keras_training.Model): +class Mnist(tf_layers.Layer): def __init__(self, data_format): """Creates a model for classifying a hand-written digit. From d318b778e90fef1b90ef9bfee7cfc37f3c2e9cfd Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Tue, 9 Jun 2020 08:38:56 -0700 Subject: [PATCH 119/178] [tfdbg2] Refactor and simplify code in dumping_callback.py - There is some duplication in the code that creates DebugIdentityV2 ops for tfdbg2. This CL removes some of the duplication. 
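[Editor's note, not part of the patch] The de-duplication this change performs can be illustrated with a small, self-contained sketch: keyword arguments that are identical across all `tensor_debug_mode` branches are collected once into a single dict and splatted into each call, instead of being repeated per branch. `make_debug_op` below is a hypothetical stand-in for `gen_debug_ops.debug_identity_v2`, used only so the example runs without TensorFlow.

```python
# Hypothetical stand-in for gen_debug_ops.debug_identity_v2; it simply records
# the keyword arguments it receives, keeping the sketch self-contained.
def make_debug_op(tensor, **kwargs):
    return {"tensor": tensor, **kwargs}


def instrument(tensors, tensor_debug_mode, context_id, op_name, debug_urls):
    instrumented = []
    for output_slot, tensor in enumerate(tensors):
        # Shared arguments are built once per tensor instead of being repeated
        # inside every tensor_debug_mode branch (the pattern used by this CL).
        debug_op_kwargs = {
            "tfdbg_context_id": context_id,
            "op_name": op_name,
            "output_slot": output_slot,
            "tensor_debug_mode": tensor_debug_mode,
            "debug_urls": debug_urls,
        }
        if tensor_debug_mode == "NO_TENSOR":
            # Trace a cheap placeholder value instead of the real tensor.
            instrumented.append(make_debug_op([], **debug_op_kwargs))
        else:
            instrumented.append(make_debug_op(tensor, **debug_op_kwargs))
    return instrumented


print(instrument([1.0, 2.0], "FULL_TENSOR", "ctx0", "MatMul",
                 ["file:///tmp/tfdbg"]))
```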
PiperOrigin-RevId: 315490159 Change-Id: Ic8d7003aeb8552d43ddbd5a6731e550ddd7d2f90 --- .../python/debug/lib/dumping_callback.py | 126 ++++++------------ 1 file changed, 40 insertions(+), 86 deletions(-) diff --git a/tensorflow/python/debug/lib/dumping_callback.py b/tensorflow/python/debug/lib/dumping_callback.py index 7e61631fb1c..a91ea4676a8 100644 --- a/tensorflow/python/debug/lib/dumping_callback.py +++ b/tensorflow/python/debug/lib/dumping_callback.py @@ -350,8 +350,22 @@ class _DumpingCallback(object): debug_urls = ["file://%s" % self._dump_root] is_v1_graph_mode = not ops.executing_eagerly_outside_functions() instrumented_tensors = [] if is_v1_graph_mode else None - if tensor_debug_mode == debug_event_pb2.TensorDebugMode.NO_TENSOR: - for output_slot, tensor in enumerate(tensors): + for output_slot, tensor in enumerate(tensors): + with self._symbolic_tensor_counter_lock: + debug_identity_name = ("DebugIdentityV2_%d" % + self._symbolic_tensor_counter) + debug_identity_op_kwargs = { + "tfdbg_context_id": tfdbg_context_id, + "op_name": op_name, + "output_slot": output_slot, + "tensor_debug_mode": self._tensor_debug_mode, + "debug_urls": debug_urls, + "name": debug_identity_name, + } + if tf_compat.forward_compatible(2020, 6, 24): + debug_identity_op_kwargs[ + "circular_buffer_size"] = self._circular_buffer_size + if tensor_debug_mode == debug_event_pb2.TensorDebugMode.NO_TENSOR: if (not self._should_dump_tensor(op_type, tensor.dtype) or not tensor.dtype.is_numpy_compatible): if is_v1_graph_mode: @@ -364,43 +378,19 @@ class _DumpingCallback(object): continue # Except in V1 graph mode + control flow, debug_identity_v2 triggers # auto control dependency because it's a stateful op. - with self._symbolic_tensor_counter_lock: - debug_identity_name = ("DebugIdentityV2_%d" % - self._symbolic_tensor_counter) - if tf_compat.forward_compatible(2020, 6, 24): - debug_tensor = gen_debug_ops.debug_identity_v2( - # Use an empty (shape=[0]) float32 tensor for the NO_TENSOR mode - # as a low-overhead placeholder, since no actual tensor value is - # traced. - constant_op.constant([], dtype=dtypes.float32), - tfdbg_context_id=tfdbg_context_id, - op_name=op_name, - output_slot=output_slot, - tensor_debug_mode=self._tensor_debug_mode, - debug_urls=debug_urls, - circular_buffer_size=self._circular_buffer_size, - name=debug_identity_name) - else: - debug_tensor = gen_debug_ops.debug_identity_v2( - # Use an empty (shape=[0]) float32 tensor for the NO_TENSOR mode - # as a low-overhead placeholder, since no actual tensor value is - # traced. - constant_op.constant([], dtype=dtypes.float32), - tfdbg_context_id=tfdbg_context_id, - op_name=op_name, - output_slot=output_slot, - tensor_debug_mode=self._tensor_debug_mode, - debug_urls=debug_urls, - name=debug_identity_name) + debug_tensor = gen_debug_ops.debug_identity_v2( + # Use an empty (shape=[0]) float32 tensor for the NO_TENSOR mode + # as a low-overhead placeholder, since no actual tensor value is + # traced. 
+ constant_op.constant([], dtype=dtypes.float32), + **debug_identity_op_kwargs) if is_v1_graph_mode: instrumented_tensors.append(self._process_v1_graph_mode_tensor( op_type, tensor, debug_tensor, tensor_debug_mode)) - return instrumented_tensors - elif tensor_debug_mode in (debug_event_pb2.TensorDebugMode.CURT_HEALTH, - debug_event_pb2.TensorDebugMode.CONCISE_HEALTH, - debug_event_pb2.TensorDebugMode.FULL_HEALTH, - debug_event_pb2.TensorDebugMode.SHAPE): - for output_slot, tensor in enumerate(tensors): + elif tensor_debug_mode in (debug_event_pb2.TensorDebugMode.CURT_HEALTH, + debug_event_pb2.TensorDebugMode.CONCISE_HEALTH, + debug_event_pb2.TensorDebugMode.FULL_HEALTH, + debug_event_pb2.TensorDebugMode.SHAPE): dtype = tensor.dtype dtype_is_dumpable = ( tensor_debug_mode in ( @@ -415,37 +405,16 @@ class _DumpingCallback(object): if is_v1_graph_mode: instrumented_tensors.append(tensor) continue - if tf_compat.forward_compatible(2020, 6, 24): - debug_tensor = gen_debug_ops.debug_identity_v2( - gen_debug_ops.debug_numeric_summary_v2( - tensor, - tensor_id=tensor_ids[output_slot], - tensor_debug_mode=self._tensor_debug_mode, - output_dtype=dtypes.float64), - tfdbg_context_id=tfdbg_context_id, - op_name=op_name, - output_slot=output_slot, - tensor_debug_mode=self._tensor_debug_mode, - debug_urls=debug_urls, - circular_buffer_size=self._circular_buffer_size) - else: - debug_tensor = gen_debug_ops.debug_identity_v2( - gen_debug_ops.debug_numeric_summary_v2( - tensor, - tensor_id=tensor_ids[output_slot], - tensor_debug_mode=self._tensor_debug_mode, - output_dtype=dtypes.float64), - tfdbg_context_id=tfdbg_context_id, - op_name=op_name, - output_slot=output_slot, - tensor_debug_mode=self._tensor_debug_mode, - debug_urls=debug_urls) + debug_tensor = gen_debug_ops.debug_identity_v2( + gen_debug_ops.debug_numeric_summary_v2( + tensor, + tensor_id=tensor_ids[output_slot], + tensor_debug_mode=self._tensor_debug_mode, + output_dtype=dtypes.float64), **debug_identity_op_kwargs) if is_v1_graph_mode: instrumented_tensors.append(self._process_v1_graph_mode_tensor( op_type, tensor, debug_tensor, tensor_debug_mode)) - return instrumented_tensors - elif tensor_debug_mode == debug_event_pb2.TensorDebugMode.FULL_TENSOR: - for output_slot, tensor in enumerate(tensors): + elif tensor_debug_mode == debug_event_pb2.TensorDebugMode.FULL_TENSOR: if (not self._should_dump_tensor(op_type, tensor.dtype) or not tensor.dtype.is_numpy_compatible): # Instrumenting DT_VARIANT and DT_RESOURCE type tensors under @@ -453,31 +422,16 @@ class _DumpingCallback(object): if is_v1_graph_mode: instrumented_tensors.append(tensor) continue - if tf_compat.forward_compatible(2020, 6, 24): - debug_tensor = gen_debug_ops.debug_identity_v2( - tensor, - tfdbg_context_id=tfdbg_context_id, - op_name=op_name, - output_slot=output_slot, - tensor_debug_mode=self._tensor_debug_mode, - debug_urls=debug_urls, - circular_buffer_size=self._circular_buffer_size) - else: - debug_tensor = gen_debug_ops.debug_identity_v2( - tensor, - tfdbg_context_id=tfdbg_context_id, - op_name=op_name, - output_slot=output_slot, - tensor_debug_mode=self._tensor_debug_mode, - debug_urls=debug_urls) + debug_tensor = gen_debug_ops.debug_identity_v2( + tensor, **debug_identity_op_kwargs) if is_v1_graph_mode: instrumented_tensors.append(self._process_v1_graph_mode_tensor( op_type, tensor, debug_tensor, tensor_debug_mode)) - return instrumented_tensors - else: - raise NotImplementedError( - "Symbolic tensor instrumentation is not implemented for debug mode " - "%s" % 
self._tensor_debug_mode) + else: + raise NotImplementedError( + "Symbolic tensor instrumentation is not implemented for debug mode " + "%s" % self._tensor_debug_mode) + return instrumented_tensors def _dump_eager_tensors(self, tensors, From b18a6f4d7a4dd803be7ee25d300177f45e2119e8 Mon Sep 17 00:00:00 2001 From: Kibeom Kim Date: Tue, 9 Jun 2020 08:45:07 -0700 Subject: [PATCH 120/178] Fix default `main` argument of tf_py_test target PiperOrigin-RevId: 315491112 Change-Id: Ib3b43d9bd92fa084f749826de76ddaba78c4e019 --- tensorflow/tensorflow.bzl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 95b51b2e9b9..50548740157 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -2273,6 +2273,12 @@ def tf_py_test( **kwargs ) if tfrt_enabled_internal: + # None `main` defaults to `name` + ".py" in `py_test` target. However, since we + # are appending _tfrt. it becomes `name` + "_tfrt.py" effectively. So force + # set `main` argument without `_tfrt`. + if main == None: + main = name + ".py" + py_test( name = name + "_tfrt", size = size, From 75f09e6973f84858906983e19d4544a70615ff7f Mon Sep 17 00:00:00 2001 From: Yanhua Sun Date: Tue, 9 Jun 2020 09:16:36 -0700 Subject: [PATCH 121/178] Fix save model issue for ops with a list of functions PiperOrigin-RevId: 315496681 Change-Id: I622550d1a073e4c21c3c7af625cf76481e365dbc --- .../saved_model/function_deserialization.py | 9 ++++++++- tensorflow/python/saved_model/save_test.py | 19 +++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/saved_model/function_deserialization.py b/tensorflow/python/saved_model/function_deserialization.py index dccb222c26e..63fa4a4acbd 100644 --- a/tensorflow/python/saved_model/function_deserialization.py +++ b/tensorflow/python/saved_model/function_deserialization.py @@ -400,8 +400,11 @@ def fix_node_def(node_def, functions, shared_name_suffix, debug_name): if node_def.op in functions: node_def.op = functions[node_def.op].name for _, attr_value in node_def.attr.items(): - if attr_value.func.name: + if attr_value.WhichOneof("value") == "func": attr_value.func.name = functions[attr_value.func.name].name + elif attr_value.WhichOneof("value") == "list": + for fn in attr_value.list.func: + fn.name = functions[fn.name].name # Fix old table creation bug. 
if node_def.op == "HashTableV2": @@ -471,6 +474,10 @@ def _list_function_deps(fdef, library_function_names): for _, attr_value in node_def.attr.items(): if attr_value.WhichOneof("value") == "func": deps.add(attr_value.func.name) + elif attr_value.WhichOneof("value") == "list": + for fn in attr_value.list.func: + deps.add(fn.name) + return deps diff --git a/tensorflow/python/saved_model/save_test.py b/tensorflow/python/saved_model/save_test.py index 09e7296a483..f94cae8a4de 100644 --- a/tensorflow/python/saved_model/save_test.py +++ b/tensorflow/python/saved_model/save_test.py @@ -44,6 +44,7 @@ from tensorflow.python.keras.optimizer_v2 import adam from tensorflow.python.lib.io import file_io from tensorflow.python.module import module from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops @@ -117,6 +118,24 @@ class SaveTest(test.TestCase): {"output_0": 2.}, _import_and_infer(save_dir, {"x": 1.})) + def test_method_save_list_func(self): + root = tracking.AutoTrackable() + + @def_function.function + def case_fn(x): + branch_index = constant_op.constant(1) + branches = [lambda: x, lambda: x + 1] + case_out = control_flow_ops.switch_case(branch_index, branches) + return case_out + + root.f = def_function.function( + lambda x: 2. * case_fn(x), + input_signature=[tensor_spec.TensorSpec(None, dtypes.float32)]) + root.f(constant_op.constant(1.)) + save_dir = os.path.join(self.get_temp_dir(), "saved_model") + save.save(root, save_dir, root.f) + self.assertEqual({"output_0": 4.}, _import_and_infer(save_dir, {"x": 1.})) + def test_method_save_concrete(self): root = tracking.AutoTrackable() root.f = def_function.function( From ed5227702692fc2612b590009cf51706b88ebe40 Mon Sep 17 00:00:00 2001 From: Cesar Crusius Date: Tue, 9 Jun 2020 09:19:09 -0700 Subject: [PATCH 122/178] Unify convert to constants logic. Before this change there were two different code paths for dealing with graph freezing for v1 and v2 graphs. They largely did the same thing, but each path had and lacked capabilities the other had, and each had its own bugs. This change re-writes the previous v2 logic so it can cope with session-based graphs and allow for the conversion of a subset of the variables, and changes the previous convert_variables_to_constants call to proxy into the new logic. The new logic is built around a more "graphy" algorithm: variables are converted to constants, and that conversion is then propagated through the graph by following the graph edges. This hopefully makes it easier to understand what is going on, and to change it later on. More granular tests were added, in order to check that the right graph manipulations were performed. In order to do that, some graph merging infrastructure had to be created in the test. 
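[Editor's note, not part of the patch] The "graphy" algorithm described above can be sketched in a few lines: a variable node is rewritten as a constant, and the resulting dtype change is pushed along outgoing edges so downstream nodes stay consistent. The `Node` class below is a simplified, standalone stand-in for the `_Node`/`_Convertible` machinery added in `convert_to_constants.py`; it is illustrative only and omits functions, control flow, and the real NodeDef rewrites.

```python
# Simplified sketch of edge propagation: convert one variable node to a
# constant, then walk outgoing edges so downstream nodes update their dtypes.
class Node:
    def __init__(self, name, op, dtype=None):
        self.name, self.op, self.dtype = name, op, dtype
        self.outgoing_edges = []  # destination Node objects

    def convert_variable_to_constant(self, value):
        # Replace this node by a constant and push the dtype downstream.
        self.op, self.value = "Const", value
        self.dtype = type(value).__name__
        for dest in self.outgoing_edges:
            dest.propagate_dtype(self.dtype)

    def propagate_dtype(self, dtype):
        # Downstream nodes adopt the dtype and keep propagating it along
        # their own outgoing edges; the guard stops repeated visits.
        if self.dtype != dtype:
            self.dtype = dtype
            for dest in self.outgoing_edges:
                dest.propagate_dtype(dtype)


var = Node("v", "VarHandleOp")
read = Node("v/Read", "ReadVariableOp")
add = Node("add", "AddV2")
var.outgoing_edges.append(read)
read.outgoing_edges.append(add)
var.convert_variable_to_constant(3.0)
print(var.op, read.dtype, add.dtype)  # Const float float
```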
PiperOrigin-RevId: 315497124 Change-Id: I3a33acc804b5dc9628c208df8fd1b7c59f906ddb --- tensorflow/python/BUILD | 7 +- .../python/framework/convert_to_constants.py | 1392 +++++++++++------ .../framework/convert_to_constants_test.py | 758 +++++++++ .../python/framework/graph_util_impl.py | 273 +--- .../python/framework/graph_util_test.py | 210 --- 5 files changed, 1694 insertions(+), 946 deletions(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 6efc0252347..b93992246ab 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -6633,6 +6633,7 @@ tf_py_test( deps = [ ":client", ":client_testlib", + ":control_flow_v2_toggles", ":framework", ":framework_for_generated_wrappers", ":math_ops", @@ -6650,9 +6651,11 @@ tf_py_test( python_version = "PY3", tags = ["no_rocm"], deps = [ - "client_testlib", - "framework_test_lib", + ":client_testlib", + ":control_flow_v2_toggles", ":convert_to_constants", + ":framework_test_lib", + ":math_ops", ], ) diff --git a/tensorflow/python/framework/convert_to_constants.py b/tensorflow/python/framework/convert_to_constants.py index 5c260b9983f..87c74c3263d 100644 --- a/tensorflow/python/framework/convert_to_constants.py +++ b/tensorflow/python/framework/convert_to_constants.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import collections import numpy as np from tensorflow.core.framework import attr_value_pb2 @@ -27,20 +28,853 @@ from tensorflow.core.framework import variable_pb2 from tensorflow.core.protobuf import config_pb2 from tensorflow.core.protobuf import meta_graph_pb2 from tensorflow.core.protobuf import rewriter_config_pb2 -from tensorflow.python.eager import wrap_function from tensorflow.python.framework import dtypes +from tensorflow.python.framework import graph_util +from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util from tensorflow.python.grappler import tf_optimizer from tensorflow.python.ops import array_ops -from tensorflow.python.util import object_identity from tensorflow.python.training.saver import export_meta_graph +from tensorflow.python.util import lazy_loader +from tensorflow.python.util import object_identity +# Lazy load the single eager module to avoid introducing new dependencies for +# graph_util:convert_variables_to_constants (eg in +# tensorflow/contrib/session_bundle:session_bundle_py_test). 
+wrap_function = lazy_loader.LazyLoader( + "wrap_function", globals(), + "tensorflow.python.eager.wrap_function") _CONDITIONAL_OPS = set(["If", "StatelessIf"]) _LOOP_OPS = set(["While", "StatelessWhile"]) _CONTROL_FLOW_OPS = _CONDITIONAL_OPS.union(_LOOP_OPS) +class _TensorData( + collections.namedtuple("_TensorData", ["numpy", "dtype", "index"])): + """Data about a tensor that was converted to a constant.""" + __slots__ = () + + @property + def dtype_attr(self): + return attr_value_pb2.AttrValue(type=self.dtype) + + +class _EndPoint(collections.namedtuple("_EndPoint", ["convertible", "index"])): + """An endpoint in a graph.""" + __slots__ = () + + def __str__(self): + return "{}[{}]".format(self.convertible, self.index) + + +class _Edge(collections.namedtuple("_Edge", ["source", "destination"])): + """A directed graph edge.""" + __slots__ = () + + def __str__(self): + return "{} -> {}".format(self.source, self.destination) + + +class _Convertible(object): + """An entity that can have variables converted to constants.""" + + def __init__(self, enclosing_graph): + self._enclosing_graph = enclosing_graph + self._outgoing_edges = [] + self._converted_self = None + + def converted_self(self): + """A copy of this Convertible to be modified during conversion. + + Returns: + Implementations should return the copied instance, which in turn should + be contained in converted_enclosing_graph(). This instance is the one that + will be modified during conversion. Its main use will be in the + implementations of convert_variable_to_constant(). + """ + raise NotImplementedError() + + def convert_variable_to_constant(self, incoming_edge, tensor_data): + """Converts a variable in this Convertible and its dependencies. + + This method should make sure that a converted copy of itself is present in + the converted graph, and that all Convertibles depending on this one also go + through the same process. + + Args: + incoming_edge: The graph edge into this Convertible that is being + converted to a constant. + tensor_data: The tensor representing the constant. + """ + raise NotImplementedError() + + def create_edges(self): + """Calls add_outgoing_edge for all edges known to this Convertible. + + This is used to build the graph dependencies, so that conversion of + variables to constants can be properly propagated through the graph. Usually + this method will call add_outgoing_edge() to all the Convertible inputs. + """ + raise NotImplementedError() + + def add_outgoing_edge(self, edge): + """Adds an outgoing edge to the Convertible's list of edges. + + Args: + edge: The outgoing edge (its source should be 'self'). + """ + self._outgoing_edges.append(edge) + + @property + def converted_enclosing_graph(self): + """The graph being converted.""" + return self._enclosing_graph.converted_self() + + @property + def outgoing_edges(self): + """The list of edges starting at this Convertible.""" + return self._outgoing_edges + + +class _Function(_Convertible): + """A library function Convertible. + + Edges into functions are edges from node _inputs_ into function _inputs_: + Functions get their input from their callers, not from node outputs, and the + callers in turn get those values as inputs. 
+ """ + + def __init__(self, function, enclosing_graph): + super(_Function, self).__init__(enclosing_graph) + self._function = function + self._nodes = { + n.name: + _Node.new(node=n, function=self, enclosing_graph=enclosing_graph) + for n in function.node_def + } + + def __str__(self): + return self.function.signature.name + + @property + def function(self): + return self._function + + @property + def nodes(self): + return self._nodes + + def converted_self(self): + """The Function copy to be converted. + + The copy will be renamed according to the graph's converted_function_name + map, to ensure the name does not match anything currently in TensorFlow's + function cache. + + Returns: + The function instance to be converted. + """ + if self._converted_self is None: + old_name = self.function.signature.name + new_name = self._enclosing_graph.converted_function_names[old_name] + self.converted_enclosing_graph.rename_function(old_name, new_name) + self._converted_self = self.converted_enclosing_graph.functions[new_name] + return self._converted_self + + def convert_variable_to_constant(self, incoming_edge, tensor_data): + """Converts one function argument into a constant. + + Args: + incoming_edge: The edge into the argument to be converted. + tensor_data: The constant value. + """ + function = self.converted_self().function + index = incoming_edge.destination.index + function.signature.input_arg[index].type = tensor_data.dtype + + for edge in self.outgoing_edges: + if edge.source.index == index: + edge.destination.convertible.convert_variable_to_constant( + edge, tensor_data) + + def create_edges(self): + for n in self._nodes.values(): + n.create_edges() + + +class _Node(_Convertible): + """A Convertible NodeDef.""" + + def __init__(self, node, function, enclosing_graph): + super(_Node, self).__init__(enclosing_graph) + self._node = node + self._function = function + + def __str__(self): + return self._node.name + + @staticmethod + def new(node, function, enclosing_graph): + """Creates a new _Node base on its operation type.""" + if node.op in ["VariableV2", "VarHandleOp", "Placeholder"]: + return _VarHandle(node, function, enclosing_graph) + elif node.op == "Case": + return _Case(node, function, enclosing_graph) + elif node.op == "Merge": + return _Merge(node, function, enclosing_graph) + elif node.op == "PartitionedCall": + return _PartitionedCall(node, function, enclosing_graph) + elif node.op == "ReadVariableOp": + return _ReadVariable(node, function, enclosing_graph) + elif node.op == "ResourceGather": + return _ResourceGather(node, function, enclosing_graph) + elif node.op == "ResourceGatherNd": + return _ResourceGatherNd(node, function, enclosing_graph) + elif node.op in ["If", "StatelessIf"]: + return _If(node, function, enclosing_graph) + elif node.op in ["While", "StatelessWhile"]: + return _While(node, function, enclosing_graph) + elif node.op in ["Enter", "Exit", "Identity", "NextIteration", "Switch"]: + return _Intermediate(node, function, enclosing_graph) + else: + return _Node(node, function, enclosing_graph) + + @property + def node(self): + return self._node + + @property + def container(self): + """The node container (either a graph or a function).""" + if self._function is not None: + return self._function.function + return self._enclosing_graph.graph_def + + def converted_self(self): + """The NodeDef to be converted. + + Returns: + The NodeDef to be converted, which can come from either a graph for a + function. 
Derived classes should call this (via 'super') to make sure the + node is retrieved from the right place. + """ + if self._converted_self is None: + source = self._function or self._enclosing_graph + self._converted_self = source.converted_self().nodes[self._node.name] + return self._converted_self + + def convert_variable_to_constant(self, incoming_edge, tensor_data): + pass + + def create_edges(self): + for index, name in enumerate(self._node.input): + # Discard edges from control inputs. + if name[0] == "^": + continue + source = self.resolve_input(name) + source.convertible.add_outgoing_edge( + _Edge(source, _EndPoint(self, index))) + + def resolve_input(self, input_name): + """Resolves an input into its _EndPoint. + + A NodeDef's input name can refer to either global NodeDefs (in the + GraphDef's node list), a NodeDef in a function's node list, or a Function + (in the GraphDef's function library). The name can also carry semantic + information, depending on whether it starts with "^". This method handles + all that logic in order to find the object to which the input name refers + to. + + Args: + input_name: The input name to resolve. + + Returns: + The object referred to by 'input_name'. + """ + + # The logic below oversimplifes the semantics, but is good enough for the + # purposes of converting to constants. The introduction of new types of + # operations may change this, forcing the code to be more generic. + # + # In particular, we are assuming that the lack of an index suffix means + # ":0", when it could mean "all the outputs of a node." This works now + # because converting to constants relies very little on output types, and + # when it does it specializes its treatment in dedicated classes. + name_elts = input_name.split(":") + source_name = name_elts[0] + if source_name[0] == "^": + source_name = source_name[1:] + source_index = 0 + if len(name_elts) > 1 and name_elts[-1].isnumeric(): + source_index = int(name_elts[-1]) + + if self._function is None: + return _EndPoint(self._enclosing_graph.nodes[source_name], source_index) + + if source_index != 0 or source_name in self._function.nodes: + return _EndPoint(self._function.nodes[source_name], source_index) + + inputs = [i.name for i in self._function.function.signature.input_arg] + return _EndPoint(self._function, inputs.index(source_name)) + + def update_dtype(self, attr_name, index, dtype): + """Changes the type of a given input. + + Args: + attr_name: The NodeDef attribute containing the type to change. + index: The index of the input type to change. + dtype: The type to change to. + """ + attr = self._node.attr[attr_name] + num_types = 0 + # Check for various 'oneof' possibilities, and update the type if + # index in range. + if attr.HasField("list"): + types = attr.list.type + num_types = len(types) + if num_types > index: + types[index] = dtype + return + elif attr.HasField("type"): + num_types = 1 + if index == 0: + attr.type = dtype + return + raise ValueError( + "Index %d out of range for node(%s).attr(%s), which has %d elements." 
% + (index, self._node.name, attr_name, num_types)) + + +class _Intermediate(_Node): + """Specialization of _Node to intermediate ops.""" + + def convert_variable_to_constant(self, incoming_edge, tensor_data): + node = self.converted_self() + node.update_dtype("T", incoming_edge.destination.index, tensor_data.dtype) + if "_output_shapes" in node.node.attr: + del node.node.attr["_output_shapes"] + for edge in self.outgoing_edges: + edge.destination.convertible.convert_variable_to_constant( + edge, tensor_data) + + +class _Merge(_Node): + """Specialization of _Node to Merge ops.""" + + def convert_variable_to_constant(self, incoming_edge, tensor_data): + # The Merge operation has a single type for all its inputs, the number of + # which is reflected in the "N" attribute. For the time being, we assume + # that unilaterally changing all of them at once is ok. + super(_Merge, self).convert_variable_to_constant( + _Edge(incoming_edge.source, + _Edge(incoming_edge.destination.convertible, 0)), tensor_data) + + +class _VarHandle(_Node): + """Specialization of _Node to VarHandleOp.""" + + def convert_variable_to_constant(self, incoming_edge, tensor_data): + tensor_proto = tensor_util.make_tensor_proto(tensor_data.numpy, + tensor_data.dtype, + tensor_data.numpy.shape) + + node = self.converted_self().node + node.Clear() + node.name = self._node.name + node.op = "Const" + node.attr["dtype"].CopyFrom(tensor_data.dtype_attr) + node.attr["value"].tensor.CopyFrom(tensor_proto) + + for edge in self.outgoing_edges: + edge.destination.convertible.convert_variable_to_constant( + edge, tensor_data) + + +class _ResourceGather(_Node): + """Specialization of _Node to ResourceGather.""" + + def convert_variable_to_constant(self, incoming_edge, tensor_data): + # We currently skip the conversion if this is inside a function. 
+ if self._function is not None: + return + if self._node.attr["batch_dims"].i != 0: + raise ValueError("batch_dims != 0 is not supported by freeze_graph.") + axis_node_name = self._node.name + "/axis" + axis_dtype = self._node.attr["Tindices"] + axis_data = np.array(self._node.attr["batch_dims"].i) + output_axis_node = self.converted_self().container.node.add() + output_axis_node.name = axis_node_name + output_axis_node.op = "Const" + output_axis_node.attr["dtype"].CopyFrom(axis_dtype) + tensor = tensor_util.make_tensor_proto( + axis_data, dtype=axis_dtype.type, shape=axis_data.shape) + output_axis_node.attr["value"].tensor.CopyFrom(tensor) + + output_node = self.converted_self().node + output_node.Clear() + output_node.name = self._node.name + output_node.op = "GatherV2" + output_node.input.extend( + [self._node.input[0], self._node.input[1], axis_node_name]) + output_node.attr["Tparams"].CopyFrom(self._node.attr["dtype"]) + output_node.attr["Tindices"].CopyFrom(self._node.attr["Tindices"]) + output_node.attr["Taxis"].CopyFrom(axis_dtype) + if "_class" in self._node.attr: + output_node.attr["_class"].CopyFrom(self._node.attr["_class"]) + + +class _ResourceGatherNd(_Node): + """Specialization of _Node to ResourceGatherNd.""" + + def convert_variable_to_constant(self, incoming_edge, tensor_data): + output_node = self.converted_self().node + output_node.Clear() + output_node.name = self._node.name + output_node.op = "GatherNd" + output_node.input.extend([self._node.input[0], self._node.input[1]]) + output_node.attr["Tparams"].CopyFrom(self._node.attr["dtype"]) + output_node.attr["Tindices"].CopyFrom(self._node.attr["Tindices"]) + if "_class" in self._node.attr: + output_node.attr["_class"].CopyFrom(self._node.attr["_class"]) + + +class _ReadVariable(_Node): + """Specialization of _Node to ReadVariableOp.""" + + def convert_variable_to_constant(self, incoming_edge, tensor_data): + node = self.converted_self().node + node.Clear() + node.name = self._node.name + node.op = "Identity" + + node.input.append(self._node.input[0]) + node.attr["T"].CopyFrom(self._node.attr["dtype"]) + if "_class" in self._node.attr: + node.attr["_class"].CopyFrom(self._node.attr["_class"]) + + # If the ReadVariableOp is part of a function, then every node having the + # ReadVariableOp one as its input will refer to it using a ":value" + # syntax. We need to change that to ":output". + if self._function is not None: + for edge in self.outgoing_edges: + index = edge.destination.index + dest = edge.destination.convertible.converted_self() + if isinstance(dest, _Node): + input_name_parts = dest.node.input[index].split(":") + if len(input_name_parts) > 1 and input_name_parts[1] == "value": + input_name_parts[1] = "output" + dest.node.input[index] = ":".join(input_name_parts) + + +class _FunctionCaller(_Node): + """A base class for Convertibles that reference functions.""" + + def __init__(self, node, function, enclosing_graph, first_function_input, + type_attribute, function_attributes): + """Initializes a _FunctionCaller. + + Args: + node: As in _Node. + function: As in _Node. + enclosing_graph: As in _Node. + first_function_input: The index of the first NodeDef input that is tied to + the function inputs. It is assumed that the rest of the NodeDef inputs + map one to one to function inputs. + type_attribute: The name of the NodeDef attribute that defines the input + types. 
It is assumed that the types listed here map one-to-one with the + function inputs (that is, they do _not_ specify types for inputs that + are not passed to functions). + function_attributes: The names of the NodeDef attributes containing + references to functions. + """ + super(_FunctionCaller, self).__init__(node, function, enclosing_graph) + self._first_function_input = first_function_input + self._type_attribute = type_attribute + self._function_attributes = function_attributes + + def converted_self(self): + if self._converted_self is None: + node = super(_FunctionCaller, self).converted_self().node + converted_names = self._enclosing_graph.converted_function_names + for attr_name in self._function_attributes: + attr = node.attr[attr_name] + if attr.HasField("func"): + attr.func.name = converted_names[attr.func.name] + elif attr.HasField("list"): + for func in attr.list.func: + func.name = converted_names[func.name] + return self._converted_self + + def convert_variable_to_constant(self, incoming_edge, tensor_data): + node = self.converted_self() + index = incoming_edge.destination.index + if index >= self._first_function_input: + node.update_dtype(self._type_attribute, + index - self._first_function_input, tensor_data.dtype) + + # The loop below is reasonable but not correct in general: + # The outgoing edges going into the functions are correct, because the + # inputs map to the function inputs. But the edges going into other nodes do + # not take into account the logic of the body function, which may do + # arbitrary things to the node's output: + # + # while x < 0: + # return y + # + # In this case, the node's ":0" output may map to its ":1 input". For the + # time being, then, we only process edges into functions. + for edge in self.outgoing_edges: + dest = edge.destination.convertible + if edge.source.index == index and isinstance(dest, _Function): + dest.convert_variable_to_constant(edge, tensor_data) + + def create_edges(self): + """Creates edges related to a function caller. + + Edges from a function caller to its called functions are always edges from + _inputs_ to _inputs_: a FunctionDef input is given by the caller, based on + its own inputs. 
+ """ + super(_FunctionCaller, self).create_edges() + for attr_name in self._function_attributes: + attr = self._node.attr[attr_name] + if attr.HasField("func"): + function = self._enclosing_graph.functions[attr.func.name] + for index in range(len(self._node.input) - self._first_function_input): + self.add_outgoing_edge( + _Edge( + _EndPoint(self, index + self._first_function_input), + _EndPoint(function, index))) + elif attr.HasField("list"): + for func in attr.list.func: + function = self._enclosing_graph.functions[func.name] + for index in range( + len(self._node.input) - self._first_function_input): + self.add_outgoing_edge( + _Edge( + _EndPoint(self, index + self._first_function_input), + _EndPoint(function, index))) + + +class _If(_FunctionCaller): + """Specialization of _Node to If-like operations.""" + + def __init__(self, node, function, enclosing_graph): + super(_If, self).__init__( + node, + function, + enclosing_graph, + first_function_input=1, + type_attribute="Tin", + function_attributes=["then_branch", "else_branch"]) + + +class _Case(_FunctionCaller): + """Specialization of _Node to Case-like operations.""" + + def __init__(self, node, function, enclosing_graph): + super(_Case, self).__init__( + node, + function, + enclosing_graph, + first_function_input=1, + type_attribute="Tin", + function_attributes=["branches"]) + + +class _PartitionedCall(_FunctionCaller): + """Specialization of _Node to PartitionedCall-like operations.""" + + def __init__(self, node, function, enclosing_graph): + super(_PartitionedCall, self).__init__( + node, + function, + enclosing_graph, + first_function_input=0, + type_attribute="Tin", + function_attributes=["f"]) + + +class _While(_FunctionCaller): + """Specialization of _Node to While-like operations.""" + + def __init__(self, node, function, enclosing_graph): + super(_While, self).__init__( + node, + function, + enclosing_graph, + first_function_input=0, + type_attribute="T", + function_attributes=["body", "cond"]) + + def convert_variable_to_constant(self, incoming_edge, tensor_data): + super(_While, self).convert_variable_to_constant(incoming_edge, tensor_data) + node = self.converted_self() + node.node.attr["output_shapes"].list.shape[ + incoming_edge.destination.index].CopyFrom( + tensor_shape_pb2.TensorShapeProto(dim=[ + tensor_shape_pb2.TensorShapeProto.Dim(size=dim) + for dim in tensor_data.numpy.shape + ])) + # The while's body inputs and outputs have the same type, so here we can go + # ahead and change that function's output type. + body_name = self._node.attr["body"].func.name + body = self._enclosing_graph.functions[body_name].converted_self().function + body.signature.output_arg[ + incoming_edge.destination.index].type = tensor_data.dtype + + +class _GraphDef(_Convertible): + """A convertible GraphDef.""" + + def __init__(self, graph_def): + super(_GraphDef, self).__init__(enclosing_graph=None) + self._graph_def = graph_def + self._nodes = { + n.name: _Node.new(node=n, function=None, enclosing_graph=self) + for n in graph_def.node + } + self._functions = { + f.signature.name: _Function(f, enclosing_graph=self) + for f in graph_def.library.function + } + self.create_edges() + self._converted_function_names = None + + @property + def graph_def(self): + return self._graph_def + + @property + def nodes(self): + return self._nodes + + @property + def functions(self): + return self._functions + + @property + def converted_function_names(self): + """Map from original to new function names. 
+ + In order to avoid conflicts (two functions with the same name, one converted + and one not), we need to change the name of every converted function to + something that is hopefully unique. + + Returns: + Map from original to new suggested function names. + """ + if self._converted_function_names is None: + parsed_names = [] # List of (id, base_name, original_name) + for name in self.functions: + elements = name.rsplit("_", 1) + if len(elements) == 2 and elements[1].isnumeric(): + parsed_names.append((int(elements[1]), elements[0], name)) + else: + parsed_names.append((-1, name, name)) + self._converted_function_names = { + name: "{}_frozen_{}".format(base_name, ops.uid()) + for (_, base_name, name) in sorted(parsed_names) + } + + return self._converted_function_names + + def rename_function(self, old_name, new_name): + func = self.functions.pop(old_name) + func.function.signature.name = new_name + self.functions[new_name] = func + + def converted_self(self): + if self._converted_self is None: + copied_graph = graph_pb2.GraphDef() + copied_graph.CopyFrom(self._graph_def) + self._converted_self = _GraphDef(copied_graph) + return self._converted_self + + def create_edges(self): + for n in self._nodes.values(): + n.create_edges() + for f in self._functions.values(): + f.create_edges() + + +class _ConverterData(object): + """Container for constant conversion supporting data. + + The data includes the graph being converted, and the pre-converted + tensors. This class will be specialized for ConcreteFunction and Session-based + conversions, as the means to obtain that data is different for each case. + """ + + def __init__(self, + graph_def, + variable_names_whitelist=None, + variable_names_blacklist=None): + self._graph_def = graph_def + self._tensor_data = {} + self._build_node_defs_list() + self._variable_names_whitelist = variable_names_whitelist + self._variable_names_blacklist = variable_names_blacklist + + @property + def graph_def(self): + """The graph to be converted.""" + return self._graph_def + + @property + def node_defs(self): + """All the node defs in the graph to be converted. + + Returns: + A map from node name to the NodeDef for all NodeDefs in the graph, as well + as all control flow NodeDefs in the functions. + """ + return self._node_defs + + @property + def tensor_data(self): + """A map from tensor name to its converted _TensorData.""" + return self._tensor_data + + def _should_convert(self, name): + """Checks whether to convert the given variable name to a constant.""" + return (self._variable_names_whitelist is None or + name in self._variable_names_whitelist) and ( + self._variable_names_blacklist is None or + name not in self._variable_names_blacklist) + + def _build_node_defs_list(self): + """Builds the list of NodeDefs in the GraphDef. + + This list consists of all NodeDefs in the main graph as well as all control + flow NodeDefs in the functions. + + The remaining NodeDefs in the functions are not included because the op + names + are not unique and the variables are handled differently than the main + graph. + The control flow ops need to be extracted because they are need their + attributes to be updated similar to the control flow ops in the main graph. 
+ """ + self._node_defs = {node.name: node for node in self._graph_def.node} + + if self._graph_def.library: + for func in self._graph_def.library.function: + self._node_defs.update({ + node.name: node + for node in func.node_def + if node.op in _CONTROL_FLOW_OPS + }) + + +class _FunctionConverterData(_ConverterData): + """Container for ConcreteFunction-based conversion data.""" + + def __init__(self, + func, + lower_control_flow, + aggressive_inlining, + variable_names_whitelist=None, + variable_names_blacklist=None): + """Creates the conversion data for the given function. + + Args: + func: ConcreteFunction. + lower_control_flow: Boolean indicating whether or not to lower control + flow ops such as If and While. + aggressive_inlining: Boolean indicating whether or not to to aggressive + function inlining (might be unsafe if function has stateful ops, not + properly connected to control outputs). + variable_names_whitelist: The set of variable names to convert (by + default, all variables are converted). + variable_names_blacklist: The set of variable names to omit converting to + constants. + """ + + self._func = func + # Inline the graph in order to remove functions when possible. + graph_def = _run_inline_graph_optimization(func, lower_control_flow, + aggressive_inlining) + super(_FunctionConverterData, self).__init__( + graph_def, + variable_names_whitelist=variable_names_whitelist, + variable_names_blacklist=variable_names_blacklist) + self._build_tensor_data() + + def _build_tensor_data(self): + """Caches the tensor data for all Placeholders in the given function.""" + map_index_to_variable = {} + for var in self._func.graph.variables: + for idx, captured_input in enumerate(self._func.captured_inputs): + if var.handle is captured_input: # pylint: disable=protected-access + map_index_to_variable[idx] = var + break + + # Iterates through all captures which are represented as Placeholders. + for idx, (val_tensor, name_tensor) in enumerate(self._func.graph.captures): + tensor_name = name_tensor.name.split(":")[0] + if not self._should_convert(tensor_name): + continue + if idx in map_index_to_variable: + data = map_index_to_variable[idx].numpy() + else: + data = val_tensor.numpy() + self._tensor_data[tensor_name] = _TensorData( + numpy=data, + dtype=dtypes.as_dtype(data.dtype).as_datatype_enum, + index=idx) + + # Get data for VariableV2 ops (reference variables) that cannot be lifted. 
+ for node in self.node_defs.values(): + if node.op == "VariableV2": + if not self._should_convert(node.name): + continue + if node.name not in self.tensor_data: + with self._func.graph.as_default(): + identity_node = array_ops.identity( + self._func.graph.as_graph_element(node.name + ":0")) + pruned_graph = self._func.prune([], [identity_node.name])()[0] + self._tensor_data[node.name] = _TensorData( + numpy=pruned_graph.numpy(), + dtype=node.attr["dtype"].type, + index=None) + + +class _SessionConverterData(_ConverterData): + """Container for Session-based conversion data.""" + + def __init__(self, + session, + graph_def, + output_node_names, + variable_names_whitelist=None, + variable_names_blacklist=None): + graph_def = graph_util.extract_sub_graph(graph_def, output_node_names) + super(_SessionConverterData, self).__init__( + graph_def, + variable_names_whitelist=variable_names_whitelist, + variable_names_blacklist=variable_names_blacklist) + + nodes_to_convert = [] + tensor_names_to_convert = [] + for node in self.graph_def.node: + if node.op in ["Variable", "VariableV2", "VarHandleOp"]: + tensor_name = node.name + if not self._should_convert(tensor_name): + continue + if node.op == "VarHandleOp": + tensor_name = tensor_name + "/Read/ReadVariableOp" + nodes_to_convert.append(node) + tensor_names_to_convert.append(tensor_name + ":0") + + if tensor_names_to_convert: + converted_tensors = session.run(tensor_names_to_convert) + for node, tensor_value in zip(nodes_to_convert, converted_tensors): + self._tensor_data[node.name] = _TensorData( + numpy=tensor_value, dtype=node.attr["dtype"].type, index=None) + + def disable_lower_using_switch_merge(graph_def): """Set '_lower_using_switch_merge' attributes to False. @@ -135,247 +969,6 @@ def _run_inline_graph_optimization(func, lower_control_flow, return tf_optimizer.OptimizeGraph(config, meta_graph) -def _get_tensor_name(name): - """Returns the name of the input tensor. - - Args: - name: str - - Returns: - str - """ - return name.split(":")[0] - - -def _get_new_function_name(name): - """Returns the function name with '_frozen' appended. - - Args: - name: str - - Returns: - str - """ - return name + "_frozen" - - -def _get_node_defs_list(graph_def): - """Returns a list of NodeDefs in the GraphDef. - - This list consists of all NodeDefs in the main graph as well as all control - flow NodeDefs in the functions. - - The remaining NodeDefs in the functions are not included because the op names - are not unique and the variables are handled differently than the main graph. - The control flow ops need to be extracted because they are need their - attributes to be updated similar to the control flow ops in the main graph. - - Args: - graph_def: GraphDef proto. - - Returns: - [NodeDef] - """ - node_defs = list(graph_def.node) - - if graph_def.library: - for func in graph_def.library.function: - node_defs.extend( - [node for node in func.node_def if node.op in _CONTROL_FLOW_OPS]) - return node_defs - - -def _get_tensor_data(func): - """Gets the tensor data for all Placeholders in the model. - - Returns a dictionary that maps the tensor name to a dictionary containing: - data: numpy data - index: int index in func.graph.captures - is_variable: bool indicating whether the tensor is a variable or not - - Args: - func: ConcreteFunction. 
- - Returns: - Dict - """ - tensor_data = {} - map_index_to_variable = {} - for var in func.graph.variables: - for idx, captured_input in enumerate(func.captured_inputs): - if var.handle is captured_input: # pylint: disable=protected-access - map_index_to_variable[idx] = var - break - - # Iterates through all captures which are represented as Placeholders. - for idx, (val_tensor, name_tensor) in enumerate(func.graph.captures): - tensor_name = _get_tensor_name(name_tensor.name) - is_variable = idx in map_index_to_variable - if is_variable: - data = map_index_to_variable[idx].numpy() - else: - data = val_tensor.numpy() - tensor_data[tensor_name] = { - "data": data, - "index": idx, - "is_variable": is_variable, - } - return tensor_data - - -def _get_control_flow_function_data(node_defs, tensor_data, name_to_node): - """Gets the types and shapes for the parameters to the function. - - Creates a map from function name to a list of types and a list of shapes that - correspond with the function arguments. The data is primarily determined from - the corresponding "If" or "While" op. If the argument is a resource variable, - then the type is determined from the type of the data contained within the - Tensor. The shape data is only determined in the case of the "While" op. - - `is_also_output_type` is used to identify the "While" bodies that require the - output types to be updated at the same time the input types are updated. - - Args: - node_defs: List of NodeDefs. - tensor_data: {str name : Tensor}. - name_to_node: Dictionary mapping node name to node object. - - Returns: - {str function name : {"types" : [int representing DataType], - "shapes" : [[int] representing TensorShape]], - "is_also_output_type" : bool} - """ - func_data = {} - - def get_source_node_name_through_identities(node_name): - # Trace the source node along with a chain of Identity nodes. - # For example, given Placeholder -> Identity -> Identity -> node_name - # The function will return the name of the Placeholder. - while name_to_node[node_name].op == "Identity": - node_name = _get_tensor_name(name_to_node[node_name].input[0]) - return node_name - - def get_resource_type(node_name): - node_name = get_source_node_name_through_identities(node_name) - - numpy_type = tensor_data[node_name]["data"].dtype - return dtypes.as_dtype(numpy_type).as_datatype_enum - - def get_resource_shape(node_name): - node_name = get_source_node_name_through_identities(node_name) - - return tensor_shape_pb2.TensorShapeProto(dim=[ - tensor_shape_pb2.TensorShapeProto.Dim(size=dim) - for dim in tensor_data[node_name]["data"].shape - ]) - - def add_value(func_name, arg_types, output_shapes, is_also_output_type): - func_data[func_name] = { - "types": arg_types, - "shapes": output_shapes, - "is_also_output_type": is_also_output_type - } - - for node in node_defs: - if node.op in _CONDITIONAL_OPS: - arg_types = [dtype for dtype in node.attr["Tin"].list.type] - - for idx in range(len(arg_types)): - if arg_types[idx] == dtypes.resource: - # Skip first index which represents the condition. 
- arg_types[idx] = get_resource_type(node.input[idx + 1]) - - add_value(node.attr["then_branch"].func.name, arg_types, None, False) - add_value(node.attr["else_branch"].func.name, arg_types, None, False) - elif node.op in _LOOP_OPS: - arg_types = [dtype for dtype in node.attr["T"].list.type] - output_shapes = [shape for shape in node.attr["output_shapes"].list.shape] - - for idx in range(len(arg_types)): - if arg_types[idx] == dtypes.resource: - input_name = node.input[idx] - arg_types[idx] = get_resource_type(input_name) - output_shapes[idx] = get_resource_shape(input_name) - - add_value(node.attr["body"].func.name, arg_types, output_shapes, True) - add_value(node.attr["cond"].func.name, arg_types, output_shapes, False) - return func_data - - -def _populate_const_op(output_node, node_name, dtype, data, data_shape): - """Creates a Const op. - - Args: - output_node: TensorFlow NodeDef. - node_name: str node name. - dtype: AttrValue with a populated .type field. - data: numpy data value. - data_shape: Tuple of integers containing data shape. - """ - output_node.op = "Const" - output_node.name = node_name - output_node.attr["dtype"].CopyFrom(dtype) - tensor = tensor_util.make_tensor_proto( - data, dtype=dtype.type, shape=data_shape) - output_node.attr["value"].tensor.CopyFrom(tensor) - - -def _populate_identity_op(output_node, input_node): - """Creates an Identity op from a ReadVariable op. - - Args: - output_node: TensorFlow NodeDef. - input_node: TensorFlow NodeDef. - """ - output_node.op = "Identity" - output_node.name = input_node.name - output_node.input.append(input_node.input[0]) - output_node.attr["T"].CopyFrom(input_node.attr["dtype"]) - if "_class" in input_node.attr: - output_node.attr["_class"].CopyFrom(input_node.attr["_class"]) - - -def _populate_if_op(output_node, input_node, function_data): - """Updates the type attributes and function names of If or StatelessIf. - - Args: - output_node: TensorFlow NodeDef. - input_node: TensorFlow NodeDef. - function_data: Map of function names to the list of types and shapes that - correspond with the function arguments. - """ - output_node.CopyFrom(input_node) - then_func = input_node.attr["then_branch"].func.name - output_node.attr["then_branch"].func.name = _get_new_function_name(then_func) - output_node.attr["else_branch"].func.name = _get_new_function_name( - input_node.attr["else_branch"].func.name) - output_node.attr["Tin"].list.CopyFrom( - attr_value_pb2.AttrValue.ListValue( - type=function_data[then_func]["types"])) - - -def _populate_while_op(output_node, input_node, function_data): - """Updates the type attributes and function names of While or StatelessWhile. - - Args: - output_node: TensorFlow NodeDef. - input_node: TensorFlow NodeDef. - function_data: Map of function names to the list of types and shapes that - correspond with the function arguments. - """ - output_node.CopyFrom(input_node) - cond_func = input_node.attr["cond"].func.name - output_node.attr["cond"].func.name = _get_new_function_name(cond_func) - output_node.attr["body"].func.name = _get_new_function_name( - input_node.attr["body"].func.name) - output_node.attr["T"].list.CopyFrom( - attr_value_pb2.AttrValue.ListValue( - type=function_data[cond_func]["types"])) - output_node.attr["output_shapes"].list.CopyFrom( - attr_value_pb2.AttrValue.ListValue( - shape=function_data[cond_func]["shapes"])) - - def _construct_concrete_function(func, output_graph_def, converted_input_indices): """Constructs a concrete function from the `output_graph_def`. 
@@ -394,7 +987,8 @@ def _construct_concrete_function(func, output_graph_def, converted_inputs = object_identity.ObjectIdentitySet( [input_tensors[index] for index in converted_input_indices]) not_converted_inputs = [ - tensor for tensor in func.inputs if tensor not in converted_inputs] + tensor for tensor in func.inputs if tensor not in converted_inputs + ] not_converted_inputs_map = { tensor.name: tensor for tensor in not_converted_inputs } @@ -412,242 +1006,34 @@ def _construct_concrete_function(func, output_graph_def, return new_func -def _convert_variables_to_constants_v2_impl(func, - lower_control_flow=True, - aggressive_inlining=False): - """Replaces all the variables in a graph with constants of the same values. +def _replace_variables_by_constants(converter_data): + """Replaces variables by constants on a given graph. - TensorFlow 2.0 function for converting all Variable ops into Const ops holding - the same values. This makes it possible to describe the network fully with a - single GraphDef file, and allows the removal of a lot of ops related to - loading and saving the variables. This function runs Grappler's function - inlining optimization in order to return a single subgraph. - - The current implementation only works for graphs that do not contain any - control flow or embedding related ops. - - Note that the NodeDefs in the returned GraphDef contains the original node - names if they are created by the graph optimization. Converting the GraphDef - to concrete function will lose these debug information. + Given a _ConverterData instance with converted variables in its tensor_data + field, create a new graph where the respective variables are replaced with the + converted constants. Args: - func: ConcreteFunction. - lower_control_flow: Boolean indicating whether or not to lower control flow - ops such as If and While. (default True) - aggressive_inlining: Inlining functions with stateful ops might lead to - undefined execution if function call doesn't have an outgoing control - edge and control outputs (they should be added automatically in TFv2). - Aggressive mode disables safety checks in Grappler function optimizer. + converter_data: A pre-populated _ConverterData instance. Returns: - GraphDef containing a simplified version of the original and converted - input indices that were converted to constants. + The converted graph. """ - # Inline the graph in order to remove functions when possible. - graph_def = _run_inline_graph_optimization(func, lower_control_flow, - aggressive_inlining) + input_graph = _GraphDef(converter_data.graph_def) - # Gets list of all node defs include those in the library. - node_defs = _get_node_defs_list(graph_def) + for tensor_name, tensor_data in converter_data.tensor_data.items(): + input_graph.nodes[tensor_name].convert_variable_to_constant( + None, tensor_data) - # Get mapping from node name to node. - name_to_node = {_get_tensor_name(node.name): node for node in node_defs} + converted_graph = input_graph.converted_self().graph_def - # Get mapping from node name to variable value. - tensor_data = _get_tensor_data(func) + converted_input_indices = { + t.index + for t in converter_data.tensor_data.values() + if t.index is not None + } - # Get mapping from function name to argument types. - function_data = _get_control_flow_function_data( - node_defs, tensor_data, name_to_node) - - # Get variable data for all nodes in `node_defs`. 
- reference_variables = {} - resource_identities = {} - placeholders = {} - converted_input_indices = set() - - def _save_placeholder(node_name, dtype): - placeholders[node_name] = { - "dtype": dtype, - "data": tensor_data[node_name]["data"], - } - converted_input_indices.add(tensor_data[node_name]["index"]) - - for node in node_defs: - if node.op in _CONDITIONAL_OPS: - # Get dtype and data for resource Placeholders. - then_func = node.attr["then_branch"].func.name - arg_types = function_data[then_func]["types"] - for idx, input_tensor in enumerate(node.input[1:]): - input_name = _get_tensor_name(input_tensor) - if input_name in tensor_data: - dtype = attr_value_pb2.AttrValue(type=arg_types[idx]) - _save_placeholder(_get_tensor_name(input_tensor), dtype) - elif node.op in _LOOP_OPS: - # Get dtype and data for resource Placeholders. - cond_func = node.attr["cond"].func.name - arg_types = function_data[cond_func]["types"] - for idx, input_tensor in enumerate(node.input): - input_name = _get_tensor_name(input_tensor) - if input_name in tensor_data: - dtype = attr_value_pb2.AttrValue(type=arg_types[idx]) - _save_placeholder(_get_tensor_name(input_tensor), dtype) - elif (node.op == "Identity" and node.attr["T"].type == dtypes.resource and - name_to_node[_get_tensor_name(node.input[0])].op in _LOOP_OPS): - # Store the dtype for Identity resource ops that are outputs of While ops. - while_node = name_to_node[_get_tensor_name(node.input[0])] - body_func = while_node.attr["body"].func.name - input_data = node.input[0].split(":") - idx = 0 if len(input_data) == 1 else int(input_data[1]) - - dtype = attr_value_pb2.AttrValue( - type=function_data[body_func]["types"][idx]) - resource_identities[node.name] = dtype - elif node.op == "VariableV2": - # Get data for VariableV2 ops (reference variables) that cannot be lifted. - with func.graph.as_default(): - identity_node = array_ops.identity( - func.graph.as_graph_element(node.name + ":0")) - reference_variables[node.name] = ( - func.prune([], [identity_node.name])()[0]) - elif node.name in tensor_data and not tensor_data[node.name]["is_variable"]: - # Get dtype and data for non-variable Placeholders (ex. values for 1.X - # Const ops that are loaded as Placeholders in 2.0) - _save_placeholder(node.name, node.attr["dtype"]) - elif node.op in ["ReadVariableOp", "ResourceGather", "ResourceGatherNd"]: - # Get dtype and data for Placeholder ops associated with ReadVariableOp - # and ResourceGather ops. There can be an Identity in between the - # resource op and Placeholder. Store the dtype for the Identity ops. - input_name = _get_tensor_name(node.input[0]) - while name_to_node[input_name].op == "Identity": - resource_identities[input_name] = node.attr["dtype"] - input_name = _get_tensor_name(name_to_node[input_name].input[0]) - if name_to_node[input_name].op != "Placeholder": - raise ValueError("Cannot find the Placeholder op that is an input " - "to the ReadVariableOp.") - _save_placeholder(input_name, node.attr["dtype"]) - - # Reconstruct the graph with constants in place of variables. - output_graph_def = graph_pb2.GraphDef() - - for input_node in graph_def.node: - output_node = output_graph_def.node.add() - # Convert VariableV2 ops to Const ops. - if input_node.name in reference_variables: - data = reference_variables[input_node.name] - dtype = attr_value_pb2.AttrValue(type=data.dtype.as_datatype_enum) - _populate_const_op(output_node, input_node.name, dtype, data.numpy(), - data.shape) - # Convert Placeholder ops to Const ops. 
- elif input_node.name in placeholders: - data = placeholders[input_node.name]["data"] - dtype = placeholders[input_node.name]["dtype"] - _populate_const_op(output_node, input_node.name, dtype, data, data.shape) - # Update the dtype for Identity ops that are inputs to ReadVariableOps. - elif input_node.name in resource_identities: - output_node.CopyFrom(input_node) - output_node.attr["T"].CopyFrom(resource_identities[input_node.name]) - # Convert ReadVariableOps to Identity ops. - elif input_node.op == "ReadVariableOp": - _populate_identity_op(output_node, input_node) - # Convert ResourceGather to Gather ops with a Const axis feeding into it. - elif input_node.op == "ResourceGather": - if input_node.attr["batch_dims"].i != 0: - raise ValueError("batch_dims != 0 is not supported by freeze_graph.") - output_axis_node = output_graph_def.node.add() - axis_node_name = input_node.name + "/axis" - axis_dtype = input_node.attr["Tindices"] - axis_data = np.array(input_node.attr["batch_dims"].i) - _populate_const_op(output_axis_node, axis_node_name, axis_dtype, - axis_data, axis_data.shape) - - output_node.op = "GatherV2" - output_node.name = input_node.name - output_node.input.extend( - [input_node.input[0], input_node.input[1], axis_node_name]) - output_node.attr["Tparams"].CopyFrom(input_node.attr["dtype"]) - output_node.attr["Tindices"].CopyFrom(input_node.attr["Tindices"]) - output_node.attr["Taxis"].CopyFrom(axis_dtype) - if "_class" in input_node.attr: - output_node.attr["_class"].CopyFrom(input_node.attr["_class"]) - elif input_node.op == "ResourceGatherNd": - output_node.op = "GatherNd" - output_node.name = input_node.name - output_node.input.extend( - [input_node.input[0], input_node.input[1]]) - output_node.attr["Tparams"].CopyFrom(input_node.attr["dtype"]) - output_node.attr["Tindices"].CopyFrom(input_node.attr["Tindices"]) - if "_class" in input_node.attr: - output_node.attr["_class"].CopyFrom(input_node.attr["_class"]) - # Update the function names and argument types for the conditional ops. - elif input_node.op in _CONDITIONAL_OPS: - _populate_if_op(output_node, input_node, function_data) - elif input_node.op in _LOOP_OPS: - _populate_while_op(output_node, input_node, function_data) - else: - output_node.CopyFrom(input_node) - - # Add functions to reconstructed graph. - if graph_def.library: - library = output_graph_def.library - - for input_library_func in graph_def.library.function: - orig_func_name = input_library_func.signature.name - new_func_name = _get_new_function_name(orig_func_name) - - # Do not copy any functions that aren't being used in the graph. Any - # functions that are not used by control flow should have been inlined. - if orig_func_name not in function_data: - continue - - output_library_func = library.function.add() - for key, value in input_library_func.ret.items(): - output_library_func.ret[key] = value - for key, value in input_library_func.control_ret.items(): - output_library_func.control_ret[key] = value - - # Update the input types in the function signature. Update the output - # types for functions that are while loop bodies. 
- output_library_func.signature.CopyFrom(input_library_func.signature) - output_library_func.signature.name = new_func_name - for dtype, arg in zip(function_data[orig_func_name]["types"], - output_library_func.signature.input_arg): - arg.type = dtype - if function_data[orig_func_name]["is_also_output_type"]: - for dtype, arg in zip(function_data[orig_func_name]["types"], - output_library_func.signature.output_arg): - arg.type = dtype - - # Update the NodeDefs. - func_variables = { - node.name: node.input[0] - for node in input_library_func.node_def - if node.op == "ReadVariableOp" - } - - for input_node in input_library_func.node_def: - output_node = output_library_func.node_def.add() - # Convert ReadVariableOps to Identity ops. - if input_node.op == "ReadVariableOp": - _populate_identity_op(output_node, input_node) - # Update the function names and argument types for the conditional ops. - elif input_node.op in _CONDITIONAL_OPS: - _populate_if_op(output_node, input_node, function_data) - elif input_node.op in _LOOP_OPS: - _populate_while_op(output_node, input_node, function_data) - else: - output_node.CopyFrom(input_node) - # Convert :value to :output for ops that use the ReadVariableOp. - for idx, full_name in enumerate(input_node.input): - input_name = _get_tensor_name(full_name) - if input_name in func_variables: - full_name_parts = full_name.split(":") - full_name_parts[1] = "output" - input_name = ":".join(full_name_parts) - output_node.input[idx] = input_name - - output_graph_def.versions.CopyFrom(graph_def.versions) - return (output_graph_def, converted_input_indices) + return converted_graph, converted_input_indices def convert_variables_to_constants_v2(func, @@ -675,9 +1061,17 @@ def convert_variables_to_constants_v2(func, Returns: ConcreteFunction containing a simplified version of the original. """ - output_graph_def, converted_inputs = _convert_variables_to_constants_v2_impl( - func, lower_control_flow, aggressive_inlining) - return _construct_concrete_function(func, output_graph_def, converted_inputs) + + converter_data = _FunctionConverterData( + func=func, + lower_control_flow=lower_control_flow, + aggressive_inlining=aggressive_inlining) + + output_graph_def, converted_input_indices = _replace_variables_by_constants( + converter_data=converter_data) + + return _construct_concrete_function(func, output_graph_def, + converted_input_indices) def convert_variables_to_constants_v2_as_graph(func, @@ -702,7 +1096,51 @@ def convert_variables_to_constants_v2_as_graph(func, the intermediate GraphDef containing the node debug information for the transformations in the frozen phase. """ - graph_def, converted_inputs = _convert_variables_to_constants_v2_impl( - func, lower_control_flow, aggressive_inlining) - frozen_func = _construct_concrete_function(func, graph_def, converted_inputs) - return frozen_func, graph_def + converter_data = _FunctionConverterData( + func=func, + lower_control_flow=lower_control_flow, + aggressive_inlining=aggressive_inlining) + + output_graph_def, converted_input_indices = _replace_variables_by_constants( + converter_data=converter_data) + + frozen_func = _construct_concrete_function(func, output_graph_def, + converted_input_indices) + return frozen_func, output_graph_def + + +def convert_variables_to_constants_from_session_graph( + session, + graph_def, + output_node_names, + variable_names_whitelist=None, + variable_names_blacklist=None): + """Replaces all the variables in a graph with constants of the same values. 
+ + This function works similarly to convert_variables_to_constants_v2, but it + retrieves the constant values from a Session instead of from a + ConcreteFunction. This is useful when converting graphs generated from + TensorFlow V1, where ConcreteFunctions are not available. This also differs + from graph_util.convert_variables_to_constants in that it supports resource + variables when V2 control flow constructions are present. + + Args: + session: Active TensorFlow session containing the variables. + graph_def: A GraphDef to convert. + output_node_names: List of name strings for the result nodes of the graph. + variable_names_whitelist: The set of variable names to convert (by default, + all variables are converted). + variable_names_blacklist: The set of variable names to omit converting to + constants. + + Returns: + An optimized GraphDef. + """ + graph_def, _ = _replace_variables_by_constants( + converter_data=_SessionConverterData( + session=session, + graph_def=graph_def, + output_node_names=output_node_names, + variable_names_whitelist=variable_names_whitelist, + variable_names_blacklist=variable_names_blacklist)) + return graph_def diff --git a/tensorflow/python/framework/convert_to_constants_test.py b/tensorflow/python/framework/convert_to_constants_test.py index 56a2e2b13a4..b1e11003939 100644 --- a/tensorflow/python/framework/convert_to_constants_test.py +++ b/tensorflow/python/framework/convert_to_constants_test.py @@ -19,33 +19,129 @@ from __future__ import division from __future__ import print_function import os +import re import numpy as np +from google.protobuf import text_format +from tensorflow.core.framework import function_pb2 +from tensorflow.core.framework import graph_pb2 +from tensorflow.core.framework import node_def_pb2 +from tensorflow.core.framework import op_def_pb2 +from tensorflow.core.protobuf import config_pb2 +from tensorflow.core.protobuf import meta_graph_pb2 from tensorflow.python.client import session as session_lib from tensorflow.python.eager import def_function from tensorflow.python.framework import constant_op from tensorflow.python.framework import convert_to_constants from tensorflow.python.framework import dtypes +from tensorflow.python.framework import function +from tensorflow.python.framework import importer from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_spec from tensorflow.python.framework import test_util +from tensorflow.python.grappler import tf_optimizer from tensorflow.python.ops import array_ops from tensorflow.python.ops import cond_v2 from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import control_flow_v2_toggles +from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import rnn from tensorflow.python.ops import rnn_cell_impl +from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.ops import while_v2 from tensorflow.python.platform import test from tensorflow.python.saved_model import simple_save from tensorflow.python.saved_model.load import load from tensorflow.python.saved_model.save import save +from tensorflow.python.training.saver import export_meta_graph from tensorflow.python.training.tracking import tracking from tensorflow.python.util import nest +class _GraphMerger(object): + """GraphDef merging methods for testing purposes.""" + + @staticmethod + def merge_any(x1, x2, empty_fn): + """Merges two values using the message's 
CopyFrom/MergeFrom methods.""" + merged = empty_fn() + merged.CopyFrom(x1) + merged.MergeFrom(x2) + return merged + + @staticmethod + def merge_nodes(node1, node2): + """Merges two NodeDef messages.""" + merged = _GraphMerger.merge_any(node1, node2, node_def_pb2.NodeDef) + merged_inputs = node1.input[:] + merged_inputs.extend([i for i in node2.input[:] if i not in merged_inputs]) + merged.input[:] = merged_inputs + return merged + + @staticmethod + def merge_lists(repeated1, repeated2, empty_fn, key_fn, merge_fn): + """Merges two lists representing maps.""" + merged = {} + xs1 = {key_fn(x): x for x in repeated1} + xs2 = {key_fn(x): x for x in repeated2} + for name in set().union(xs1.keys(), xs2.keys()): + x1 = empty_fn() if name not in xs1 else xs1[name] + x2 = empty_fn() if name not in xs2 else xs2[name] + merged[name] = merge_fn(x1, x2) + return sorted(merged.values(), key=key_fn) + + @staticmethod + def merge_node_lists(repeated_nodes1, repeated_nodes2): + """Merges two repeated node fields.""" + return _GraphMerger.merge_lists(repeated_nodes1, repeated_nodes2, + node_def_pb2.NodeDef, lambda n: n.name, + _GraphMerger.merge_nodes) + + @staticmethod + def merge_functions(fn1, fn2): + """Merges two FunctionDefs.""" + merged = _GraphMerger.merge_any(fn1, fn2, function_pb2.FunctionDef) + + del merged.signature.input_arg[:] + merged.signature.input_arg.extend( + _GraphMerger.merge_lists( + fn1.signature.input_arg[:], fn2.signature.input_arg[:], + op_def_pb2.OpDef.ArgDef, lambda a: a.name, + lambda x, y: _GraphMerger.merge_any(x, y, op_def_pb2.OpDef.ArgDef))) + + del merged.signature.output_arg[:] + merged.signature.output_arg.extend( + _GraphMerger.merge_lists( + fn1.signature.output_arg[:], fn2.signature.output_arg[:], + op_def_pb2.OpDef.ArgDef, lambda a: a.name, + lambda x, y: _GraphMerger.merge_any(x, y, op_def_pb2.OpDef.ArgDef))) + + del merged.node_def[:] + merged.node_def.extend( + _GraphMerger.merge_node_lists(fn1.node_def[:], fn2.node_def[:])) + + return merged + + @staticmethod + def merge_graphs(graph1, graph2): + """Merges two GraphDef messages.""" + merged = graph_pb2.GraphDef() + merged.node.extend( + _GraphMerger.merge_node_lists(graph1.node[:], graph2.node[:])) + + merged.library.function.extend( + _GraphMerger.merge_lists(graph1.library.function, + graph2.library.function, + function_pb2.FunctionDef, + lambda f: f.signature.name, + _GraphMerger.merge_functions)) + + return merged + + class VariablesToConstantsTest(test.TestCase): def _freezeModel(self, model): @@ -325,6 +421,7 @@ class VariablesToConstantsTest(test.TestCase): cell, seq, dtype=dtypes.float32, sequence_length=[1]) root, output_func = self._freezeModel(model) + self._testConvertedFunction(root, root.f, output_func, input_data) @test_util.run_v2_only @@ -347,6 +444,7 @@ class VariablesToConstantsTest(test.TestCase): return control_flow_ops.while_loop(condition, body, [x]) root, output_func = self._freezeModel(model) + self._testConvertedFunction(root, root.f, output_func, input_data) @test_util.run_v2_only @@ -389,5 +487,665 @@ class VariablesToConstantsTest(test.TestCase): self._testConvertedFunction(root, root.f, output_func, input_data) +class ConvertVariablesToConstantsSessionTest(test.TestCase): + + def _assertGraphContains(self, graph, subgraph): + """Asserts that the given subgraph is contained within the given graph.""" + + def normalize_uids(msg): + """Replace auto-id function names with something consistent.""" + # These functions have non-deterministic names, the non-determinism coming + # from having 
an ops.uid() suffix in their names. We're replacing these + # with new sequential IDs starting from 0 for each prefix, which is + # is sufficient for tests. + if isinstance(msg, graph_pb2.GraphDef): + msg = text_format.MessageToString(msg) + name_prefixes = ["case_cond_true.*", "case_cond_false.*"] + name_regex = r"\b(" + "|".join(name_prefixes) + r")_([0-9]+)\b" + names = {} + for (name, index) in re.findall(name_regex, msg): + names.setdefault(name, set()).add(int(index)) + for name, indices in names.items(): + for new_index, old_index in enumerate(sorted(list(indices))): + msg = re.sub(r"\b" + name + "_" + str(old_index) + r"\b", + name + "_" + str(new_index), msg) + return msg + + norm_graph = text_format.Parse(normalize_uids(graph), graph_pb2.GraphDef()) + norm_subgraph = text_format.Parse( + normalize_uids(subgraph), graph_pb2.GraphDef()) + + # Graph S is contained in C if and only if merge(C,S) == C. + # We merge the input graph with an empty graph to normalize repeated fields: + # assertProtoEquals is sensitive to ordering. + norm_graph = _GraphMerger.merge_graphs(norm_graph, graph_pb2.GraphDef()) + merged_graph = _GraphMerger.merge_graphs(norm_graph, norm_subgraph) + self.assertProtoEquals(norm_graph, merged_graph) + + def _ensure_no_variables_in_graph(self, graph_def): + """Ensures there are no variables in the graph.""" + for node in graph_def.node: + self.assertNotIn( + node.op, ["Variable", "VariableV2", "VarHandleOp", "ReadVariableOp"]) + + def _test_variable_to_const_conversion(self, use_resource): + with ops.Graph().as_default(): + with variable_scope.variable_scope("", use_resource=use_resource): + variable_node = variable_scope.get_variable( + "variable_node", initializer=1.0) + variable_scope.get_variable("unused_variable_node", initializer=1.0) + output_node = math_ops.multiply(variable_node, 2.0, name="output_node") + with session_lib.Session() as sess: + self.evaluate(variable_node.initializer) + output = self.evaluate(output_node) + self.assertNear(2.0, output, 0.00001) + variable_graph_def = sess.graph.as_graph_def() + constant_graph_def = ( + convert_to_constants + .convert_variables_to_constants_from_session_graph( + session=sess, + graph_def=variable_graph_def, + output_node_names=["output_node"])) + + self._ensure_no_variables_in_graph(constant_graph_def) + + # Now we make sure the variable is now a constant, and that the graph still + # produces the expected result. 
+ with ops.Graph().as_default(): + _ = importer.import_graph_def(constant_graph_def, name="") + self.assertEqual(4, len(constant_graph_def.node)) + self._ensure_no_variables_in_graph(constant_graph_def) + with session_lib.Session() as sess: + output_node = sess.graph.get_tensor_by_name("output_node:0") + output = self.evaluate(output_node) + self.assertNear(2.0, output, 0.00001) + + def test_resource_variable_can_be_written_after_blacklisting(self): + with ops.Graph().as_default(): + with variable_scope.variable_scope("", use_resource=True): + variable_node = variable_scope.get_variable( + "variable_node", initializer=1.0) + another_variable = variable_scope.get_variable( + "unused_variable_node", initializer=2.0) + with ops.control_dependencies( + [variable_node.assign(another_variable + variable_node)]): + output_node = array_ops.identity(variable_node, name="output_node") + initializer_name = variable_node.initializer.name + with session_lib.Session() as sess: + self.evaluate(variable_node.initializer) + self.evaluate(another_variable.initializer) + output = self.evaluate(output_node) + self.assertNear(3.0, output, 0.00001) + variable_graph_def = sess.graph.as_graph_def() + + # Test variable name black list. This should result in the variable + # not being a const. Furthermore, the paths that read from and assign + # to the blacklisted variable should continue to be valid. + constant_graph_def_with_blacklist = ( + convert_to_constants + .convert_variables_to_constants_from_session_graph( + session=sess, + graph_def=variable_graph_def, + output_node_names=["output_node", initializer_name], + variable_names_blacklist=set(["variable_node"]))) + + variable_node = None + for node in constant_graph_def_with_blacklist.node: + if node.name == "variable_node": + variable_node = node + self.assertIsNotNone(variable_node) + self.assertEqual(variable_node.op, "VarHandleOp") + + # Now we make sure another_variable is now a constant, but the original + # variable is not, and that the graph can be executed and update the + # variable can be updated with each execution. + with ops.Graph().as_default(): + _ = importer.import_graph_def(constant_graph_def_with_blacklist, name="") + with session_lib.Session() as sess: + output_node = sess.graph.get_tensor_by_name("output_node:0") + self.evaluate(sess.graph.get_operation_by_name(initializer_name)) + output = self.evaluate(output_node) + self.assertNear(3.0, output, 0.00001) + output = self.evaluate(output_node) + self.assertNear(5.0, output, 0.00001) + + def _inline_functions(self, graph_def, arrays): + meta_graph = export_meta_graph(graph_def=graph_def) + fetch_collection = meta_graph_pb2.CollectionDef() + for name in arrays: + fetch_collection.node_list.value.append(name) + meta_graph.collection_def["train_op"].CopyFrom(fetch_collection) + + # Initialize RewriterConfig with everything disabled except function + # inlining. 
+ config = config_pb2.ConfigProto() + rewrite_options = config.graph_options.rewrite_options + rewrite_options.optimizers.append("function") + return tf_optimizer.OptimizeGraph(config, meta_graph) + + def _test_convert_variables_with_functions(self, inline_functions): + """Freezes a graph with functions.""" + + @function.Defun(dtypes.float32) + def plus_one(x): + return x + 1.0 + + with ops.Graph().as_default(): + variable_node = variables.Variable(1.0, name="variable_node") + _ = variables.Variable(1.0, name="unused_variable_node") + defun_node = plus_one(variable_node) + _ = math_ops.multiply(defun_node, 2.0, name="output_node") + + with session_lib.Session() as sess: + self.evaluate(variables.variables_initializer([variable_node])) + variable_graph_def = sess.graph.as_graph_def() + + if inline_functions: + # Run Grappler to create the VarOpHandle --> Placeholder --> + # ResourceVariable pattern. + variable_graph_def = self._inline_functions( + variable_graph_def, ["variable_node", "output_node"]) + + constant_graph_def = ( + convert_to_constants + .convert_variables_to_constants_from_session_graph( + session=sess, + graph_def=variable_graph_def, + output_node_names=["output_node"])) + + self._ensure_no_variables_in_graph(constant_graph_def) + + def testReferenceVariables(self): + """Freezes a graph with reference variables.""" + self._test_variable_to_const_conversion(use_resource=False) + + def testResourceVariables(self): + """Freezes a graph with resource variables.""" + self._test_variable_to_const_conversion(use_resource=True) + + def testWithFunctions(self): + """Freezes a graph with functions.""" + self._test_convert_variables_with_functions(inline_functions=False) + + def testWithInlinedFunctions(self): + """Freezes a graph with functions that have been inlined using Grappler.""" + self._test_convert_variables_with_functions(inline_functions=True) + + def testGraphWithSwitch(self): + """Freezes a graph which contains a Switch with type RESOURCE_DT.""" + with ops.Graph().as_default(): + with variable_scope.variable_scope("", use_resource=True): + x = variable_scope.get_variable("var_x", initializer=1.0) + y = variable_scope.get_variable("var_y", initializer=2.0) + f1 = lambda: variable_scope.get_variable("var_f1", initializer=17.0) + f2 = lambda: variable_scope.get_variable("var_f2", initializer=23.0) + cond_node = control_flow_ops.case([(gen_math_ops.less(x, y), f1)], + default=f2) + _ = math_ops.multiply(cond_node, 2.0, name="output_node") + + with session_lib.Session() as sess: + sess.run(variables.global_variables_initializer()) + variable_graph_def = sess.graph.as_graph_def() + + constant_graph_def = ( + convert_to_constants + .convert_variables_to_constants_from_session_graph( + session=sess, + graph_def=variable_graph_def, + output_node_names=["output_node"])) + + self._ensure_no_variables_in_graph(constant_graph_def) + + def testConvertSingleVariable(self): + """Tests that a single variable is properly converted to a constant.""" + + with ops.Graph().as_default(): + with variable_scope.variable_scope("", use_resource=False): + _ = variable_scope.get_variable("x", initializer=1.0) + with session_lib.Session() as sess: + sess.run(variables.global_variables_initializer()) + variable_graph_def = sess.graph.as_graph_def() + constant_graph_def = ( + convert_to_constants + .convert_variables_to_constants_from_session_graph( + sess, variable_graph_def, ["x/read"])) + self._assertGraphContains( + constant_graph_def, """ + node { + name: "x" op: "Const" + attr { key: "dtype" value 
{ type: DT_FLOAT } } + attr { + key: "value" + value { tensor { dtype: DT_FLOAT tensor_shape{} float_val: 1 }}} + } + node { + name: "x/read" op: "Identity" input: "x" + attr { key: "T" value { type: DT_FLOAT } } + }""") + + def testConvertSingleResourceVariable(self): + """Tests that a resource variable is properly converted to a constant.""" + with ops.Graph().as_default(): + with variable_scope.variable_scope("", use_resource=True): + _ = variable_scope.get_variable("x", initializer=1.0) + with session_lib.Session() as sess: + sess.run(variables.global_variables_initializer()) + variable_graph_def = sess.graph.as_graph_def() + constant_graph_def = ( + convert_to_constants + .convert_variables_to_constants_from_session_graph( + sess, variable_graph_def, ["x/Read/ReadVariableOp"])) + self._assertGraphContains( + constant_graph_def, """ + node { + name: "x" op: "Const" + attr { key: "dtype" value { type: DT_FLOAT } } + attr { + key: "value" + value { tensor { dtype: DT_FLOAT tensor_shape{} float_val: 1 }}} + } + node { + name: "x/Read/ReadVariableOp" op: "Identity" input: "x" + attr { key: "T" value { type: DT_FLOAT } } + }""") + + def testConvertOneVariableOfTwo(self): + """Tests that one variable can be kept unconverted.""" + with ops.Graph().as_default(): + with variable_scope.variable_scope("", use_resource=False): + x = variable_scope.get_variable("x", initializer=1.0) + y = variable_scope.get_variable("y", initializer=1.0) + _ = math_ops.multiply(x, y, name="out") + with session_lib.Session() as sess: + sess.run(variables.global_variables_initializer()) + variable_graph_def = sess.graph.as_graph_def() + constant_graph_def = ( + convert_to_constants + .convert_variables_to_constants_from_session_graph( + sess, + variable_graph_def, ["out"], + variable_names_blacklist=["y"])) + self._assertGraphContains( + constant_graph_def, """ + node { + name: "x" op: "Const" + attr { key: "dtype" value { type: DT_FLOAT } } + attr { + key: "value" + value { tensor { dtype: DT_FLOAT tensor_shape{} float_val: 1 }}} + } + node { + name: "x/read" op: "Identity" input: "x" + attr { key: "T" value { type: DT_FLOAT } } + } + node { + name: "y" op: "VariableV2" + attr { key: "dtype" value { type: DT_FLOAT } } + } + node { + name: "y/read" op: "Identity" input: "y" + attr { key: "T" value { type: DT_FLOAT } } + } + node { + name: "out" op: "Mul" input: "x/read" input: "y/read" + attr {key: "T" value {type: DT_FLOAT}} + }""") + + def testConvertOneResourceVariableOfTwo(self): + """Tests that one variable can be kept unconverted.""" + with ops.Graph().as_default(): + with variable_scope.variable_scope("", use_resource=True): + x = variable_scope.get_variable("x", initializer=1.0) + y = variable_scope.get_variable("y", initializer=1.0) + _ = math_ops.multiply(x, y, name="out") + with session_lib.Session() as sess: + sess.run(variables.global_variables_initializer()) + variable_graph_def = sess.graph.as_graph_def() + constant_graph_def = ( + convert_to_constants + .convert_variables_to_constants_from_session_graph( + sess, + variable_graph_def, ["out"], + variable_names_blacklist=["y"])) + self._assertGraphContains( + constant_graph_def, """ + node { + name: "x" op: "Const" + attr { key: "dtype" value { type: DT_FLOAT } } + attr { + key: "value" + value { tensor { dtype: DT_FLOAT tensor_shape{} float_val: 1 }}} + } + node { + name: "y" op: "VarHandleOp" + attr { key: "dtype" value { type: DT_FLOAT } } + } + node { + name: "out/ReadVariableOp" op: "Identity" input: "x" + attr { key: "T" value { type: DT_FLOAT } } 
+ } + node { + name: "out/ReadVariableOp_1" op: "ReadVariableOp" input: "y" + attr { key: "dtype" value { type: DT_FLOAT } } + } + node { + name: "out" op: "Mul" + input: "out/ReadVariableOp" input: "out/ReadVariableOp_1" + attr {key: "T" value {type: DT_FLOAT}} + }""") + + def testConvertIdentityChain(self): + """Tests that a chain of Identity ops is converted properly.""" + with ops.Graph().as_default(): + with variable_scope.variable_scope("", use_resource=True): + x = variable_scope.get_variable("x", initializer=1.0) + y = array_ops.identity(x, name="y") + _ = array_ops.identity(y, name="z") + with session_lib.Session() as sess: + sess.run(variables.global_variables_initializer()) + variable_graph_def = sess.graph.as_graph_def() + constant_graph_def = ( + convert_to_constants + .convert_variables_to_constants_from_session_graph( + sess, variable_graph_def, ["z"])) + self._assertGraphContains( + constant_graph_def, """ + node { + name: "x" op: "Const" + attr { key: "dtype" value { type: DT_FLOAT } } + attr { + key: "value" + value { tensor { dtype: DT_FLOAT tensor_shape{} float_val: 1 }}} + } + node { + name: "y/ReadVariableOp" op: "Identity" input: "x" + attr { key: "T" value { type: DT_FLOAT } } + } + node { + name: "y" op: "Identity" input: "y/ReadVariableOp" + attr { key: "T" value { type: DT_FLOAT } } + } + node { + name: "z" op: "Identity" input: "y" + attr { key: "T" value { type: DT_FLOAT } } + }""") + + def testConvertCase(self): + """Tests that a v1 case() construction converts properly.""" + with ops.Graph().as_default(): + with variable_scope.variable_scope("", use_resource=False): + control_flow_v2_toggles.disable_control_flow_v2() + x = variable_scope.get_variable("x", initializer=1.0) + y = variable_scope.get_variable("y", initializer=2.0) + _ = control_flow_ops.case([(gen_math_ops.less(x, y), lambda: x)], + default=lambda: y) + with session_lib.Session() as sess: + sess.run(variables.global_variables_initializer()) + variable_graph_def = sess.graph.as_graph_def() + constant_graph_def = ( + convert_to_constants + .convert_variables_to_constants_from_session_graph( + sess, variable_graph_def, ["case/cond/Merge"])) + self._assertGraphContains( + constant_graph_def, """ + node { + name: "x" op: "Const" + attr { key: "dtype" value { type: DT_FLOAT } } + attr { + key: "value" + value { tensor { dtype: DT_FLOAT tensor_shape{} float_val: 1 }}} + } + node { + name: "y" op: "Const" + attr { key: "dtype" value { type: DT_FLOAT } } + attr { + key: "value" + value { tensor { dtype: DT_FLOAT tensor_shape{} float_val: 2 }}} + } + node {name: "x/read" op: "Identity" input: "x"} + node {name: "y/read" op: "Identity" input: "y"} + node {name: "Less" op: "Less" input: "x/read" input: "y/read"} + node {name: "case/cond/pred_id" op: "Identity" input: "Less"} + node { + name: "case/cond/Switch_1" op: "Switch" + input: "case/cond/pred_id" input: "x/read" + } + node { + name: "case/cond/Switch_2" op: "Switch" + input: "case/cond/pred_id" input: "y/read" + } + node { + name: "case/cond/Merge" op: "Merge" + input: "case/cond/Switch_2" input: "case/cond/Switch_1:1" + attr {key: "T" value {type: DT_FLOAT}} + }""") + + def testConvertV2Case(self): + """Tests that a v2 case() converts properly.""" + with ops.Graph().as_default(): + with variable_scope.variable_scope("", use_resource=False): + control_flow_v2_toggles.enable_control_flow_v2() + a = variable_scope.get_variable("a", initializer=2.0) + x = variable_scope.get_variable("x", initializer=1.0) + y = variable_scope.get_variable("y", 
initializer=2.0) + _ = control_flow_ops.case([(gen_math_ops.less(x, y), lambda: a)], + default=lambda: y) + control_flow_v2_toggles.disable_control_flow_v2() + with session_lib.Session() as sess: + sess.run(variables.global_variables_initializer()) + variable_graph_def = sess.graph.as_graph_def() + constant_graph_def = ( + convert_to_constants + .convert_variables_to_constants_from_session_graph( + sess, variable_graph_def, ["case/cond"])) + self._assertGraphContains( + constant_graph_def, """ + node { + name: "x" op: "Const" + attr { key: "dtype" value { type: DT_FLOAT } } + attr { + key: "value" + value { tensor { dtype: DT_FLOAT tensor_shape{} float_val: 1 }}} + } + node { + name: "y" op: "Const" + attr { key: "dtype" value { type: DT_FLOAT } } + attr { + key: "value" + value { tensor { dtype: DT_FLOAT tensor_shape{} float_val: 2 }}} + } + node {name: "x/read" op: "Identity" input: "x"} + node {name: "y/read" op: "Identity" input: "y"} + node {name: "Less" op: "Less" input: "x/read" input: "y/read"} + node { + name: "case/cond" op: "StatelessIf" + input: "Less" input: "a/read" input: "y/read" + attr {key: "Tcond" value {type: DT_BOOL}} + attr {key: "Tin" value {list {type: DT_FLOAT type: DT_FLOAT}}} + attr {key: "Tout" value {list {type: DT_FLOAT}}} + } + library { + function { + signature { + name: "case_cond_false_frozen_0" + input_arg {name: "placeholder" type: DT_FLOAT} + input_arg {name: "y_read_0" type: DT_FLOAT} + output_arg {name: "y_read" type: DT_FLOAT} + } + } + function { + signature { + name: "case_cond_true_frozen_0" + input_arg {name: "a_read_0" type: DT_FLOAT} + input_arg {name: "placeholder" type: DT_FLOAT} + output_arg {name: "a_read" type: DT_FLOAT} + } + } + }""") + + def testConvertV2ResourceCase(self): + """Tests that a v2 case() with resource variables converts properly.""" + with ops.Graph().as_default(): + with variable_scope.variable_scope("", use_resource=True): + control_flow_v2_toggles.enable_control_flow_v2() + x = variable_scope.get_variable("x", initializer=1.0) + y = variable_scope.get_variable("y", initializer=2.0) + _ = control_flow_ops.case([(gen_math_ops.less(x, y), lambda: x)], + default=lambda: y) + control_flow_v2_toggles.disable_control_flow_v2() + with session_lib.Session() as sess: + sess.run(variables.global_variables_initializer()) + variable_graph_def = sess.graph.as_graph_def() + constant_graph_def = ( + convert_to_constants + .convert_variables_to_constants_from_session_graph( + sess, variable_graph_def, ["case/cond"])) + self._assertGraphContains( + constant_graph_def, """ + node {name: "x" op: "Const"} + node {name: "y" op: "Const"} + node { + name: "case/cond" op: "If" input: "Less" input: "x" input: "y" + attr {key: "Tcond" value {type: DT_BOOL}} + attr {key: "Tin" value {list {type: DT_FLOAT type: DT_FLOAT}}} + attr {key: "Tout" value {list {type: DT_FLOAT}}} + } + library { + function { + signature { + name: "case_cond_false_frozen_0" + input_arg {name: "placeholder" type: DT_FLOAT} + input_arg {name: "readvariableop_y" type: DT_FLOAT} + output_arg {name: "readvariableop" type: DT_FLOAT} + } + } + function { + signature { + name: "case_cond_true_frozen_0" + input_arg {name: "placeholder" type: DT_FLOAT} + input_arg {name: "readvariableop_x" type: DT_FLOAT} + output_arg {name: "readvariableop" type: DT_FLOAT} + } + } + }""") + + def testConvertV2UnconvertedResourceNestedCase(self): + """Tests unconverted variable propagation through nested functions.""" + with ops.Graph().as_default(): + with variable_scope.variable_scope("", 
use_resource=True): + control_flow_v2_toggles.enable_control_flow_v2() + x = variable_scope.get_variable("x", initializer=1.0) + y = variable_scope.get_variable("y", initializer=2.0) + z = variable_scope.get_variable("z", initializer=3.0) + # pylint: disable=g-long-lambda + _ = control_flow_ops.case( + [(gen_math_ops.less(x, y), lambda: x)], + default=lambda: control_flow_ops.case( + [(gen_math_ops.less(z, y), lambda: z)], default=lambda: y)) + # pylint: enable=g-long-lambda + control_flow_v2_toggles.disable_control_flow_v2() + with session_lib.Session() as sess: + sess.run(variables.global_variables_initializer()) + variable_graph_def = sess.graph.as_graph_def() + constant_graph_def = ( + convert_to_constants + .convert_variables_to_constants_from_session_graph( + sess, + variable_graph_def, ["case/cond"], + variable_names_blacklist=["y"])) + self._assertGraphContains( + constant_graph_def, """ + node {name: "x" op: "Const"} + node {name: "y" op: "VarHandleOp"} + node {name: "z" op: "Const"} + + node {name: "Less/ReadVariableOp" op: "Identity" input: "x"} + node {name: "Less/ReadVariableOp_1" op: "ReadVariableOp" input: "y"} + + node { + name: "case/cond" op: "If" + input: "x" input: "z" input: "y" + attr { + key: "Tin" + value {list + {type: DT_FLOAT type: DT_FLOAT type: DT_RESOURCE}}} + attr { + key: "_read_only_resource_inputs" + value {list {i: 1 i: 2 i: 3}}} + attr {key: "then_branch" + value {func {name: "case_cond_true_frozen_0"}}} + attr {key: "else_branch" + value {func {name: "case_cond_false_frozen_0"}}} + attr {key: "output_shapes" value {list {shape {}}}} + } + library { + function { + signature { + name: "case_cond_true_frozen_0" + input_arg {name: "placeholder" type: DT_FLOAT} + input_arg {name: "placeholder_1" type: DT_RESOURCE} + input_arg {name: "readvariableop_x" type: DT_FLOAT} + output_arg {name: "readvariableop" type: DT_FLOAT} + is_stateful: true + } + + node_def {name: "ReadVariableOp" op: "Identity" + input: "readvariableop_x"}} + + function { + signature { + name: "case_cond_false_frozen_0" + input_arg {name: "placeholder" type: DT_FLOAT} + input_arg {name: "less_readvariableop_1_y" type: DT_RESOURCE} + input_arg {name: "less_readvariableop_z" type: DT_FLOAT} + output_arg {name: "case_cond_identity" type: DT_FLOAT} + is_stateful: true + } + + node_def {name: "Less/ReadVariableOp_1" op: "ReadVariableOp" + input: "less_readvariableop_1_y"} + + node_def {name: "Less/ReadVariableOp" op: "Identity" + input: "less_readvariableop_z"} + + node_def {name: "case/cond" op: "If" + input: "less_readvariableop_z" + input: "less_readvariableop_1_y" + attr { + key: "Tin" + value {list {type: DT_FLOAT type: DT_RESOURCE}}} + attr {key: "then_branch" + value {func {name: "case_cond_true_frozen_1"}}} + attr {key: "else_branch" + value {func {name: "case_cond_false_frozen_1"}}} + attr { + key: "_read_only_resource_inputs" + value {list {i: 1 i: 2}}}}} + + function { + signature { + name: "case_cond_false_frozen_1" + input_arg {name: "placeholder" type: DT_FLOAT} + input_arg {name: "readvariableop_y" type: DT_RESOURCE} + output_arg {name: "readvariableop" type: DT_FLOAT} + is_stateful: true + } + + node_def {name: "ReadVariableOp" op: "ReadVariableOp" + input: "readvariableop_y"}} + + function { + signature { + name: "case_cond_true_frozen_1" + input_arg {name: "placeholder" type: DT_RESOURCE} + input_arg {name: "readvariableop_z" type: DT_FLOAT} + output_arg {name: "readvariableop" type: DT_FLOAT} + is_stateful: true + } + + node_def {name: "ReadVariableOp" op: "Identity" + input: 
"readvariableop_z"}}}""") + + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/framework/graph_util_impl.py b/tensorflow/python/framework/graph_util_impl.py index a658dfe0143..3cc28d0a707 100644 --- a/tensorflow/python/framework/graph_util_impl.py +++ b/tensorflow/python/framework/graph_util_impl.py @@ -23,16 +23,19 @@ import re import six -from tensorflow.core.framework import attr_value_pb2 from tensorflow.core.framework import graph_pb2 from tensorflow.core.framework import node_def_pb2 from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_util -from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import deprecation +from tensorflow.python.util import lazy_loader from tensorflow.python.util.tf_export import tf_export +# A normal import here would generate circular dependencies. +convert_to_constants = lazy_loader.LazyLoader( + "convert_to_constants", globals(), + "tensorflow.python.framework.convert_to_constants") + _VARIABLE_OPS = { "Assign", "AssignAdd", @@ -237,76 +240,6 @@ def tensor_shape_from_node_def_name(graph, input_name): return shape -def _update_resource_identities(resource_identities, output_graph_def, - variable_names_whitelist, - variable_names_blacklist): - """Updates the type of DT_RESOURCE Identity ops. - - Updates the type of the `resource_identities` to the type of the node that - feed into it if the node is not an input to any other node. Valid nodes are - generally colocated nodes. - - Args: - resource_identities: List of NodeDef protos that are Identity ops with the - type DT_RESOURCE. - output_graph_def: GraphDef proto. - variable_names_whitelist: The set of variable names to convert (by default, - all variables are converted). - variable_names_blacklist: The set of variable names to omit converting - to constants. - """ - # Identify the nodes in the graph and the nodes consuming each node. - map_name_to_node = {} - map_name_to_inputs = {} - for node in output_graph_def.node: - map_name_to_node[node.name] = node - for unparsed_input_name in node.input: - if not unparsed_input_name.startswith("^"): - parsed_input_name = _node_name(unparsed_input_name) - if parsed_input_name not in map_name_to_inputs: - map_name_to_inputs[parsed_input_name] = [] - map_name_to_inputs[parsed_input_name].append(node.name) - - for node in resource_identities: - # Validate the node is not an input to other nodes. - if node.name in map_name_to_inputs: - continue - - # Get the type of the Identity node by tracing back through the nodes until - # we come to a non-Identity or non-control flow node or the type of the node - # is not DT_RESOURCE. - input_node = map_name_to_node[_node_name(node.input[0])] - while (input_node.op in _CONTROL_FLOW_OP_NAMES_OR_IDENTITY and - input_node.attr["T"].type == dtypes.resource): - input_node = map_name_to_node[_node_name(input_node.input[0])] - - # Update the type of the Identity node if an Identity, control flow, or - # VarHandleOp node with a type that is not DT_RESOURCE is found. 
- debugging_message = str.encode( - "This Identity's type was changed from DT_RESOURCE during graph " - "freezing.") - if input_node.attr["T"].type != dtypes.resource: - if (input_node.op in _CONTROL_FLOW_OP_NAMES_OR_IDENTITY - and _should_convert( - input_node.input[0], - variable_names_whitelist, - variable_names_blacklist)): - node.attr["T"].CopyFrom(input_node.attr["T"]) - node.attr["_debugging"].s = debugging_message - elif (input_node.op == "VarHandleOp" - and _should_convert( - input_node.name, - variable_names_whitelist, - variable_names_blacklist)): - node.attr["T"].CopyFrom(input_node.attr["dtype"]) - node.attr["_debugging"].s = debugging_message - - -def _should_convert(name, whitelist, blacklist): - return ((whitelist is None or name in whitelist) - and (blacklist is None or name not in blacklist)) - - @deprecation.deprecated( date=None, instructions="Use `tf.compat.v1.graph_util.convert_variables_to_constants`") @@ -339,190 +272,16 @@ def convert_variables_to_constants(sess, RuntimeError: if a DT_RESOURCE op is found whose ancestor Variables are both blacklisted AND whitelisted for freezing. """ - - get_input_name = lambda node, index=0: node.input[index].split(":")[0] - - def create_const_op(node_name, dtype, data, data_shape=None): - """Creates a Const op.""" - output_node = node_def_pb2.NodeDef() - output_node.op = "Const" - output_node.name = node_name - output_node.attr["dtype"].CopyFrom(dtype) - output_node.attr["value"].CopyFrom( - attr_value_pb2.AttrValue( - tensor=tensor_util.make_tensor_proto( - data, dtype=dtype.type, shape=data_shape))) - return output_node - - # This graph only includes the nodes needed to evaluate the output nodes, and - # removes unneeded nodes like those involved in saving and assignment. - inference_graph = extract_sub_graph(input_graph_def, output_node_names) - - # Identify the ops in the graph. - map_name_to_node = { - node.name: node for node in inference_graph.node - } - - # Get list of variables. - variable_names = [] - variable_dict_names = [] - resource_op_types = {} - - for node in inference_graph.node: - if node.op in ["Variable", "VariableV2", "VarHandleOp"]: - variable_name = node.name - if not _should_convert( - variable_name, variable_names_whitelist, variable_names_blacklist): - continue - variable_dict_names.append(variable_name) - if node.op == "VarHandleOp": - variable_names.append(variable_name + "/Read/ReadVariableOp:0") - else: - variable_names.append(variable_name + ":0") - elif node.op in ["ReadVariableOp", "ResourceGather", "ResourceGatherNd"]: - # There can be one or more Identity or control flow ops in between the - # ReadVariableOp and VarHandleOp. Store the ops with the associated - # dtypes. 
- source_op_names = [get_input_name(node)] - candidate_resource_op_types = {} - while (source_op_names and map_name_to_node[source_op_names[0]].op in - _CONTROL_FLOW_OP_NAMES_OR_IDENTITY): - source_op_name = source_op_names.pop() - current_node = map_name_to_node[source_op_name] - - if (source_op_name not in resource_op_types and - source_op_name not in candidate_resource_op_types): - candidate_resource_op_types[source_op_name] = node.attr["dtype"] - source_op_names.append(get_input_name(current_node)) - - if current_node == "Merge": - merge_resource_name = get_input_name(current_node, index=1) - if (merge_resource_name not in resource_op_types - and merge_resource_name not in candidate_resource_op_types): - candidate_resource_op_types[merge_resource_name] = ( - node.attr["dtype"]) - source_op_names.append( - get_input_name(map_name_to_node[merge_resource_name])) - - should_convert_all = None - for source_node in source_op_names: - if map_name_to_node[source_node].op != "VarHandleOp": - raise ValueError("Cannot find the variable that is an input " - "to the ReadVariableOp.") - should_convert_node = _should_convert( - source_node, variable_names_whitelist, variable_names_blacklist) - if should_convert_all is None: - should_convert_all = should_convert_node - elif should_convert_all != should_convert_node: - raise RuntimeError( - "Found DT_RESOURCE node whose ancestor Variables are both " - "blacklisted AND whitelisted for freezing. Originating " - "descendant node: {}. Ancestor variables: {}.".format( - node.name, source_op_names)) - if should_convert_all in (None, True): - resource_op_types.update(candidate_resource_op_types) - - # Gets map of variables and the associated data. - if variable_names: - returned_variables = sess.run(variable_names) - else: - returned_variables = [] - variables_data_map = dict(zip(variable_dict_names, returned_variables)) - logging.info("Froze %d variables.", len(returned_variables)) - - def _should_convert_ancestor(node): - input_node = map_name_to_node[_node_name(node.input[0])] - while (input_node.op in _CONTROL_FLOW_OP_NAMES_OR_IDENTITY and - input_node.attr["T"].type == dtypes.resource): - input_node = map_name_to_node[_node_name(input_node.input[0])] - return _should_convert(input_node.name, - variable_names_whitelist, - variable_names_blacklist) - - # Reconstruct the graph with constants in place of variables. - output_graph_def = graph_pb2.GraphDef() - how_many_converted = 0 - for input_node in inference_graph.node: - output_node = node_def_pb2.NodeDef() - if input_node.name in variables_data_map: - data = variables_data_map[input_node.name] - output_node = create_const_op(input_node.name, input_node.attr["dtype"], - data, data.shape) - how_many_converted += 1 - elif input_node.name in resource_op_types: - # Converts the type of the ops between the ReadVariableOp and VarHandleOp - # from RESOURCE_DT to the appropriate type based on the input they are - # referencing. Do not copy shapes due to incorrect shape info. 
- output_node.op = input_node.op - output_node.name = input_node.name - for in_node in input_node.input: - output_node.input.append(in_node) - for attr_name in input_node.attr: - if str(attr_name) != "_output_shapes": - output_node.attr[attr_name].CopyFrom(input_node.attr[attr_name]) - output_node.attr["T"].CopyFrom(resource_op_types[input_node.name]) - elif (input_node.op == "ReadVariableOp" - and _should_convert_ancestor(input_node)): - # The first branch converts all VarHandleOps of ResourceVariables to - # constants, so we need to convert the associated ReadVariableOps to - # Identity ops. - output_node.op = "Identity" - output_node.name = input_node.name - output_node.input.extend([input_node.input[0]]) - output_node.attr["T"].CopyFrom(input_node.attr["dtype"]) - if "_class" in input_node.attr: - output_node.attr["_class"].CopyFrom(input_node.attr["_class"]) - elif (input_node.op == "ResourceGather" - and _should_convert_ancestor(input_node)): - # The first branch converts all VarHandleOps of ResourceGather to - # constants, so we need to convert the associated ResourceGather to Gather - # ops with a Const axis feeding into it. - if input_node.attr["batch_dims"].i != 0: - raise ValueError("batch_dims != 0 is not supported by freeze_graph.") - axis_data = input_node.attr["batch_dims"].i - axis_node_name = input_node.name + "/axis" - axis_dtype = input_node.attr["Tindices"] - output_axis_node = create_const_op(axis_node_name, axis_dtype, axis_data) - output_graph_def.node.extend([output_axis_node]) - - output_node.op = "GatherV2" - output_node.name = input_node.name - output_node.input.extend( - [input_node.input[0], input_node.input[1], axis_node_name]) - output_node.attr["Tparams"].CopyFrom(input_node.attr["dtype"]) - output_node.attr["Tindices"].CopyFrom(input_node.attr["Tindices"]) - output_node.attr["Taxis"].CopyFrom(axis_dtype) - if "_class" in input_node.attr: - output_node.attr["_class"].CopyFrom(input_node.attr["_class"]) - elif (input_node.op == "ResourceGatherNd" - and _should_convert_ancestor(input_node)): - output_node.op = "GatherNd" - output_node.name = input_node.name - output_node.input.extend( - [input_node.input[0], input_node.input[1]]) - output_node.attr["Tparams"].CopyFrom(input_node.attr["dtype"]) - output_node.attr["Tindices"].CopyFrom(input_node.attr["Tindices"]) - if "_class" in input_node.attr: - output_node.attr["_class"].CopyFrom(input_node.attr["_class"]) - else: - output_node.CopyFrom(input_node) - output_graph_def.node.append(output_node) - - # Update the types of the DT_RESOURCE Identity nodes that do not have an - # associated ReadVariableOp. - resource_identities = [] - for node in output_graph_def.node: - if node.op == "Identity" and node.attr["T"].type == dtypes.resource: - resource_identities.append(node) - if resource_identities: - _update_resource_identities(resource_identities, - output_graph_def, - variable_names_whitelist, - variable_names_blacklist) - - output_graph_def.library.CopyFrom(inference_graph.library) - logging.info("Converted %d variables to const ops.", how_many_converted) - return output_graph_def + ret = convert_to_constants.convert_variables_to_constants_from_session_graph( + session=sess, + graph_def=input_graph_def, + output_node_names=output_node_names, + variable_names_whitelist=variable_names_whitelist, + variable_names_blacklist=variable_names_blacklist) + # The previous code logic generated an empty versions field, we clear it here + # to maintain backwards compatibility. 
+ ret.versions.Clear() + return ret @deprecation.deprecated( diff --git a/tensorflow/python/framework/graph_util_test.py b/tensorflow/python/framework/graph_util_test.py index 46479e20455..d39b8d0a906 100644 --- a/tensorflow/python/framework/graph_util_test.py +++ b/tensorflow/python/framework/graph_util_test.py @@ -21,27 +21,15 @@ from __future__ import print_function from tensorflow.core.framework import attr_value_pb2 from tensorflow.core.framework import graph_pb2 from tensorflow.core.framework import node_def_pb2 -from tensorflow.core.protobuf import config_pb2 -from tensorflow.core.protobuf import meta_graph_pb2 -from tensorflow.python.client import session from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes -from tensorflow.python.framework import function from tensorflow.python.framework import graph_util -from tensorflow.python.framework import importer from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util from tensorflow.python.framework import test_util -from tensorflow.python.grappler import tf_optimizer -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import gen_state_ops -from tensorflow.python.ops import math_ops as math_ops_lib -from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.platform import test -from tensorflow.python.training.saver import export_meta_graph # Utility device function to use for testing @@ -316,203 +304,5 @@ class DeviceFunctionsTest(test.TestCase): graph_util.remove_training_nodes(graph_def)) -class ConvertVariablesToConstantsTest(test.TestCase): - - def _ensure_no_variables_in_graph(self, graph_def): - """Ensures there are no variables in the graph.""" - for node in graph_def.node: - self.assertNotIn( - node.op, ["Variable", "VariableV2", "VarHandleOp", "ReadVariableOp"]) - - def _test_variable_to_const_conversion(self, use_resource): - with ops.Graph().as_default(): - with variable_scope.variable_scope("", use_resource=use_resource): - variable_node = variable_scope.get_variable( - "variable_node", initializer=1.0) - another_variable = variable_scope.get_variable( - "unused_variable_node", initializer=1.0) - output_node = math_ops_lib.multiply( - variable_node, 2.0, name="output_node") - with session.Session() as sess: - self.evaluate(variable_node.initializer) - output = self.evaluate(output_node) - self.assertNear(2.0, output, 0.00001) - variable_graph_def = sess.graph.as_graph_def() - # First get the constant_graph_def when variable_names_whitelist is - # set, note that if variable_names_whitelist is not set an error will - # be thrown because unused_variable_node is not initialized. - constant_graph_def = graph_util.convert_variables_to_constants( - sess, - variable_graph_def, ["output_node"], - variable_names_whitelist=set(["variable_node"])) - - # Then initialize the unused variable, and get another - # constant_graph_def when variable_names_whitelist is not set. - self.evaluate(another_variable.initializer) - constant_graph_def_without_variable_whitelist = ( - graph_util.convert_variables_to_constants( - sess, variable_graph_def, ["output_node"])) - - # The unused variable should be cleared so the two graphs should be - # equivalent. - self.assertEqual( - str(constant_graph_def), - str(constant_graph_def_without_variable_whitelist)) - - # Test variable name black list. 
This should result in the variable - # not being a const. - constant_graph_def_with_blacklist = ( - graph_util.convert_variables_to_constants( - sess, - variable_graph_def, ["output_node"], - variable_names_blacklist=set(["variable_node"]))) - variable_node = None - for node in constant_graph_def_with_blacklist.node: - if node.name == "variable_node": - variable_node = node - self.assertIsNotNone(variable_node) - if use_resource: - self.assertEqual(variable_node.op, "VarHandleOp") - else: - self.assertEqual(variable_node.op, "VariableV2") - - # Now we make sure the variable is now a constant, and that the graph still - # produces the expected result. - with ops.Graph().as_default(): - _ = importer.import_graph_def(constant_graph_def, name="") - self.assertEqual(4, len(constant_graph_def.node)) - self._ensure_no_variables_in_graph(constant_graph_def) - with session.Session() as sess: - output_node = sess.graph.get_tensor_by_name("output_node:0") - output = self.evaluate(output_node) - self.assertNear(2.0, output, 0.00001) - - def test_resource_variable_can_be_written_after_blacklisting(self): - with ops.Graph().as_default(): - with variable_scope.variable_scope("", use_resource=True): - variable_node = variable_scope.get_variable( - "variable_node", initializer=1.0) - another_variable = variable_scope.get_variable( - "unused_variable_node", initializer=2.0) - with ops.control_dependencies([ - variable_node.assign(another_variable + variable_node)]): - output_node = array_ops.identity(variable_node, name="output_node") - initializer_name = variable_node.initializer.name - with session.Session() as sess: - self.evaluate(variable_node.initializer) - self.evaluate(another_variable.initializer) - output = self.evaluate(output_node) - self.assertNear(3.0, output, 0.00001) - variable_graph_def = sess.graph.as_graph_def() - - # Test variable name black list. This should result in the variable - # not being a const. Furthermore, the paths that read from and assign - # to the blacklisted variable should continue to be valid. - constant_graph_def_with_blacklist = ( - graph_util.convert_variables_to_constants( - sess, - variable_graph_def, ["output_node", initializer_name], - variable_names_blacklist=set(["variable_node"]))) - - variable_node = None - for node in constant_graph_def_with_blacklist.node: - if node.name == "variable_node": - variable_node = node - self.assertIsNotNone(variable_node) - self.assertEqual(variable_node.op, "VarHandleOp") - - # Now we make sure another_variable is now a constant, but the original - # variable is not, and that the graph can be executed and update the - # variable can be updated with each execution. - with ops.Graph().as_default(): - _ = importer.import_graph_def(constant_graph_def_with_blacklist, name="") - with session.Session() as sess: - output_node = sess.graph.get_tensor_by_name("output_node:0") - self.evaluate(sess.graph.get_operation_by_name(initializer_name)) - output = self.evaluate(output_node) - self.assertNear(3.0, output, 0.00001) - output = self.evaluate(output_node) - self.assertNear(5.0, output, 0.00001) - - def _inline_functions(self, graph_def, arrays): - meta_graph = export_meta_graph(graph_def=graph_def) - fetch_collection = meta_graph_pb2.CollectionDef() - for name in arrays: - fetch_collection.node_list.value.append(name) - meta_graph.collection_def["train_op"].CopyFrom(fetch_collection) - - # Initialize RewriterConfig with everything disabled except function - # inlining. 
- config = config_pb2.ConfigProto() - rewrite_options = config.graph_options.rewrite_options - rewrite_options.optimizers.append("function") - return tf_optimizer.OptimizeGraph(config, meta_graph) - - def _test_convert_variables_with_functions(self, inline_functions): - """Freezes a graph with functions.""" - - @function.Defun(dtypes.float32) - def plus_one(x): - return x + 1.0 - - with ops.Graph().as_default(): - variable_node = variables.Variable(1.0, name="variable_node") - _ = variables.Variable(1.0, name="unused_variable_node") - defun_node = plus_one(variable_node) - _ = math_ops_lib.multiply(defun_node, 2.0, name="output_node") - - with session.Session() as sess: - self.evaluate(variables.variables_initializer([variable_node])) - variable_graph_def = sess.graph.as_graph_def() - - if inline_functions: - # Run Grappler to create the VarOpHandle --> Placeholder --> - # ResourceVariable pattern. - variable_graph_def = self._inline_functions( - variable_graph_def, ["variable_node", "output_node"]) - - constant_graph_def = graph_util.convert_variables_to_constants( - sess, variable_graph_def, ["output_node"]) - - self._ensure_no_variables_in_graph(constant_graph_def) - - def testReferenceVariables(self): - """Freezes a graph with reference variables.""" - self._test_variable_to_const_conversion(use_resource=False) - - def testResourceVariables(self): - """Freezes a graph with resource variables.""" - self._test_variable_to_const_conversion(use_resource=True) - - def testWithFunctions(self): - """Freezes a graph with functions.""" - self._test_convert_variables_with_functions(inline_functions=False) - - def testWithInlinedFunctions(self): - """Freezes a graph with functions that have been inlined using Grappler.""" - self._test_convert_variables_with_functions(inline_functions=True) - - def testGraphWithSwitch(self): - """Freezes a graph which contains a Switch with type RESOURCE_DT.""" - with ops.Graph().as_default(): - with variable_scope.variable_scope("", use_resource=True): - x = variable_scope.get_variable("var_x", initializer=1.0) - y = variable_scope.get_variable("var_y", initializer=2.0) - f1 = lambda: variable_scope.get_variable("var_f1", initializer=17.0) - f2 = lambda: variable_scope.get_variable("var_f2", initializer=23.0) - cond_node = control_flow_ops.case([(gen_math_ops.less(x, y), f1)], - default=f2) - _ = math_ops_lib.multiply(cond_node, 2.0, name="output_node") - - with session.Session() as sess: - sess.run(variables.global_variables_initializer()) - variable_graph_def = sess.graph.as_graph_def() - - constant_graph_def = graph_util.convert_variables_to_constants( - sess, variable_graph_def, ["output_node"]) - - self._ensure_no_variables_in_graph(constant_graph_def) - - if __name__ == "__main__": test.main() From f467f35f25d5de217d19783da29b36594c55adc9 Mon Sep 17 00:00:00 2001 From: RJ Skerry-Ryan Date: Tue, 9 Jun 2020 09:23:02 -0700 Subject: [PATCH 123/178] Generalize xla.is_flat to include any Mapping type, not just dict. This allows custom mapping types to be returned from functions used with xla.compile. 
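A short sketch of what the relaxed check allows; the Outputs class below is a made-up example of a custom mapping type and is not part of the change:

  import collections

  class Outputs(collections.abc.Mapping):
    # Minimal read-only mapping; any Mapping instance, not only dict,
    # is now reported as non-flat by xla.is_flat().

    def __init__(self, **values):
      self._values = dict(values)

    def __getitem__(self, key):
      return self._values[key]

    def __iter__(self):
      return iter(self._values)

    def __len__(self):
      return len(self._values)

  # A computation passed to xla.compile() can now return Outputs(...) and have
  # it treated as a structured (non-flat) result, the same way a plain dict is.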
PiperOrigin-RevId: 315497847 Change-Id: I6498c13aaba4b564b04a5fa2be29d6ce7ae31356 --- tensorflow/python/compiler/xla/xla.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/compiler/xla/xla.py b/tensorflow/python/compiler/xla/xla.py index 622fadfba8a..0c00bebc9b8 100644 --- a/tensorflow/python/compiler/xla/xla.py +++ b/tensorflow/python/compiler/xla/xla.py @@ -429,11 +429,12 @@ def is_flat(outputs): # there is, then outputs is non-flat. if isinstance(outputs, collections.Sequence): for o in outputs: - if isinstance(o, collections.Sequence) or isinstance(o, dict): + if isinstance(o, collections.Sequence) or isinstance( + o, collections.Mapping): return False # If outputs is a dict, it is non-flat. - if isinstance(outputs, dict): + if isinstance(outputs, collections.Mapping): return False # Getting here means either outputs itself is a single non-structured value From 05b91630deb1e901a3bf4ec6e2531f73b72cd910 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Jun 2020 10:14:05 -0700 Subject: [PATCH 124/178] [TF:XLA] Clarify the error message that is returned when a variable is not initialized on the current device. The variable may already be initialized on another device. PiperOrigin-RevId: 315507521 Change-Id: I69e06fe5db788c0671a8ea05c086c713efa3be37 --- tensorflow/compiler/tests/variable_ops_test.py | 2 +- tensorflow/compiler/tf2xla/xla_op_kernel.cc | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/tests/variable_ops_test.py b/tensorflow/compiler/tests/variable_ops_test.py index fbc7ef49700..aeafc993a5b 100644 --- a/tensorflow/compiler/tests/variable_ops_test.py +++ b/tensorflow/compiler/tests/variable_ops_test.py @@ -486,7 +486,7 @@ class SliceAssignTest(xla_test.XLATestCase): def testUninitialized(self): with self.assertRaisesRegexp(errors.FailedPreconditionError, - "uninitialized variable"): + "Read variable failure"): with self.session() as sess, self.test_scope(): v = resource_variable_ops.ResourceVariable([1, 2]) sess.run(v[:].assign([1, 2])) diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.cc b/tensorflow/compiler/tf2xla/xla_op_kernel.cc index 2c6edf5389e..a72e3fd44dd 100644 --- a/tensorflow/compiler/tf2xla/xla_op_kernel.cc +++ b/tensorflow/compiler/tf2xla/xla_op_kernel.cc @@ -413,8 +413,10 @@ Status ReadVariableInputTensor(const Tensor& tensor, DataType type, TF_RET_CHECK(variable != nullptr); TF_RET_CHECK(variable->kind() == XlaResource::kVariable); if (!variable->initialized()) { - return errors::FailedPrecondition("Read of uninitialized variable ", - variable->name()); + return errors::FailedPrecondition( + "Read variable failure ", variable->name(), + ". It could mean the variable is not initialized or the variable is on " + "another device "); } if (variable->type() != type) { return errors::InvalidArgument( @@ -464,8 +466,10 @@ Status XlaOpKernelContext::GetVariableTypeAndShape(int index, DataType* type, TF_RET_CHECK(variable != nullptr); TF_RET_CHECK(variable->kind() == XlaResource::kVariable); if (!variable->initialized()) { - return errors::InvalidArgument("Read of uninitialized variable ", - variable->name()); + return errors::InvalidArgument( + "Read variable failure ", variable->name(), + ". It could mean the variable is not initialized or the variable is on " + "another device "); } *type = variable->type(); *shape = variable->shape(); From f6d4243b81cfabb8dc42579c25bfff599a1243f7 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 9 Jun 2020 10:25:09 -0700 Subject: [PATCH 125/178] [Profiler] Simplify the Overview Page. PiperOrigin-RevId: 315509710 Change-Id: Ica3f8d9231d8f48a64bd089b36b912c664a0a5ed --- .../profiler/convert/op_stats_to_input_pipeline_analysis.cc | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc index 0e04fc2b7ba..8864dbd4313 100644 --- a/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc +++ b/tensorflow/core/profiler/convert/op_stats_to_input_pipeline_analysis.cc @@ -637,10 +637,7 @@ void OutputAnalysis(double output_percent, std::string* output_classification, "you would need to reduce both the output time and other time."); } else { *output_classification = "device"; - *output_statement = - absl::StrCat("Your program is NOT output-bound because only ", - tc_outfeed_percent_str, - "% of the total step time sampled is spent on output."); + *output_statement = ""; } } From f4ef8b315e5de2009d3aee6bf16bd3bbd2760f02 Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Tue, 9 Jun 2020 10:40:54 -0700 Subject: [PATCH 126/178] Port the raw_to_bitmap_test from TF test to use TF platform googletest. PiperOrigin-RevId: 315513067 Change-Id: Ia48c6d215008a82877edc0432f750cb9d6c5bb54 --- .../examples/person_detection/utils/BUILD | 25 +++++---- .../utils/raw_to_bitmap_test.py | 54 ++++++++----------- 2 files changed, 34 insertions(+), 45 deletions(-) diff --git a/tensorflow/lite/micro/examples/person_detection/utils/BUILD b/tensorflow/lite/micro/examples/person_detection/utils/BUILD index 98339572078..d8b5993cc1b 100644 --- a/tensorflow/lite/micro/examples/person_detection/utils/BUILD +++ b/tensorflow/lite/micro/examples/person_detection/utils/BUILD @@ -19,16 +19,15 @@ py_library( ], ) -# TODO(b/158529664): Re-enable this test by removing the TF python test lib dependency. -# py_test( -# name = "raw_to_bitmap_test", -# srcs = ["raw_to_bitmap_test.py"], -# data = glob(["testdata/**"]), -# python_version = "PY3", -# tags = ["noubsan"], # TODO(b/144512025): Fix raw_to_bitmap_test to fix ubsan failure. -# deps = [ -# ":raw_to_bitmap_lib", -# "//third_party/py/numpy", -# "//tensorflow/python:client_testlib", -# ], -# ) +py_test( + name = "raw_to_bitmap_test", + srcs = ["raw_to_bitmap_test.py"], + data = glob(["testdata/**"]), + python_version = "PY3", + tags = ["noubsan"], # TODO(b/144512025): Fix raw_to_bitmap_test to fix ubsan failure. 
+ deps = [ + ":raw_to_bitmap_lib", + "//tensorflow/python:platform_test", + "//third_party/py/numpy", + ], +) diff --git a/tensorflow/lite/micro/examples/person_detection/utils/raw_to_bitmap_test.py b/tensorflow/lite/micro/examples/person_detection/utils/raw_to_bitmap_test.py index cc3af1bc1eb..ade895d6ee6 100644 --- a/tensorflow/lite/micro/examples/person_detection/utils/raw_to_bitmap_test.py +++ b/tensorflow/lite/micro/examples/person_detection/utils/raw_to_bitmap_test.py @@ -24,7 +24,7 @@ import numpy as np from tensorflow.lite.micro.examples.person_detection.utils.raw_to_bitmap import parse_file from tensorflow.lite.micro.examples.person_detection.utils.raw_to_bitmap import reshape_bitmaps -from tensorflow.python.platform import test +from tensorflow.python.platform import googletest _RGB_RAW = u""" +++ frame +++ @@ -40,11 +40,11 @@ _RGB_FLAT = np.array([[ 15, 15, 15 ]]) -_RGB_RESHAPED = np.array( - [[[[12, 12, 12], [13, 13, 13], [14, 14, 14], [15, 15, 15]], - [[8, 8, 8], [9, 9, 9], [10, 10, 10], [11, 11, 11]], - [[4, 4, 4], [5, 5, 5], [6, 6, 6], [7, 7, 7]], - [[0, 0, 0], [1, 1, 1], [2, 2, 2], [3, 3, 3]]]]) +_RGB_RESHAPED = np.array([[[[12, 12, 12], [13, 13, 13], [14, 14, 14], + [15, 15, 15]], + [[8, 8, 8], [9, 9, 9], [10, 10, 10], [11, 11, 11]], + [[4, 4, 4], [5, 5, 5], [6, 6, 6], [7, 7, 7]], + [[0, 0, 0], [1, 1, 1], [2, 2, 2], [3, 3, 3]]]]) _GRAYSCALE_RAW = u""" +++ frame +++ @@ -55,12 +55,9 @@ _GRAYSCALE_RAW = u""" _GRAYSCALE_FLAT = np.array( [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]) -_GRAYSCALE_RESHAPED = np.array([[[12, 13, 14, 15], - [8, 9, 10, 11], - [4, 5, 6, 7], +_GRAYSCALE_RESHAPED = np.array([[[12, 13, 14, 15], [8, 9, 10, 11], [4, 5, 6, 7], [0, 1, 2, 3]]]) - _GRAYSCALE_RAW_MULTI = u""" +++ frame +++ 0x0000 0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0a 0x0b 0x0c 0x0d 0x0e 0x0f @@ -80,46 +77,39 @@ _GRAYSCALE_FLAT_MULTI = [ np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), np.array([16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]), np.array([32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]), - np.array([48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63])] + np.array([48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63]) +] _GRAYSCALE_RESHAPED_MULTI = [ - np.array([[12, 13, 14, 15], - [8, 9, 10, 11], - [4, 5, 6, 7], - [0, 1, 2, 3]]), - np.array([[28, 29, 30, 31], - [24, 25, 26, 27], - [20, 21, 22, 23], + np.array([[12, 13, 14, 15], [8, 9, 10, 11], [4, 5, 6, 7], [0, 1, 2, 3]]), + np.array([[28, 29, 30, 31], [24, 25, 26, 27], [20, 21, 22, 23], [16, 17, 18, 19]]), - np.array([[44, 45, 46, 47], - [40, 41, 42, 43], - [36, 37, 38, 39], + np.array([[44, 45, 46, 47], [40, 41, 42, 43], [36, 37, 38, 39], [32, 33, 34, 35]]), - np.array([[60, 61, 62, 63], - [56, 57, 58, 59], - [52, 53, 54, 55], - [48, 49, 50, 51]])] + np.array([[60, 61, 62, 63], [56, 57, 58, 59], [52, 53, 54, 55], + [48, 49, 50, 51]]) +] -class RawToBitmapTest(test.TestCase): +class RawToBitmapTest(googletest.TestCase): - def testParseRgb(self): + def test_parse_rgb(self): frame_list = parse_file(io.StringIO(_RGB_RAW), 4, 4, 3) self.assertTrue(np.array_equal(_RGB_FLAT, frame_list)) - def testParseGrayscale(self): + def test_parse_grayscale(self): frame_list = parse_file(io.StringIO(_GRAYSCALE_RAW), 4, 4, 1) self.assertTrue(np.array_equal(_GRAYSCALE_FLAT, frame_list)) - def testReshapeRgb(self): + def test_reshape_rgb(self): reshaped = reshape_bitmaps(_RGB_FLAT, 4, 4, 3) self.assertTrue(np.array_equal(_RGB_RESHAPED, reshaped)) - 
def testReshapeGrayscale(self): + def test_reshape_grayscale(self): reshaped = reshape_bitmaps(_GRAYSCALE_FLAT, 4, 4, 1) self.assertTrue(np.array_equal(_GRAYSCALE_RESHAPED, reshaped)) - def testMultipleGrayscale(self): + def test_multiple_grayscale(self): frame_list = parse_file(io.StringIO(_GRAYSCALE_RAW_MULTI), 4, 4, 1) self.assertTrue(np.array_equal(_GRAYSCALE_FLAT_MULTI, frame_list)) reshaped = reshape_bitmaps(frame_list, 4, 4, 1) @@ -127,4 +117,4 @@ class RawToBitmapTest(test.TestCase): if __name__ == '__main__': - test.main() + googletest.main() From 0c8a72d5da936a457072cebc7616dae47c9b174e Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Tue, 9 Jun 2020 10:44:48 -0700 Subject: [PATCH 127/178] Remove run_deprecated_v1 decorator from feature_column_v2_test file. Rework tests to either use ops.Graph().as_default, or to work eagerly. PiperOrigin-RevId: 315513899 Change-Id: I4cff575c04db603f63b76a160dd3a2ab790dbcbb --- .../feature_column/feature_column_v2_test.py | 1752 ++++++++--------- 1 file changed, 847 insertions(+), 905 deletions(-) diff --git a/tensorflow/python/feature_column/feature_column_v2_test.py b/tensorflow/python/feature_column/feature_column_v2_test.py index 91fb7eadb89..844478c879b 100644 --- a/tensorflow/python/feature_column/feature_column_v2_test.py +++ b/tensorflow/python/feature_column/feature_column_v2_test.py @@ -240,7 +240,6 @@ class LazyColumnTest(test.TestCase): TypeError, '"key" must be either a "str" or "FeatureColumn".'): transformation_cache.get(NotAFeatureColumn(), None) - @test_util.run_deprecated_v1 def test_expand_dim_rank_1_sparse_tensor_empty_batch(self): # empty 1-D sparse tensor: transformation_cache = fc.FeatureTransformationCache( @@ -260,7 +259,6 @@ class LazyColumnTest(test.TestCase): class NumericColumnTest(test.TestCase): - @test_util.run_deprecated_v1 def test_defaults(self): a = fc.numeric_column('aaa') self.assertEqual('aaa', a.key) @@ -341,7 +339,6 @@ class NumericColumnTest(test.TestCase): 'aaa': parsing_ops.FixedLenFeature((2, 3), dtype=dtypes.int32) }, a.parse_example_spec) - @test_util.run_deprecated_v1 def test_parse_example_no_default_value(self): price = fc.numeric_column('price', shape=[2]) data = example_pb2.Example( @@ -358,7 +355,6 @@ class NumericColumnTest(test.TestCase): self.assertAllEqual([[20., 110.]], self.evaluate(features['price'])) - @test_util.run_deprecated_v1 def test_parse_example_with_default_value(self): price = fc.numeric_column('price', shape=[2], default_value=11.) 
data = example_pb2.Example( @@ -388,7 +384,6 @@ class NumericColumnTest(test.TestCase): with self.assertRaisesRegexp(TypeError, 'must be a callable'): fc.numeric_column('price', normalizer_fn='NotACallable') - @test_util.run_deprecated_v1 def test_normalizer_fn_transform_feature(self): def _increment_two(input_tensor): @@ -401,7 +396,6 @@ class NumericColumnTest(test.TestCase): self.assertAllEqual([[3., 4.], [7., 8.]], self.evaluate(output[price])) - @test_util.run_deprecated_v1 def test_get_dense_tensor(self): def _increment_two(input_tensor): @@ -411,7 +405,7 @@ class NumericColumnTest(test.TestCase): transformation_cache = fc.FeatureTransformationCache({ 'price': [[1., 2.], [5., 6.]] }) - self.assertEqual( + self.assertAllEqual( transformation_cache.get(price, None), price.get_dense_tensor(transformation_cache, None)) @@ -425,7 +419,6 @@ class NumericColumnTest(test.TestCase): with self.assertRaisesRegexp(ValueError, 'must be a Tensor'): price.transform_feature(transformation_cache, None) - @test_util.run_deprecated_v1 def test_deep_copy(self): a = fc.numeric_column('aaa', shape=[1, 2], default_value=[[3., 2.]]) a_copy = copy.deepcopy(a) @@ -452,7 +445,6 @@ class NumericColumnTest(test.TestCase): sess.run(price_var.assign([[10.]])) self.assertAllClose([[10.], [50.]], self.evaluate(predictions)) - @test_util.run_deprecated_v1 def test_serialization(self): def _increment_two(input_tensor): @@ -543,7 +535,6 @@ class BucketizedColumnTest(test.TestCase): # Column 'aaa` has shape [2] times three buckets -> num_buckets=6. self.assertEqual(6, b.num_buckets) - @test_util.run_deprecated_v1 def test_parse_example(self): price = fc.numeric_column('price', shape=[2]) bucketized_price = fc.bucketized_column(price, boundaries=[0, 50]) @@ -561,7 +552,6 @@ class BucketizedColumnTest(test.TestCase): self.assertAllEqual([[20., 110.]], self.evaluate(features['price'])) - @test_util.run_deprecated_v1 def test_transform_feature(self): price = fc.numeric_column('price', shape=[2]) bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6]) @@ -665,7 +655,6 @@ class BucketizedColumnTest(test.TestCase): with self.assertRaisesRegexp(ValueError, 'must be a Tensor'): bucketized_price.transform_feature(transformation_cache, None) - @test_util.run_deprecated_v1 def test_deep_copy(self): a = fc.numeric_column('aaa', shape=[2]) a_bucketized = fc.bucketized_column(a, boundaries=[0, 1]) @@ -759,7 +748,6 @@ class BucketizedColumnTest(test.TestCase): self.assertAllClose([[11.], [21.], [41.], [51.]], self.evaluate(predictions)) - @test_util.run_deprecated_v1 def test_serialization(self): price = fc.numeric_column('price', shape=[2]) bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6]) @@ -795,7 +783,6 @@ class BucketizedColumnTest(test.TestCase): class HashedCategoricalColumnTest(test.TestCase): - @test_util.run_deprecated_v1 def test_defaults(self): a = fc.categorical_column_with_hash_bucket('aaa', 10) self.assertEqual('aaa', a.name) @@ -826,7 +813,6 @@ class HashedCategoricalColumnTest(test.TestCase): with self.assertRaisesRegexp(ValueError, 'dtype must be string or integer'): fc.categorical_column_with_hash_bucket('aaa', 10, dtype=dtypes.float32) - @test_util.run_deprecated_v1 def test_deep_copy(self): original = fc.categorical_column_with_hash_bucket('aaa', 10) for column in (original, copy.deepcopy(original)): @@ -847,7 +833,6 @@ class HashedCategoricalColumnTest(test.TestCase): 'aaa': parsing_ops.VarLenFeature(dtypes.int32) }, a.parse_example_spec) - @test_util.run_deprecated_v1 def 
test_parse_example(self): a = fc.categorical_column_with_hash_bucket('aaa', 10) data = example_pb2.Example( @@ -870,7 +855,6 @@ class HashedCategoricalColumnTest(test.TestCase): values=np.array([b'omar', b'stringer'], dtype=np.object_), dense_shape=[1, 2]), self.evaluate(features['aaa'])) - @test_util.run_deprecated_v1 def test_strings_should_be_hashed(self): hashed_sparse = fc.categorical_column_with_hash_bucket('wire', 10) wire_tensor = sparse_tensor.SparseTensor( @@ -924,7 +908,6 @@ class HashedCategoricalColumnTest(test.TestCase): with self.assertRaisesRegexp(ValueError, 'dtype must be compatible'): transformation_cache.get(hashed_sparse, None) - @test_util.run_deprecated_v1 def test_ints_should_be_hashed(self): hashed_sparse = fc.categorical_column_with_hash_bucket( 'wire', 10, dtype=dtypes.int64) @@ -939,7 +922,6 @@ class HashedCategoricalColumnTest(test.TestCase): self.assertAllEqual(expected_values, self.evaluate(output.values)) - @test_util.run_deprecated_v1 def test_int32_64_is_compatible(self): hashed_sparse = fc.categorical_column_with_hash_bucket( 'wire', 10, dtype=dtypes.int64) @@ -954,7 +936,6 @@ class HashedCategoricalColumnTest(test.TestCase): self.assertAllEqual(expected_values, self.evaluate(output.values)) - @test_util.run_deprecated_v1 def test_get_sparse_tensors(self): hashed_sparse = fc.categorical_column_with_hash_bucket('wire', 10) transformation_cache = fc.FeatureTransformationCache({ @@ -970,7 +951,6 @@ class HashedCategoricalColumnTest(test.TestCase): self.assertEqual( transformation_cache.get(hashed_sparse, None), id_weight_pair.id_tensor) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_dense_input(self): hashed_sparse = fc.categorical_column_with_hash_bucket('wire', 10) transformation_cache = fc.FeatureTransformationCache({ @@ -1007,7 +987,6 @@ class HashedCategoricalColumnTest(test.TestCase): # 'skywalker' -> 2, 'omar' -> 2: wire_var[2] + wire_var[2] = 3+3 = 6 self.assertAllClose(((4.,), (6.,)), self.evaluate(predictions)) - @test_util.run_deprecated_v1 def test_serialization(self): wire_column = fc.categorical_column_with_hash_bucket('wire', 4) self.assertEqual(['wire'], wire_column.parents) @@ -1109,7 +1088,6 @@ class CrossedColumnTest(test.TestCase): crossed = fc.crossed_column([b, 'c'], 15) self.assertEqual(15, crossed.num_buckets) - @test_util.run_deprecated_v1 def test_deep_copy(self): a = fc.numeric_column('a', dtype=dtypes.int32) b = fc.bucketized_column(a, boundaries=[0, 1]) @@ -1123,7 +1101,6 @@ class CrossedColumnTest(test.TestCase): self.assertEqual(15, crossed2_copy.hash_bucket_size) self.assertEqual(5, crossed2_copy.hash_key) - @test_util.run_deprecated_v1 def test_parse_example(self): price = fc.numeric_column('price', shape=[2]) bucketized_price = fc.bucketized_column(price, boundaries=[0, 50]) @@ -1153,7 +1130,6 @@ class CrossedColumnTest(test.TestCase): self.evaluate(wire_sparse.values)) self.assertAllEqual([1, 2], self.evaluate(wire_sparse.dense_shape)) - @test_util.run_deprecated_v1 def test_transform_feature(self): price = fc.numeric_column('price', shape=[2]) bucketized_price = fc.bucketized_column(price, boundaries=[0, 50]) @@ -1178,7 +1154,6 @@ class CrossedColumnTest(test.TestCase): self.assertIn(val, list(range(hash_bucket_size))) self.assertAllEqual([2, 4], output_val.dense_shape) - @test_util.run_deprecated_v1 def test_get_sparse_tensors(self): a = fc.numeric_column('a', dtype=dtypes.int32, shape=(2,)) b = fc.bucketized_column(a, boundaries=(0, 1)) @@ -1391,7 +1366,6 @@ class CrossedColumnTest(test.TestCase): 
sess.run(bias.assign((.1,))) self.assertAllClose(((3.1,), (14.1,)), self.evaluate(predictions)) - @test_util.run_deprecated_v1 def test_serialization(self): a = fc.numeric_column('a', dtype=dtypes.int32, shape=(2,)) b = fc.bucketized_column(a, boundaries=(0, 1)) @@ -2026,94 +2000,97 @@ class OldLinearModelTest(test.TestCase): features['price2']: [[1.], [5.]], }) - @test_util.run_deprecated_v1 def test_with_1d_sparse_tensor(self): - price = fc.numeric_column('price') - price_buckets = fc.bucketized_column( - price, boundaries=[ - 0., - 10., - 100., - ]) - body_style = fc.categorical_column_with_vocabulary_list( - 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) + # This test does direct variable scope access + manipulations + # that require entering a legacy graph + with ops.Graph().as_default(): + price = fc.numeric_column('price') + price_buckets = fc.bucketized_column( + price, boundaries=[ + 0., + 10., + 100., + ]) + body_style = fc.categorical_column_with_vocabulary_list( + 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) - # Provides 1-dim tensor and dense tensor. - features = { - 'price': - constant_op.constant([ - -1., - 12., - ]), - 'body-style': - sparse_tensor.SparseTensor( - indices=((0,), (1,)), - values=('sedan', 'hardtop'), - dense_shape=(2,)), - } - self.assertEqual(1, features['price'].shape.ndims) - self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0]) + # Provides 1-dim tensor and dense tensor. + features = { + 'price': + constant_op.constant([ + -1., + 12., + ]), + 'body-style': + sparse_tensor.SparseTensor( + indices=((0,), (1,)), + values=('sedan', 'hardtop'), + dense_shape=(2,)), + } + self.assertEqual(1, features['price'].shape.ndims) + self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0]) - net = fc_old.linear_model(features, [price_buckets, body_style]) - with _initialized_session() as sess: + net = fc_old.linear_model(features, [price_buckets, body_style]) + with _initialized_session() as sess: + bias = get_linear_model_bias() + price_buckets_var = get_linear_model_column_var(price_buckets) + body_style_var = get_linear_model_column_var(body_style) + + sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]])) + sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]])) + sess.run(bias.assign([5.])) + + self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]], + self.evaluate(net)) + + def test_with_1d_unknown_shape_sparse_tensor(self): + # This test needs to access variables bia variable scope & needs to be + # run inside of a legacy graph + with ops.Graph().as_default(): + price = fc.numeric_column('price') + price_buckets = fc.bucketized_column( + price, boundaries=[ + 0., + 10., + 100., + ]) + body_style = fc.categorical_column_with_vocabulary_list( + 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) + country = fc.categorical_column_with_vocabulary_list( + 'country', vocabulary_list=['US', 'JP', 'CA']) + + # Provides 1-dim tensor and dense tensor. 
+ features = { + 'price': array_ops.placeholder(dtypes.float32), + 'body-style': array_ops.sparse_placeholder(dtypes.string), + 'country': array_ops.placeholder(dtypes.string), + } + self.assertIsNone(features['price'].shape.ndims) + self.assertIsNone(features['body-style'].get_shape().ndims) + + price_data = np.array([-1., 12.]) + body_style_data = sparse_tensor.SparseTensorValue( + indices=((0,), (1,)), values=('sedan', 'hardtop'), dense_shape=(2,)) + country_data = np.array(['US', 'CA']) + + net = fc_old.linear_model(features, [price_buckets, body_style, country]) bias = get_linear_model_bias() price_buckets_var = get_linear_model_column_var(price_buckets) body_style_var = get_linear_model_column_var(body_style) + with _initialized_session() as sess: + sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]])) + sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]])) + sess.run(bias.assign([5.])) - sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]])) - sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]])) - sess.run(bias.assign([5.])) + self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]], + sess.run( + net, + feed_dict={ + features['price']: price_data, + features['body-style']: body_style_data, + features['country']: country_data + })) - self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]], - self.evaluate(net)) - - @test_util.run_deprecated_v1 - def test_with_1d_unknown_shape_sparse_tensor(self): - price = fc.numeric_column('price') - price_buckets = fc.bucketized_column( - price, boundaries=[ - 0., - 10., - 100., - ]) - body_style = fc.categorical_column_with_vocabulary_list( - 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) - country = fc.categorical_column_with_vocabulary_list( - 'country', vocabulary_list=['US', 'JP', 'CA']) - - # Provides 1-dim tensor and dense tensor. 
- features = { - 'price': array_ops.placeholder(dtypes.float32), - 'body-style': array_ops.sparse_placeholder(dtypes.string), - 'country': array_ops.placeholder(dtypes.string), - } - self.assertIsNone(features['price'].shape.ndims) - self.assertIsNone(features['body-style'].get_shape().ndims) - - price_data = np.array([-1., 12.]) - body_style_data = sparse_tensor.SparseTensorValue( - indices=((0,), (1,)), values=('sedan', 'hardtop'), dense_shape=(2,)) - country_data = np.array(['US', 'CA']) - - net = fc_old.linear_model(features, [price_buckets, body_style, country]) - bias = get_linear_model_bias() - price_buckets_var = get_linear_model_column_var(price_buckets) - body_style_var = get_linear_model_column_var(body_style) - with _initialized_session() as sess: - sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]])) - sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]])) - sess.run(bias.assign([5.])) - - self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]], - sess.run( - net, - feed_dict={ - features['price']: price_data, - features['body-style']: body_style_data, - features['country']: country_data - })) - - @test_util.run_deprecated_v1 def test_with_rank_0_feature(self): price = fc.numeric_column('price') features = { @@ -2125,15 +2102,18 @@ class OldLinearModelTest(test.TestCase): with self.assertRaisesRegexp(ValueError, 'Feature .* cannot have rank 0'): fc_old.linear_model(features, [price]) - # Dynamic rank 0 should fail - features = { - 'price': array_ops.placeholder(dtypes.float32), - } - net = fc_old.linear_model(features, [price]) - self.assertEqual(1, net.shape[1]) - with _initialized_session() as sess: - with self.assertRaisesOpError('Feature .* cannot have rank 0'): - sess.run(net, feed_dict={features['price']: np.array(1)}) + # This test needs to construct graph placeholders + # w/ dynamic rank 0, so we enter a graph + with ops.Graph().as_default(): + # Dynamic rank 0 should fail + features = { + 'price': array_ops.placeholder(dtypes.float32), + } + net = fc_old.linear_model(features, [price]) + self.assertEqual(1, net.shape[1]) + with _initialized_session() as sess: + with self.assertRaisesOpError('Feature .* cannot have rank 0'): + sess.run(net, feed_dict={features['price']: np.array(1)}) def test_multiple_linear_models(self): price = fc.numeric_column('price') @@ -2156,24 +2136,24 @@ class OldLinearModelTest(test.TestCase): sess.run(bias2.assign([5.])) self.assertAllClose([[25.], [105.]], self.evaluate(predictions2)) - @test_util.run_deprecated_v1 def test_linear_model_v1_shared_embedding_all_other_v2(self): - price = fc.numeric_column('price') # v2 - some_sparse_column = fc.categorical_column_with_hash_bucket( - 'sparse_feature', hash_bucket_size=5) # v2 - some_embedding_column = fc.embedding_column( - some_sparse_column, dimension=10) # v2 - categorical_column_a = fc.categorical_column_with_identity( - key='aaa', num_buckets=3) # v2 - categorical_column_b = fc.categorical_column_with_identity( - key='bbb', num_buckets=3) # v2 - shared_embedding_a, shared_embedding_b = fc.shared_embedding_columns( - [categorical_column_a, categorical_column_b], dimension=2) # v1 - all_cols = [ - price, some_embedding_column, shared_embedding_a, shared_embedding_b - ] - + # SharedEmbeddingColumns are graph-only with ops.Graph().as_default(): + price = fc.numeric_column('price') # v2 + some_sparse_column = fc.categorical_column_with_hash_bucket( + 'sparse_feature', hash_bucket_size=5) # v2 + some_embedding_column = fc.embedding_column( + some_sparse_column, 
dimension=10) # v2 + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=3) # v2 + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=3) # v2 + shared_embedding_a, shared_embedding_b = fc.shared_embedding_columns( + [categorical_column_a, categorical_column_b], dimension=2) # v1 + all_cols = [ + price, some_embedding_column, shared_embedding_a, shared_embedding_b + ] + features = { 'price': [[3.], [4.]], 'sparse_feature': [['a'], ['x']], @@ -2196,24 +2176,24 @@ class OldLinearModelTest(test.TestCase): self.assertAllClose([0.], self.evaluate(bias)) - @test_util.run_deprecated_v1 def test_linear_model_v1_shared_embedding_with_v2_cat_all_other_v2(self): - price = fc.numeric_column('price') # v2 - some_sparse_column = fc.categorical_column_with_hash_bucket( - 'sparse_feature', hash_bucket_size=5) # v2 - some_embedding_column = fc.embedding_column( - some_sparse_column, dimension=10) # v2 - categorical_column_a = fc.categorical_column_with_identity( - key='aaa', num_buckets=3) # v2 - categorical_column_b = fc.categorical_column_with_identity( - key='bbb', num_buckets=3) # v2 - shared_embedding_a, shared_embedding_b = fc.shared_embedding_columns( - [categorical_column_a, categorical_column_b], dimension=2) # v1 - all_cols = [ - price, some_embedding_column, shared_embedding_a, shared_embedding_b - ] - + # SharedEmbeddingColumns are graph-only with ops.Graph().as_default(): + price = fc.numeric_column('price') # v2 + some_sparse_column = fc.categorical_column_with_hash_bucket( + 'sparse_feature', hash_bucket_size=5) # v2 + some_embedding_column = fc.embedding_column( + some_sparse_column, dimension=10) # v2 + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=3) # v2 + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=3) # v2 + shared_embedding_a, shared_embedding_b = fc.shared_embedding_columns( + [categorical_column_a, categorical_column_b], dimension=2) # v1 + all_cols = [ + price, some_embedding_column, shared_embedding_a, shared_embedding_b + ] + features = { 'price': [[3.], [4.]], 'sparse_feature': [['a'], ['x']], @@ -2236,24 +2216,24 @@ class OldLinearModelTest(test.TestCase): self.assertAllClose([0.], self.evaluate(bias)) - @test_util.run_deprecated_v1 def test_linear_model_v1_v2_mix(self): - price = fc.numeric_column('price') # v2 - some_sparse_column = fc.categorical_column_with_hash_bucket( - 'sparse_feature', hash_bucket_size=5) # v1 - some_embedding_column = fc.embedding_column( - some_sparse_column, dimension=10) # v1 - categorical_column_a = fc.categorical_column_with_identity( - key='aaa', num_buckets=3) # v2 - categorical_column_b = fc.categorical_column_with_identity( - key='bbb', num_buckets=3) # v2 - shared_embedding_a, shared_embedding_b = fc.shared_embedding_columns( - [categorical_column_a, categorical_column_b], dimension=2) # v1 - all_cols = [ - price, some_embedding_column, shared_embedding_a, shared_embedding_b - ] - + # SharedEmbeddingColumns are graph-only with ops.Graph().as_default(): + price = fc.numeric_column('price') # v2 + some_sparse_column = fc.categorical_column_with_hash_bucket( + 'sparse_feature', hash_bucket_size=5) # v1 + some_embedding_column = fc.embedding_column( + some_sparse_column, dimension=10) # v1 + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=3) # v2 + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=3) # v2 + shared_embedding_a, 
shared_embedding_b = fc.shared_embedding_columns( + [categorical_column_a, categorical_column_b], dimension=2) # v1 + all_cols = [ + price, some_embedding_column, shared_embedding_a, shared_embedding_b + ] + features = { 'price': [[3.], [4.]], 'sparse_feature': [['a'], ['x']], @@ -2276,24 +2256,24 @@ class OldLinearModelTest(test.TestCase): self.assertAllClose([0.], self.evaluate(bias)) - @test_util.run_deprecated_v1 def test_linear_model_v2_shared_embedding_all_other_v1(self): - price = fc.numeric_column('price') # v1 - some_sparse_column = fc.categorical_column_with_hash_bucket( - 'sparse_feature', hash_bucket_size=5) # v1 - some_embedding_column = fc.embedding_column( - some_sparse_column, dimension=10) # v1 - categorical_column_a = fc.categorical_column_with_identity( - key='aaa', num_buckets=3) # v2 - categorical_column_b = fc.categorical_column_with_identity( - key='bbb', num_buckets=3) # v2 - shared_embedding_a, shared_embedding_b = fc.shared_embedding_columns_v2( - [categorical_column_a, categorical_column_b], dimension=2) # v2 - all_cols = [ - price, some_embedding_column, shared_embedding_a, shared_embedding_b - ] - + # SharedEmbeddingColumns are graph-only with ops.Graph().as_default(): + price = fc.numeric_column('price') # v1 + some_sparse_column = fc.categorical_column_with_hash_bucket( + 'sparse_feature', hash_bucket_size=5) # v1 + some_embedding_column = fc.embedding_column( + some_sparse_column, dimension=10) # v1 + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=3) # v2 + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=3) # v2 + shared_embedding_a, shared_embedding_b = fc.shared_embedding_columns_v2( + [categorical_column_a, categorical_column_b], dimension=2) # v2 + all_cols = [ + price, some_embedding_column, shared_embedding_a, shared_embedding_b + ] + features = { 'price': [[3.], [4.]], 'sparse_feature': [['a'], ['x']], @@ -2547,27 +2527,27 @@ class FunctionalInputLayerTest(test.TestCase): variables_lib.VariableV1) self.assertAllEqual(cols_to_vars[some_embedding_column][0].shape, [5, 10]) - @test_util.run_deprecated_v1 def test_fills_cols_to_vars_shared_embedding(self): # Provide 5 DenseColumn's to input_layer: a NumericColumn, a # BucketizedColumn, an EmbeddingColumn, two SharedEmbeddingColumns. The # EmbeddingColumn creates a Variable and the two SharedEmbeddingColumns # shared one variable. 
- price1 = fc.numeric_column('price1') - dense_feature = fc.numeric_column('dense_feature') - dense_feature_bucketized = fc.bucketized_column( - dense_feature, boundaries=[0.]) - some_sparse_column = fc.categorical_column_with_hash_bucket( - 'sparse_feature', hash_bucket_size=5) - some_embedding_column = fc.embedding_column( - some_sparse_column, dimension=10) - categorical_column_a = fc.categorical_column_with_identity( - key='aaa', num_buckets=3) - categorical_column_b = fc.categorical_column_with_identity( - key='bbb', num_buckets=3) - shared_embedding_a, shared_embedding_b = fc.shared_embedding_columns( - [categorical_column_a, categorical_column_b], dimension=2) + # SharedEmbeddingColumns are graph-only with ops.Graph().as_default(): + price1 = fc.numeric_column('price1') + dense_feature = fc.numeric_column('dense_feature') + dense_feature_bucketized = fc.bucketized_column( + dense_feature, boundaries=[0.]) + some_sparse_column = fc.categorical_column_with_hash_bucket( + 'sparse_feature', hash_bucket_size=5) + some_embedding_column = fc.embedding_column( + some_sparse_column, dimension=10) + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=3) + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=3) + shared_embedding_a, shared_embedding_b = fc.shared_embedding_columns( + [categorical_column_a, categorical_column_b], dimension=2) features = { 'price1': [[3.], [4.]], 'dense_feature': [[-1.], [4.]], @@ -2748,7 +2728,6 @@ class FunctionalInputLayerTest(test.TestCase): expected_var_names, [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) - @test_util.run_deprecated_v1 def test_with_1d_sparse_tensor(self): embedding_values = ( (1., 2., 3., 4., 5.), # id 0 @@ -2794,77 +2773,79 @@ class FunctionalInputLayerTest(test.TestCase): self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0]) self.assertEqual(1, features['country'].shape.ndims) - net = fc_old.input_layer(features, - [price, one_hot_body_style, embedded_country]) - self.assertEqual(1 + 3 + 5, net.shape[1]) - with _initialized_session() as sess: + if context.executing_eagerly(): + # The variables will only be initialized in TF2 + net = fc_old.input_layer(features, + [price, one_hot_body_style, embedded_country]) + self.assertEqual(1 + 3 + 5, net.shape[1]) # Each row is formed by concatenating `embedded_body_style`, # `one_hot_body_style`, and `price` in order. self.assertAllEqual([[0., 0., 1., 11., 12., 13., 14., 15., 11.], [1., 0., 0., 1., 2., 3., 4., 5., 12.]], - sess.run(net)) + self.evaluate(net)) - @test_util.run_deprecated_v1 def test_with_1d_unknown_shape_sparse_tensor(self): - embedding_values = ( - (1., 2.), # id 0 - (6., 7.), # id 1 - (11., 12.) # id 2 - ) + # This test needs to construct graph placeholders + # w/ unknown shapes, so we enter a graph + with ops.Graph().as_default(): + embedding_values = ( + (1., 2.), # id 0 + (6., 7.), # id 1 + (11., 12.) # id 2 + ) - def _initializer(shape, dtype, partition_info=None): - del shape, dtype, partition_info - return embedding_values + def _initializer(shape, dtype, partition_info=None): + del shape, dtype, partition_info + return embedding_values - # price has 1 dimension in input_layer - price = fc.numeric_column('price') + # price has 1 dimension in input_layer + price = fc.numeric_column('price') - # one_hot_body_style has 3 dims in input_layer. 
- body_style = fc.categorical_column_with_vocabulary_list( - 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) - one_hot_body_style = fc.indicator_column(body_style) + # one_hot_body_style has 3 dims in input_layer. + body_style = fc.categorical_column_with_vocabulary_list( + 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) + one_hot_body_style = fc.indicator_column(body_style) - # embedded_body_style has 5 dims in input_layer. - country = fc.categorical_column_with_vocabulary_list( - 'country', vocabulary_list=['US', 'JP', 'CA']) - embedded_country = fc.embedding_column( - country, dimension=2, initializer=_initializer) + # embedded_body_style has 5 dims in input_layer. + country = fc.categorical_column_with_vocabulary_list( + 'country', vocabulary_list=['US', 'JP', 'CA']) + embedded_country = fc.embedding_column( + country, dimension=2, initializer=_initializer) - # Provides 1-dim tensor and dense tensor. - features = { - 'price': array_ops.placeholder(dtypes.float32), - 'body-style': array_ops.sparse_placeholder(dtypes.string), - # This is dense tensor for the categorical_column. - 'country': array_ops.placeholder(dtypes.string), - } - self.assertIsNone(features['price'].shape.ndims) - self.assertIsNone(features['body-style'].get_shape().ndims) - self.assertIsNone(features['country'].shape.ndims) + # Provides 1-dim tensor and dense tensor. + features = { + 'price': array_ops.placeholder(dtypes.float32), + 'body-style': array_ops.sparse_placeholder(dtypes.string), + # This is dense tensor for the categorical_column. + 'country': array_ops.placeholder(dtypes.string), + } + self.assertIsNone(features['price'].shape.ndims) + self.assertIsNone(features['body-style'].get_shape().ndims) + self.assertIsNone(features['country'].shape.ndims) - price_data = np.array([11., 12.]) - body_style_data = sparse_tensor.SparseTensorValue( - indices=((0,), (1,)), values=('sedan', 'hardtop'), dense_shape=(2,)) - country_data = np.array([['US'], ['CA']]) + price_data = np.array([11., 12.]) + body_style_data = sparse_tensor.SparseTensorValue( + indices=((0,), (1,)), values=('sedan', 'hardtop'), dense_shape=(2,)) + country_data = np.array([['US'], ['CA']]) - net = fc_old.input_layer(features, - [price, one_hot_body_style, embedded_country]) - self.assertEqual(1 + 3 + 2, net.shape[1]) - with _initialized_session() as sess: + net = fc_old.input_layer(features, + [price, one_hot_body_style, embedded_country]) + self.assertEqual(1 + 3 + 2, net.shape[1]) + with _initialized_session() as sess: - # Each row is formed by concatenating `embedded_body_style`, - # `one_hot_body_style`, and `price` in order. - self.assertAllEqual( - [[0., 0., 1., 1., 2., 11.], [1., 0., 0., 11., 12., 12.]], - sess.run( - net, - feed_dict={ - features['price']: price_data, - features['body-style']: body_style_data, - features['country']: country_data - })) + # Each row is formed by concatenating `embedded_body_style`, + # `one_hot_body_style`, and `price` in order. 
+ self.assertAllEqual( + [[0., 0., 1., 1., 2., 11.], [1., 0., 0., 11., 12., 12.]], + sess.run( + net, + feed_dict={ + features['price']: price_data, + features['body-style']: body_style_data, + features['country']: country_data + })) - @test_util.run_deprecated_v1 def test_with_rank_0_feature(self): # price has 1 dimension in input_layer price = fc.numeric_column('price') @@ -2877,15 +2858,18 @@ class FunctionalInputLayerTest(test.TestCase): with self.assertRaisesRegexp(ValueError, 'Feature .* cannot have rank 0'): fc_old.input_layer(features, [price]) - # Dynamic rank 0 should fail - features = { - 'price': array_ops.placeholder(dtypes.float32), - } - net = fc_old.input_layer(features, [price]) - self.assertEqual(1, net.shape[1]) - with _initialized_session() as sess: - with self.assertRaisesOpError('Feature .* cannot have rank 0'): - sess.run(net, feed_dict={features['price']: np.array(1)}) + # This test needs to construct graph placeholders + # w/ dynamic rank 0, so we enter a graph + with ops.Graph().as_default(): + # Dynamic rank 0 should fail + features = { + 'price': array_ops.placeholder(dtypes.float32), + } + net = fc_old.input_layer(features, [price]) + self.assertEqual(1, net.shape[1]) + with _initialized_session() as sess: + with self.assertRaisesOpError('Feature .* cannot have rank 0'): + sess.run(net, feed_dict={features['price']: np.array(1)}) class MakeParseExampleSpecTest(test.TestCase): @@ -3032,7 +3016,6 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): self._unicode_vocabulary_file_name = test.test_src_dir_path( 'python/feature_column/testdata/unicode_vocabulary') - @test_util.run_deprecated_v1 def test_defaults(self): column = fc.categorical_column_with_vocabulary_file( key='aaa', vocabulary_file='path_to_file', vocabulary_size=3) @@ -3044,7 +3027,6 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): }, column.parse_example_spec) self.assertTrue(column._is_v2_column) - @test_util.run_deprecated_v1 def test_defaults_unicode(self): column = fc.categorical_column_with_vocabulary_file( key='aaa', vocabulary_file=self._unicode_vocabulary_file_name) @@ -3060,7 +3042,6 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): fc.categorical_column_with_vocabulary_file( key=('aaa',), vocabulary_file='path_to_file', vocabulary_size=3) - @test_util.run_deprecated_v1 def test_all_constructor_args(self): column = fc.categorical_column_with_vocabulary_file( key='aaa', @@ -3073,7 +3054,6 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): 'aaa': parsing_ops.VarLenFeature(dtypes.int32) }, column.parse_example_spec) - @test_util.run_deprecated_v1 def test_deep_copy(self): original = fc.categorical_column_with_vocabulary_file( key='aaa', @@ -3098,7 +3078,6 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): fc.categorical_column_with_vocabulary_file( key='aaa', vocabulary_file='', vocabulary_size=3) - @test_util.run_deprecated_v1 def test_invalid_vocabulary_file(self): column = fc.categorical_column_with_vocabulary_file( key='aaa', vocabulary_file='file_does_not_exist', vocabulary_size=10) @@ -3106,11 +3085,11 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): indices=((0, 0), (1, 0), (1, 1)), values=('marlo', 'skywalker', 'omar'), dense_shape=(2, 2)) - column.get_sparse_tensors( - fc.FeatureTransformationCache({ - 'aaa': inputs - }), None) with self.assertRaisesRegexp(errors.OpError, 'file_does_not_exist'): + column.get_sparse_tensors( + fc.FeatureTransformationCache({ + 'aaa': inputs + }), None) 
self.evaluate(lookup_ops.tables_initializer()) def test_invalid_vocabulary_size(self): @@ -3125,7 +3104,6 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): vocabulary_file=self._wire_vocabulary_file_name, vocabulary_size=0) - @test_util.run_deprecated_v1 def test_too_large_vocabulary_size(self): column = fc.categorical_column_with_vocabulary_file( key='aaa', @@ -3135,11 +3113,11 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): indices=((0, 0), (1, 0), (1, 1)), values=('marlo', 'skywalker', 'omar'), dense_shape=(2, 2)) - column.get_sparse_tensors( - fc.FeatureTransformationCache({ - 'aaa': inputs - }), None) with self.assertRaisesRegexp(errors.OpError, 'Invalid vocab_size'): + column.get_sparse_tensors( + fc.FeatureTransformationCache({ + 'aaa': inputs + }), None) self.evaluate(lookup_ops.tables_initializer()) def test_invalid_num_oov_buckets(self): @@ -3200,7 +3178,6 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): 'aaa': inputs }), None) - @test_util.run_deprecated_v1 def test_parse_example(self): a = fc.categorical_column_with_vocabulary_file( key='aaa', vocabulary_file='path_to_file', vocabulary_size=3) @@ -3224,7 +3201,6 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): values=np.array([b'omar', b'stringer'], dtype=np.object_), dense_shape=[1, 2]), self.evaluate(features['aaa'])) - @test_util.run_deprecated_v1 def test_get_sparse_tensors(self): column = fc.categorical_column_with_vocabulary_file( key='aaa', @@ -3251,7 +3227,6 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): dense_shape=inputs.dense_shape), self.evaluate(id_weight_pair.id_tensor)) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_none_vocabulary_size(self): column = fc.categorical_column_with_vocabulary_file( key='aaa', vocabulary_file=self._wire_vocabulary_file_name) @@ -3276,7 +3251,6 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): dense_shape=inputs.dense_shape), self.evaluate(id_weight_pair.id_tensor)) - @test_util.run_deprecated_v1 def test_transform_feature(self): column = fc.categorical_column_with_vocabulary_file( key='aaa', @@ -3300,7 +3274,6 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): values=np.array((2, -1, 0), dtype=np.int64), dense_shape=inputs.dense_shape), self.evaluate(id_tensor)) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_dense_input(self): column = fc.categorical_column_with_vocabulary_file( key='aaa', @@ -3322,7 +3295,6 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): values=np.array((2, -1, 0), dtype=np.int64), dense_shape=(2, 2)), self.evaluate(id_weight_pair.id_tensor)) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_default_value_in_vocabulary(self): column = fc.categorical_column_with_vocabulary_file( key='aaa', @@ -3350,7 +3322,6 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): dense_shape=inputs.dense_shape), self.evaluate(id_weight_pair.id_tensor)) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_with_oov_buckets(self): column = fc.categorical_column_with_vocabulary_file( key='aaa', @@ -3378,7 +3349,6 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): dense_shape=inputs.dense_shape), self.evaluate(id_weight_pair.id_tensor)) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_small_vocabulary_size(self): # 'marlo' is the last entry in our vocabulary file, so be setting # `vocabulary_size` to 1 less than number of entries in file, we take @@ -3408,7 +3378,6 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): 
dense_shape=inputs.dense_shape), self.evaluate(id_weight_pair.id_tensor)) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_int32(self): column = fc.categorical_column_with_vocabulary_file( key='aaa', @@ -3436,7 +3405,6 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): dense_shape=inputs.dense_shape), self.evaluate(id_weight_pair.id_tensor)) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_int32_dense_input(self): default_value = -100 column = fc.categorical_column_with_vocabulary_file( @@ -3461,7 +3429,6 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): values=np.array((2, default_value, 0, 4), dtype=np.int64), dense_shape=(3, 3)), self.evaluate(id_weight_pair.id_tensor)) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_int32_with_oov_buckets(self): column = fc.categorical_column_with_vocabulary_file( key='aaa', @@ -3519,7 +3486,6 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5 self.assertAllClose(((3.,), (5.,)), self.evaluate(predictions)) - @test_util.run_deprecated_v1 def test_serialization(self): wire_column = fc.categorical_column_with_vocabulary_file( key='wire', @@ -3571,7 +3537,6 @@ class VocabularyListCategoricalColumnTest(test.TestCase): 'aaa': parsing_ops.VarLenFeature(dtypes.int64) }, column.parse_example_spec) - @test_util.run_deprecated_v1 def test_all_constructor_args(self): column = fc.categorical_column_with_vocabulary_list( key='aaa', @@ -3583,7 +3548,6 @@ class VocabularyListCategoricalColumnTest(test.TestCase): 'aaa': parsing_ops.VarLenFeature(dtypes.int32) }, column.parse_example_spec) - @test_util.run_deprecated_v1 def test_deep_copy(self): original = fc.categorical_column_with_vocabulary_list( key='aaa', vocabulary_list=(12, 24, 36), dtype=dtypes.int32) @@ -3678,7 +3642,6 @@ class VocabularyListCategoricalColumnTest(test.TestCase): 'aaa': inputs }), None) - @test_util.run_deprecated_v1 def test_parse_example_string(self): a = fc.categorical_column_with_vocabulary_list( key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) @@ -3702,7 +3665,6 @@ class VocabularyListCategoricalColumnTest(test.TestCase): values=np.array([b'omar', b'stringer'], dtype=np.object_), dense_shape=[1, 2]), self.evaluate(features['aaa'])) - @test_util.run_deprecated_v1 def test_parse_example_int(self): a = fc.categorical_column_with_vocabulary_list( key='aaa', vocabulary_list=(11, 21, 31)) @@ -3724,7 +3686,6 @@ class VocabularyListCategoricalColumnTest(test.TestCase): indices=[[0, 0], [0, 1]], values=[11, 21], dense_shape=[1, 2]), self.evaluate(features['aaa'])) - @test_util.run_deprecated_v1 def test_get_sparse_tensors(self): column = fc.categorical_column_with_vocabulary_list( key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) @@ -3749,7 +3710,6 @@ class VocabularyListCategoricalColumnTest(test.TestCase): dense_shape=inputs.dense_shape), self.evaluate(id_weight_pair.id_tensor)) - @test_util.run_deprecated_v1 def test_transform_feature(self): column = fc.categorical_column_with_vocabulary_list( key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) @@ -3771,7 +3731,6 @@ class VocabularyListCategoricalColumnTest(test.TestCase): values=np.array((2, -1, 0), dtype=np.int64), dense_shape=inputs.dense_shape), self.evaluate(id_tensor)) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_dense_input(self): column = fc.categorical_column_with_vocabulary_list( key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) @@ -3791,7 +3750,6 @@ class 
VocabularyListCategoricalColumnTest(test.TestCase): values=np.array((2, -1, 0), dtype=np.int64), dense_shape=(2, 2)), self.evaluate(id_weight_pair.id_tensor)) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_default_value_in_vocabulary(self): column = fc.categorical_column_with_vocabulary_list( key='aaa', @@ -3818,7 +3776,6 @@ class VocabularyListCategoricalColumnTest(test.TestCase): dense_shape=inputs.dense_shape), self.evaluate(id_weight_pair.id_tensor)) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_with_oov_buckets(self): column = fc.categorical_column_with_vocabulary_list( key='aaa', @@ -3845,7 +3802,6 @@ class VocabularyListCategoricalColumnTest(test.TestCase): dense_shape=inputs.dense_shape), self.evaluate(id_weight_pair.id_tensor)) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_int32(self): column = fc.categorical_column_with_vocabulary_list( key='aaa', @@ -3872,7 +3828,6 @@ class VocabularyListCategoricalColumnTest(test.TestCase): dense_shape=inputs.dense_shape), self.evaluate(id_weight_pair.id_tensor)) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_int32_dense_input(self): default_value = -100 column = fc.categorical_column_with_vocabulary_list( @@ -3898,7 +3853,6 @@ class VocabularyListCategoricalColumnTest(test.TestCase): values=np.array((2, default_value, 0, 4), dtype=np.int64), dense_shape=(3, 3)), self.evaluate(id_weight_pair.id_tensor)) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_int32_with_oov_buckets(self): column = fc.categorical_column_with_vocabulary_list( key='aaa', @@ -3954,7 +3908,6 @@ class VocabularyListCategoricalColumnTest(test.TestCase): # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5 self.assertAllClose(((3.,), (5.,)), self.evaluate(predictions)) - @test_util.run_deprecated_v1 def test_serialization(self): wire_column = fc.categorical_column_with_vocabulary_list( key='aaa', @@ -3992,7 +3945,6 @@ class IdentityCategoricalColumnTest(test.TestCase): with self.assertRaisesRegexp(ValueError, 'key must be a string.'): fc.categorical_column_with_identity(key=('aaa',), num_buckets=3) - @test_util.run_deprecated_v1 def test_deep_copy(self): original = fc.categorical_column_with_identity(key='aaa', num_buckets=3) for column in (original, copy.deepcopy(original)): @@ -4032,7 +3984,6 @@ class IdentityCategoricalColumnTest(test.TestCase): 'aaa': inputs }), None) - @test_util.run_deprecated_v1 def test_parse_example(self): a = fc.categorical_column_with_identity(key='aaa', num_buckets=30) data = example_pb2.Example( @@ -4054,7 +4005,6 @@ class IdentityCategoricalColumnTest(test.TestCase): values=np.array([11, 21], dtype=np.int64), dense_shape=[1, 2]), self.evaluate(features['aaa'])) - @test_util.run_deprecated_v1 def test_get_sparse_tensors(self): column = fc.categorical_column_with_identity(key='aaa', num_buckets=3) inputs = sparse_tensor.SparseTensorValue( @@ -4076,7 +4026,6 @@ class IdentityCategoricalColumnTest(test.TestCase): dense_shape=inputs.dense_shape), self.evaluate(id_weight_pair.id_tensor)) - @test_util.run_deprecated_v1 def test_transform_feature(self): column = fc.categorical_column_with_identity(key='aaa', num_buckets=3) inputs = sparse_tensor.SparseTensorValue( @@ -4095,7 +4044,6 @@ class IdentityCategoricalColumnTest(test.TestCase): values=np.array((0, 1, 0), dtype=np.int64), dense_shape=inputs.dense_shape), self.evaluate(id_tensor)) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_dense_input(self): column = fc.categorical_column_with_identity(key='aaa', 
num_buckets=3) id_weight_pair = column.get_sparse_tensors( @@ -4150,14 +4098,11 @@ class IdentityCategoricalColumnTest(test.TestCase): self.evaluate(variables_lib.global_variables_initializer()) self.evaluate(lookup_ops.tables_initializer()) expected_lookups = ((1., 2.), (3., 5)) - with _initialized_session(): - self.assertAllEqual(expected_lookups, self.evaluate(embedding_lookup)) + self.assertAllEqual(expected_lookups, self.evaluate(embedding_lookup)) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_with_inputs_too_small(self): self._test_get_sparse_tensors_with_inputs_too_small() - @test_util.run_deprecated_v1 @test_util.enable_control_flow_v2 def test_get_sparse_tensors_with_inputs_too_small_v2(self): self._test_get_sparse_tensors_with_inputs_too_small() @@ -4189,27 +4134,24 @@ class IdentityCategoricalColumnTest(test.TestCase): state_manager = _TestStateManager() embedding_column.create_state(state_manager) - # Provide sparse input and get dense result. - embedding_lookup = embedding_column.get_dense_tensor( - fc.FeatureTransformationCache({'aaa': sparse_input}), state_manager) - - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(lookup_ops.tables_initializer()) - with self.assertRaisesRegexp(errors.OpError, r'indices\[0\] = 2 is not in \[0, 2\)'): + # Provide sparse input and get dense result. + embedding_lookup = embedding_column.get_dense_tensor( + fc.FeatureTransformationCache({'aaa': sparse_input}), state_manager) + + self.evaluate(variables_lib.global_variables_initializer()) + self.evaluate(lookup_ops.tables_initializer()) + self.evaluate(embedding_lookup) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_with_inputs_too_big(self): self._test_get_sparse_tensors_with_inputs_too_big() - @test_util.run_deprecated_v1 @test_util.enable_control_flow_v2 def test_get_sparse_tensors_with_inputs_too_big_v2(self): self._test_get_sparse_tensors_with_inputs_too_big() - @test_util.run_deprecated_v1 def test_get_sparse_tensors_with_default_value(self): column = fc.categorical_column_with_identity( key='aaa', num_buckets=4, default_value=3) @@ -4234,36 +4176,38 @@ class IdentityCategoricalColumnTest(test.TestCase): dense_shape=inputs.dense_shape), self.evaluate(id_weight_pair.id_tensor)) - @test_util.run_deprecated_v1 def test_get_sparse_tensors_with_default_value_and_placeholder_inputs(self): - column = fc.categorical_column_with_identity( - key='aaa', num_buckets=4, default_value=3) - input_indices = array_ops.placeholder(dtype=dtypes.int64) - input_values = array_ops.placeholder(dtype=dtypes.int32) - input_shape = array_ops.placeholder(dtype=dtypes.int64) - inputs = sparse_tensor.SparseTensorValue( - indices=input_indices, values=input_values, dense_shape=input_shape) - id_weight_pair = column.get_sparse_tensors( - fc.FeatureTransformationCache({ - 'aaa': inputs - }), None) - self.assertIsNone(id_weight_pair.weight_tensor) + # This test needs to run in a graph because it is explicitly testing + # graph placeholders + with ops.Graph().as_default(): + column = fc.categorical_column_with_identity( + key='aaa', num_buckets=4, default_value=3) + input_indices = array_ops.placeholder(dtype=dtypes.int64) + input_values = array_ops.placeholder(dtype=dtypes.int32) + input_shape = array_ops.placeholder(dtype=dtypes.int64) + inputs = sparse_tensor.SparseTensorValue( + indices=input_indices, values=input_values, dense_shape=input_shape) + id_weight_pair = column.get_sparse_tensors( + fc.FeatureTransformationCache({ + 'aaa': inputs + }), None) + 
self.assertIsNone(id_weight_pair.weight_tensor) - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(lookup_ops.tables_initializer()) - with _initialized_session(): - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=np.array(((0, 0), (1, 0), (1, 1)), dtype=np.int64), - values=np.array((1, 3, 3), dtype=np.int64), - dense_shape=np.array((2, 2), dtype=np.int64)), - id_weight_pair.id_tensor.eval( - feed_dict={ - input_indices: ((0, 0), (1, 0), (1, 1)), - input_values: (1, -1, 99), - input_shape: (2, 2), - })) + self.evaluate(variables_lib.global_variables_initializer()) + self.evaluate(lookup_ops.tables_initializer()) + with _initialized_session(): + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=np.array(((0, 0), (1, 0), (1, 1)), dtype=np.int64), + values=np.array((1, 3, 3), dtype=np.int64), + dense_shape=np.array((2, 2), dtype=np.int64)), + id_weight_pair.id_tensor.eval( + feed_dict={ + input_indices: ((0, 0), (1, 0), (1, 1)), + input_values: (1, -1, 99), + input_shape: (2, 2), + })) def test_old_linear_model(self): column = fc.categorical_column_with_identity(key='aaa', num_buckets=3) @@ -4290,7 +4234,6 @@ class IdentityCategoricalColumnTest(test.TestCase): # weight_var[2] + weight_var[1] = 3+2 = 5 self.assertAllClose(((1.,), (5.,)), self.evaluate(predictions)) - @test_util.run_deprecated_v1 def test_serialization(self): column = fc.categorical_column_with_identity(key='aaa', num_buckets=3) @@ -4448,7 +4391,6 @@ class IndicatorColumnTest(test.TestCase): self.assertAllEqual([[0., 1., 1., 0.]], self.evaluate(output)) - @test_util.run_deprecated_v1 def test_deep_copy(self): a = fc.categorical_column_with_hash_bucket('a', 4) column = fc.indicator_column(a) @@ -4457,7 +4399,6 @@ class IndicatorColumnTest(test.TestCase): self.assertEqual(column.name, 'a_indicator') self.assertEqual(column.variable_shape, [1, 4]) - @test_util.run_deprecated_v1 def test_parse_example(self): a = fc.categorical_column_with_vocabulary_list( key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) @@ -4482,7 +4423,6 @@ class IndicatorColumnTest(test.TestCase): values=np.array([b'omar', b'stringer'], dtype=np.object_), dense_shape=[1, 2]), self.evaluate(features['aaa'])) - @test_util.run_deprecated_v1 def test_transform(self): a = fc.categorical_column_with_vocabulary_list( key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) @@ -4502,7 +4442,6 @@ class IndicatorColumnTest(test.TestCase): self.assertAllEqual([[0, 0, 1], [1, 0, 0]], self.evaluate(indicator_tensor)) - @test_util.run_deprecated_v1 def test_transform_with_weighted_column(self): # Github issue 12557 ids = fc.categorical_column_with_vocabulary_list( @@ -4521,7 +4460,6 @@ class IndicatorColumnTest(test.TestCase): self.assertAllEqual([[6., 4., 3.]], self.evaluate(indicator_tensor)) - @test_util.run_deprecated_v1 def test_transform_with_missing_value_in_weighted_column(self): # Github issue 12583 ids = fc.categorical_column_with_vocabulary_list( @@ -4540,7 +4478,6 @@ class IndicatorColumnTest(test.TestCase): self.assertAllEqual([[0., 4., 2.]], self.evaluate(indicator_tensor)) - @test_util.run_deprecated_v1 def test_transform_with_missing_value_in_categorical_column(self): # Github issue 12583 ids = fc.categorical_column_with_vocabulary_list( @@ -4601,7 +4538,6 @@ class IndicatorColumnTest(test.TestCase): self.evaluate(weight_var.assign([[1.], [2.], [3.], [4.]])) self.assertAllClose([[2. 
+ 3.]], self.evaluate(predictions)) - @test_util.run_deprecated_v1 def test_input_layer(self): animal = fc.indicator_column( fc.categorical_column_with_identity('animal', num_buckets=4)) @@ -4634,7 +4570,6 @@ class IndicatorColumnTest(test.TestCase): self.assertAllClose([[0., 1., 1., 0.]], self.evaluate(net)) - @test_util.run_deprecated_v1 def test_serialization(self): parent = fc.categorical_column_with_identity('animal', num_buckets=4) animal = fc.indicator_column(parent) @@ -4707,7 +4642,6 @@ class _TestStateManager(fc.StateManager): class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): - @test_util.run_deprecated_v1 def test_defaults(self): categorical_column = fc.categorical_column_with_identity( key='aaa', num_buckets=3) @@ -4736,7 +4670,6 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): categorical_column, dimension=embedding_dimension) self.assertFalse(embedding_column._is_v2_column) - @test_util.run_deprecated_v1 def test_all_constructor_args(self): categorical_column = fc.categorical_column_with_identity( key='aaa', num_buckets=3) @@ -4763,7 +4696,6 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): 'aaa': parsing_ops.VarLenFeature(dtypes.int64) }, embedding_column.parse_example_spec) - @test_util.run_deprecated_v1 def test_deep_copy(self): categorical_column = fc.categorical_column_with_identity( key='aaa', num_buckets=3) @@ -4796,14 +4728,12 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): 'aaa': parsing_ops.VarLenFeature(dtypes.int64) }, embedding_column.parse_example_spec) - @test_util.run_deprecated_v1 def test_invalid_initializer(self): categorical_column = fc.categorical_column_with_identity( key='aaa', num_buckets=3) with self.assertRaisesRegexp(ValueError, 'initializer must be callable'): fc.embedding_column(categorical_column, dimension=2, initializer='not_fn') - @test_util.run_deprecated_v1 def test_parse_example(self): a = fc.categorical_column_with_vocabulary_list( key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) @@ -4828,7 +4758,6 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): values=np.array([b'omar', b'stringer'], dtype=np.object_), dense_shape=[1, 2]), self.evaluate(features['aaa'])) - @test_util.run_deprecated_v1 def test_transform_feature(self): a = fc.categorical_column_with_identity(key='aaa', num_buckets=3) a_embedded = fc.embedding_column(a, dimension=2) @@ -4849,7 +4778,6 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): _assert_sparse_tensor_value(self, self.evaluate(output_a), self.evaluate(output_embedded)) - @test_util.run_deprecated_v1 def test_get_dense_tensor(self): # Inputs. vocabulary_size = 3 @@ -4905,17 +4833,17 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): }), state_manager) # Assert expected embedding variable and lookups. 
- global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertCountEqual(('embedding_weights:0',), - tuple([v.name for v in global_vars])) + if not context.executing_eagerly(): + global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertCountEqual(('embedding_weights:0',), + tuple([v.name for v in global_vars])) - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(lookup_ops.tables_initializer()) + self.evaluate(variables_lib.global_variables_initializer()) + self.evaluate(lookup_ops.tables_initializer()) - self.assertAllEqual(embedding_values, self.evaluate(global_vars[0])) + self.assertAllEqual(embedding_values, self.evaluate(global_vars[0])) self.assertAllEqual(expected_lookups, self.evaluate(embedding_lookup)) - @test_util.run_deprecated_v1 def test_get_dense_tensor_old_categorical(self): # Inputs. vocabulary_size = 3 @@ -4969,17 +4897,17 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): })) # Assert expected embedding variable and lookups. - global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertCountEqual(('embedding_weights:0',), - tuple([v.name for v in global_vars])) + if not context.executing_eagerly(): + global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertCountEqual(('embedding_weights:0',), + tuple([v.name for v in global_vars])) - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(lookup_ops.tables_initializer()) + self.evaluate(variables_lib.global_variables_initializer()) + self.evaluate(lookup_ops.tables_initializer()) - self.assertAllEqual(embedding_values, self.evaluate(global_vars[0])) + self.assertAllEqual(embedding_values, self.evaluate(global_vars[0])) self.assertAllEqual(expected_lookups, self.evaluate(embedding_lookup)) - @test_util.run_deprecated_v1 def test_get_dense_tensor_3d(self): # Inputs. vocabulary_size = 4 @@ -5037,97 +4965,99 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): }), state_manager) # Assert expected embedding variable and lookups. - global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertCountEqual(('embedding_weights:0',), - tuple([v.name for v in global_vars])) + if not context.executing_eagerly(): + global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertCountEqual(('embedding_weights:0',), + tuple([v.name for v in global_vars])) - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(lookup_ops.tables_initializer()) + self.evaluate(variables_lib.global_variables_initializer()) + self.evaluate(lookup_ops.tables_initializer()) - self.assertAllEqual(embedding_values, self.evaluate(global_vars[0])) + self.assertAllEqual(embedding_values, self.evaluate(global_vars[0])) self.assertAllEqual(expected_lookups, self.evaluate(embedding_lookup)) - @test_util.run_deprecated_v1 def test_get_dense_tensor_placeholder_inputs(self): - # Inputs. - vocabulary_size = 3 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - # example 2, ids [] - # example 3, ids [1] - indices=((0, 0), (1, 0), (1, 4), (3, 0)), - values=(2, 0, 1, 1), - dense_shape=(4, 5)) + # This test explicitly checks graph placeholders, so we need to + # be in a graph + with ops.Graph().as_default(): + # Inputs. 
+ vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 4), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 5)) - # Embedding variable. - embedding_dimension = 2 - embedding_values = ( - (1., 2.), # id 0 - (3., 5.), # id 1 - (7., 11.) # id 2 - ) + # Embedding variable. + embedding_dimension = 2 + embedding_values = ( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 11.) # id 2 + ) - def _initializer(shape, dtype, partition_info=None): - self.assertAllEqual((vocabulary_size, embedding_dimension), shape) - self.assertEqual(dtypes.float32, dtype) - self.assertIsNone(partition_info) - return embedding_values + def _initializer(shape, dtype, partition_info=None): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values - # Expected lookup result, using combiner='mean'. - expected_lookups = ( - # example 0, ids [2], embedding = [7, 11] - (7., 11.), - # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] - (2., 3.5), - # example 2, ids [], embedding = [0, 0] - (0., 0.), - # example 3, ids [1], embedding = [3, 5] - (3., 5.), - ) + # Expected lookup result, using combiner='mean'. + expected_lookups = ( + # example 0, ids [2], embedding = [7, 11] + (7., 11.), + # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] + (2., 3.5), + # example 2, ids [], embedding = [0, 0] + (0., 0.), + # example 3, ids [1], embedding = [3, 5] + (3., 5.), + ) - # Build columns. - categorical_column = fc.categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - embedding_column = fc.embedding_column( - categorical_column, - dimension=embedding_dimension, - initializer=_initializer) - state_manager = _TestStateManager() - embedding_column.create_state(state_manager) + # Build columns. + categorical_column = fc.categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = fc.embedding_column( + categorical_column, + dimension=embedding_dimension, + initializer=_initializer) + state_manager = _TestStateManager() + embedding_column.create_state(state_manager) - # Provide sparse input and get dense result. - input_indices = array_ops.placeholder(dtype=dtypes.int64) - input_values = array_ops.placeholder(dtype=dtypes.int64) - input_shape = array_ops.placeholder(dtype=dtypes.int64) - embedding_lookup = embedding_column.get_dense_tensor( - fc.FeatureTransformationCache({ - 'aaa': - sparse_tensor.SparseTensorValue( - indices=input_indices, - values=input_values, - dense_shape=input_shape) - }), state_manager) + # Provide sparse input and get dense result. + input_indices = array_ops.placeholder(dtype=dtypes.int64) + input_values = array_ops.placeholder(dtype=dtypes.int64) + input_shape = array_ops.placeholder(dtype=dtypes.int64) + embedding_lookup = embedding_column.get_dense_tensor( + fc.FeatureTransformationCache({ + 'aaa': + sparse_tensor.SparseTensorValue( + indices=input_indices, + values=input_values, + dense_shape=input_shape) + }), state_manager) - # Assert expected embedding variable and lookups. - global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertCountEqual(('embedding_weights:0',), - tuple([v.name for v in global_vars])) + # Assert expected embedding variable and lookups. 
+ global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertCountEqual(('embedding_weights:0',), + tuple([v.name for v in global_vars])) - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(lookup_ops.tables_initializer()) - with _initialized_session(): - self.assertAllEqual(embedding_values, self.evaluate(global_vars[0])) - self.assertAllEqual( - expected_lookups, - embedding_lookup.eval( - feed_dict={ - input_indices: sparse_input.indices, - input_values: sparse_input.values, - input_shape: sparse_input.dense_shape, - })) + self.evaluate(variables_lib.global_variables_initializer()) + self.evaluate(lookup_ops.tables_initializer()) + with _initialized_session(): + self.assertAllEqual(embedding_values, self.evaluate(global_vars[0])) + self.assertAllEqual( + expected_lookups, + embedding_lookup.eval( + feed_dict={ + input_indices: sparse_input.indices, + input_values: sparse_input.values, + input_shape: sparse_input.dense_shape, + })) - @test_util.run_deprecated_v1 def test_get_dense_tensor_restore_from_ckpt(self): # Inputs. vocabulary_size = 3 @@ -5181,17 +5111,17 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): }), state_manager) # Assert expected embedding variable and lookups. - global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertCountEqual(('embedding_weights:0',), - tuple([v.name for v in global_vars])) + if not context.executing_eagerly(): + global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertCountEqual(('embedding_weights:0',), + tuple([v.name for v in global_vars])) - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(lookup_ops.tables_initializer()) + self.evaluate(variables_lib.global_variables_initializer()) + self.evaluate(lookup_ops.tables_initializer()) - self.assertAllEqual(embedding_values, self.evaluate(global_vars[0])) + self.assertAllEqual(embedding_values, self.evaluate(global_vars[0])) self.assertAllEqual(expected_lookups, self.evaluate(embedding_lookup)) - @test_util.run_deprecated_v1 def test_input_layer(self): # Inputs. vocabulary_size = 3 @@ -5243,18 +5173,19 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): 'aaa': sparse_input }, (embedding_column,)) - # Assert expected embedding variable and lookups. - global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertCountEqual(('input_layer/aaa_embedding/embedding_weights:0',), - tuple([v.name for v in global_vars])) - trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) - self.assertCountEqual(('input_layer/aaa_embedding/embedding_weights:0',), - tuple([v.name for v in trainable_vars])) + if not context.executing_eagerly(): + # Assert expected embedding variable and lookups. 
+ global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertCountEqual(('input_layer/aaa_embedding/embedding_weights:0',), + tuple([v.name for v in global_vars])) + trainable_vars = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) + self.assertCountEqual(('input_layer/aaa_embedding/embedding_weights:0',), + tuple([v.name for v in trainable_vars])) - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(lookup_ops.tables_initializer()) + self.evaluate(variables_lib.global_variables_initializer()) + self.evaluate(lookup_ops.tables_initializer()) - self.assertAllEqual(embedding_values, self.evaluate(trainable_vars[0])) + self.assertAllEqual(embedding_values, self.evaluate(trainable_vars[0])) self.assertAllEqual(expected_lookups, self.evaluate(feature_layer)) def test_old_linear_model(self): @@ -5421,7 +5352,6 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): self.assertAllClose(((94.,), (29.,), (0.,), (42.,)), self.evaluate(predictions)) - @test_util.run_deprecated_v1 def test_serialization_with_default_initializer(self): # Build columns. @@ -5473,7 +5403,6 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): new_embedding_column.get_config()) self.assertIs(categorical_column, new_embedding_column.categorical_column) - @test_util.run_deprecated_v1 def test_serialization_with_custom_initializer(self): def _initializer(shape, dtype, partition_info=None): @@ -5535,209 +5464,223 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase): - @test_util.run_deprecated_v1 def test_defaults(self): - categorical_column_a = fc.categorical_column_with_identity( - key='aaa', num_buckets=3) - categorical_column_b = fc.categorical_column_with_identity( - key='bbb', num_buckets=3) - embedding_dimension = 2 - embedding_column_b, embedding_column_a = fc.shared_embedding_columns_v2( - [categorical_column_b, categorical_column_a], - dimension=embedding_dimension) - self.assertIs(categorical_column_a, embedding_column_a.categorical_column) - self.assertIs(categorical_column_b, embedding_column_b.categorical_column) - self.assertIsNone(embedding_column_a.max_norm) - self.assertIsNone(embedding_column_b.max_norm) - self.assertEqual('aaa_shared_embedding', embedding_column_a.name) - self.assertEqual('bbb_shared_embedding', embedding_column_b.name) - self.assertEqual((embedding_dimension,), embedding_column_a.variable_shape) - self.assertEqual((embedding_dimension,), embedding_column_b.variable_shape) - self.assertEqual({ - 'aaa': parsing_ops.VarLenFeature(dtypes.int64) - }, embedding_column_a.parse_example_spec) - self.assertEqual({ - 'bbb': parsing_ops.VarLenFeature(dtypes.int64) - }, embedding_column_b.parse_example_spec) - - @test_util.run_deprecated_v1 - def test_all_constructor_args(self): - categorical_column_a = fc.categorical_column_with_identity( - key='aaa', num_buckets=3) - categorical_column_b = fc.categorical_column_with_identity( - key='bbb', num_buckets=3) - embedding_dimension = 2 - embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2( - [categorical_column_a, categorical_column_b], - dimension=embedding_dimension, - combiner='my_combiner', - initializer=lambda: 'my_initializer', - shared_embedding_collection_name='shared_embedding_collection_name', - ckpt_to_load_from='my_ckpt', - tensor_name_in_ckpt='my_ckpt_tensor', - max_norm=42., - trainable=False) - self.assertIs(categorical_column_a, 
embedding_column_a.categorical_column) - self.assertIs(categorical_column_b, embedding_column_b.categorical_column) - self.assertEqual(42., embedding_column_a.max_norm) - self.assertEqual(42., embedding_column_b.max_norm) - self.assertEqual('aaa_shared_embedding', embedding_column_a.name) - self.assertEqual('bbb_shared_embedding', embedding_column_b.name) - self.assertEqual((embedding_dimension,), embedding_column_a.variable_shape) - self.assertEqual((embedding_dimension,), embedding_column_b.variable_shape) - self.assertEqual({ - 'aaa': parsing_ops.VarLenFeature(dtypes.int64) - }, embedding_column_a.parse_example_spec) - self.assertEqual({ - 'bbb': parsing_ops.VarLenFeature(dtypes.int64) - }, embedding_column_b.parse_example_spec) - - @test_util.run_deprecated_v1 - def test_deep_copy(self): - categorical_column_a = fc.categorical_column_with_identity( - key='aaa', num_buckets=3) - categorical_column_b = fc.categorical_column_with_identity( - key='bbb', num_buckets=3) - embedding_dimension = 2 - original_a, _ = fc.shared_embedding_columns_v2( - [categorical_column_a, categorical_column_b], - dimension=embedding_dimension, - combiner='my_combiner', - initializer=lambda: 'my_initializer', - shared_embedding_collection_name='shared_embedding_collection_name', - ckpt_to_load_from='my_ckpt', - tensor_name_in_ckpt='my_ckpt_tensor', - max_norm=42., - trainable=False) - for embedding_column_a in (original_a, copy.deepcopy(original_a)): - self.assertEqual('aaa', embedding_column_a.categorical_column.name) - self.assertEqual(3, embedding_column_a.categorical_column.num_buckets) - self.assertEqual({ - 'aaa': parsing_ops.VarLenFeature(dtypes.int64) - }, embedding_column_a.categorical_column.parse_example_spec) - - self.assertEqual(42., embedding_column_a.max_norm) + # SharedEmbeddingColumns are graph-only + with ops.Graph().as_default(): + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=3) + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=3) + embedding_dimension = 2 + embedding_column_b, embedding_column_a = fc.shared_embedding_columns_v2( + [categorical_column_b, categorical_column_a], + dimension=embedding_dimension) + self.assertIs(categorical_column_a, embedding_column_a.categorical_column) + self.assertIs(categorical_column_b, embedding_column_b.categorical_column) + self.assertIsNone(embedding_column_a.max_norm) + self.assertIsNone(embedding_column_b.max_norm) self.assertEqual('aaa_shared_embedding', embedding_column_a.name) + self.assertEqual('bbb_shared_embedding', embedding_column_b.name) self.assertEqual((embedding_dimension,), embedding_column_a.variable_shape) + self.assertEqual((embedding_dimension,), + embedding_column_b.variable_shape) self.assertEqual({ 'aaa': parsing_ops.VarLenFeature(dtypes.int64) }, embedding_column_a.parse_example_spec) + self.assertEqual({ + 'bbb': parsing_ops.VarLenFeature(dtypes.int64) + }, embedding_column_b.parse_example_spec) - @test_util.run_deprecated_v1 - def test_invalid_initializer(self): - categorical_column_a = fc.categorical_column_with_identity( - key='aaa', num_buckets=3) - categorical_column_b = fc.categorical_column_with_identity( - key='bbb', num_buckets=3) - with self.assertRaisesRegexp(ValueError, 'initializer must be callable'): - fc.shared_embedding_columns_v2( + def test_all_constructor_args(self): + # SharedEmbeddingColumns are graph-only + with ops.Graph().as_default(): + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=3) + 
categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=3) + embedding_dimension = 2 + embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2( [categorical_column_a, categorical_column_b], - dimension=2, - initializer='not_fn') + dimension=embedding_dimension, + combiner='my_combiner', + initializer=lambda: 'my_initializer', + shared_embedding_collection_name='shared_embedding_collection_name', + ckpt_to_load_from='my_ckpt', + tensor_name_in_ckpt='my_ckpt_tensor', + max_norm=42., + trainable=False) + self.assertIs(categorical_column_a, embedding_column_a.categorical_column) + self.assertIs(categorical_column_b, embedding_column_b.categorical_column) + self.assertEqual(42., embedding_column_a.max_norm) + self.assertEqual(42., embedding_column_b.max_norm) + self.assertEqual('aaa_shared_embedding', embedding_column_a.name) + self.assertEqual('bbb_shared_embedding', embedding_column_b.name) + self.assertEqual((embedding_dimension,), + embedding_column_a.variable_shape) + self.assertEqual((embedding_dimension,), + embedding_column_b.variable_shape) + self.assertEqual({ + 'aaa': parsing_ops.VarLenFeature(dtypes.int64) + }, embedding_column_a.parse_example_spec) + self.assertEqual({ + 'bbb': parsing_ops.VarLenFeature(dtypes.int64) + }, embedding_column_b.parse_example_spec) + + def test_deep_copy(self): + # SharedEmbeddingColumns are graph-only + with ops.Graph().as_default(): + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=3) + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=3) + embedding_dimension = 2 + original_a, _ = fc.shared_embedding_columns_v2( + [categorical_column_a, categorical_column_b], + dimension=embedding_dimension, + combiner='my_combiner', + initializer=lambda: 'my_initializer', + shared_embedding_collection_name='shared_embedding_collection_name', + ckpt_to_load_from='my_ckpt', + tensor_name_in_ckpt='my_ckpt_tensor', + max_norm=42., + trainable=False) + for embedding_column_a in (original_a, copy.deepcopy(original_a)): + self.assertEqual('aaa', embedding_column_a.categorical_column.name) + self.assertEqual(3, embedding_column_a.categorical_column.num_buckets) + self.assertEqual({ + 'aaa': parsing_ops.VarLenFeature(dtypes.int64) + }, embedding_column_a.categorical_column.parse_example_spec) + + self.assertEqual(42., embedding_column_a.max_norm) + self.assertEqual('aaa_shared_embedding', embedding_column_a.name) + self.assertEqual((embedding_dimension,), + embedding_column_a.variable_shape) + self.assertEqual({ + 'aaa': parsing_ops.VarLenFeature(dtypes.int64) + }, embedding_column_a.parse_example_spec) + + def test_invalid_initializer(self): + # SharedEmbeddingColumns are graph-only + with ops.Graph().as_default(): + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=3) + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=3) + with self.assertRaisesRegexp(ValueError, 'initializer must be callable'): + fc.shared_embedding_columns_v2( + [categorical_column_a, categorical_column_b], + dimension=2, + initializer='not_fn') - @test_util.run_deprecated_v1 def test_incompatible_column_type(self): - categorical_column_a = fc.categorical_column_with_identity( - key='aaa', num_buckets=3) - categorical_column_b = fc.categorical_column_with_identity( - key='bbb', num_buckets=3) - categorical_column_c = fc.categorical_column_with_hash_bucket( - key='ccc', hash_bucket_size=3) - with self.assertRaisesRegexp( 
- ValueError, 'all categorical_columns must have the same type.*' - 'IdentityCategoricalColumn.*HashedCategoricalColumn'): + # SharedEmbeddingColumns are graph-only + with ops.Graph().as_default(): + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=3) + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=3) + categorical_column_c = fc.categorical_column_with_hash_bucket( + key='ccc', hash_bucket_size=3) + with self.assertRaisesRegexp( + ValueError, 'all categorical_columns must have the same type.*' + 'IdentityCategoricalColumn.*HashedCategoricalColumn'): + fc.shared_embedding_columns_v2( + [categorical_column_a, categorical_column_b, categorical_column_c], + dimension=2) + + def test_weighted_categorical_column_ok(self): + # SharedEmbeddingColumns are graph-only + with ops.Graph().as_default(): + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=3) + weighted_categorical_column_a = fc.weighted_categorical_column( + categorical_column_a, weight_feature_key='aaa_weights') + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=3) + weighted_categorical_column_b = fc.weighted_categorical_column( + categorical_column_b, weight_feature_key='bbb_weights') fc.shared_embedding_columns_v2( - [categorical_column_a, categorical_column_b, categorical_column_c], + [weighted_categorical_column_a, categorical_column_b], dimension=2) + fc.shared_embedding_columns_v2( + [categorical_column_a, weighted_categorical_column_b], dimension=2) + fc.shared_embedding_columns_v2( + [weighted_categorical_column_a, weighted_categorical_column_b], dimension=2) - @test_util.run_deprecated_v1 - def test_weighted_categorical_column_ok(self): - categorical_column_a = fc.categorical_column_with_identity( - key='aaa', num_buckets=3) - weighted_categorical_column_a = fc.weighted_categorical_column( - categorical_column_a, weight_feature_key='aaa_weights') - categorical_column_b = fc.categorical_column_with_identity( - key='bbb', num_buckets=3) - weighted_categorical_column_b = fc.weighted_categorical_column( - categorical_column_b, weight_feature_key='bbb_weights') - fc.shared_embedding_columns_v2( - [weighted_categorical_column_a, categorical_column_b], dimension=2) - fc.shared_embedding_columns_v2( - [categorical_column_a, weighted_categorical_column_b], dimension=2) - fc.shared_embedding_columns_v2( - [weighted_categorical_column_a, weighted_categorical_column_b], - dimension=2) - - @test_util.run_deprecated_v1 def test_parse_example(self): - a = fc.categorical_column_with_vocabulary_list( - key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) - b = fc.categorical_column_with_vocabulary_list( - key='bbb', vocabulary_list=('omar', 'stringer', 'marlo')) - a_embedded, b_embedded = fc.shared_embedding_columns_v2([a, b], dimension=2) - data = example_pb2.Example( - features=feature_pb2.Features( - feature={ - 'aaa': - feature_pb2.Feature( - bytes_list=feature_pb2.BytesList( - value=[b'omar', b'stringer'])), - 'bbb': - feature_pb2.Feature( - bytes_list=feature_pb2.BytesList( - value=[b'stringer', b'marlo'])), - })) - features = parsing_ops.parse_example( - serialized=[data.SerializeToString()], - features=fc.make_parse_example_spec_v2([a_embedded, b_embedded])) - self.assertIn('aaa', features) - self.assertIn('bbb', features) + # SharedEmbeddingColumns are graph-only + with ops.Graph().as_default(): + a = fc.categorical_column_with_vocabulary_list( + key='aaa', vocabulary_list=('omar', 
'stringer', 'marlo')) + b = fc.categorical_column_with_vocabulary_list( + key='bbb', vocabulary_list=('omar', 'stringer', 'marlo')) + a_embedded, b_embedded = fc.shared_embedding_columns_v2([a, b], + dimension=2) + data = example_pb2.Example( + features=feature_pb2.Features( + feature={ + 'aaa': + feature_pb2.Feature( + bytes_list=feature_pb2.BytesList( + value=[b'omar', b'stringer'])), + 'bbb': + feature_pb2.Feature( + bytes_list=feature_pb2.BytesList( + value=[b'stringer', b'marlo'])), + })) + features = parsing_ops.parse_example( + serialized=[data.SerializeToString()], + features=fc.make_parse_example_spec_v2([a_embedded, b_embedded])) + self.assertIn('aaa', features) + self.assertIn('bbb', features) - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=[[0, 0], [0, 1]], - values=np.array([b'omar', b'stringer'], dtype=np.object_), - dense_shape=[1, 2]), self.evaluate(features['aaa'])) - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=[[0, 0], [0, 1]], - values=np.array([b'stringer', b'marlo'], dtype=np.object_), - dense_shape=[1, 2]), self.evaluate(features['bbb'])) + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=[[0, 0], [0, 1]], + values=np.array([b'omar', b'stringer'], dtype=np.object_), + dense_shape=[1, 2]), self.evaluate(features['aaa'])) + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=[[0, 0], [0, 1]], + values=np.array([b'stringer', b'marlo'], dtype=np.object_), + dense_shape=[1, 2]), self.evaluate(features['bbb'])) - @test_util.run_deprecated_v1 def test_transform_feature(self): - a = fc.categorical_column_with_identity(key='aaa', num_buckets=3) - b = fc.categorical_column_with_identity(key='bbb', num_buckets=3) - a_embedded, b_embedded = fc.shared_embedding_columns_v2([a, b], dimension=2) - features = { - 'aaa': - sparse_tensor.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 1, 0), - dense_shape=(2, 2)), - 'bbb': - sparse_tensor.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 2, 1), - dense_shape=(2, 2)), - } - outputs = fc._transform_features_v2(features, - [a, a_embedded, b, b_embedded], None) - output_a = outputs[a] - output_a_embedded = outputs[a_embedded] - output_b = outputs[b] - output_b_embedded = outputs[b_embedded] + # SharedEmbeddingColumns are graph-only + with ops.Graph().as_default(): + a = fc.categorical_column_with_identity(key='aaa', num_buckets=3) + b = fc.categorical_column_with_identity(key='bbb', num_buckets=3) + a_embedded, b_embedded = fc.shared_embedding_columns_v2([a, b], + dimension=2) + features = { + 'aaa': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 1, 0), + dense_shape=(2, 2)), + 'bbb': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 1), + dense_shape=(2, 2)), + } + outputs = fc._transform_features_v2(features, + [a, a_embedded, b, b_embedded], None) + output_a = outputs[a] + output_a_embedded = outputs[a_embedded] + output_b = outputs[b] + output_b_embedded = outputs[b_embedded] - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(lookup_ops.tables_initializer()) + self.evaluate(variables_lib.global_variables_initializer()) + self.evaluate(lookup_ops.tables_initializer()) - _assert_sparse_tensor_value(self, self.evaluate(output_a), - self.evaluate(output_a_embedded)) - _assert_sparse_tensor_value(self, self.evaluate(output_b), - self.evaluate(output_b_embedded)) + 
_assert_sparse_tensor_value(self, self.evaluate(output_a), + self.evaluate(output_a_embedded)) + _assert_sparse_tensor_value(self, self.evaluate(output_b), + self.evaluate(output_b_embedded)) @parameterized.named_parameters( { @@ -5757,110 +5700,202 @@ class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase): 'use_safe_embedding_lookup': False, 'partition_variables': True, }) - @test_util.run_deprecated_v1 def test_get_dense_tensor(self, use_safe_embedding_lookup, partition_variables): - # Inputs. - vocabulary_size = 4 - # -1 values are ignored. - input_a = np.array([ - [2, -1, -1], # example 0, ids [2] - [0, 1, -1] - ]) # example 1, ids [0, 1] - input_b = np.array([ - [0, -1, -1], # example 0, ids [0] - [-1, -1, -1] - ]) # example 1, ids [] - input_features = {'aaa': input_a, 'bbb': input_b} + # SharedEmbeddingColumns are graph-only + with ops.Graph().as_default(): + # Inputs. + vocabulary_size = 4 + # -1 values are ignored. + input_a = np.array([ + [2, -1, -1], # example 0, ids [2] + [0, 1, -1] + ]) # example 1, ids [0, 1] + input_b = np.array([ + [0, -1, -1], # example 0, ids [0] + [-1, -1, -1] + ]) # example 1, ids [] + input_features = {'aaa': input_a, 'bbb': input_b} - # Embedding variable. - embedding_dimension = 2 - embedding_values = ( - (1., 2.), # id 0 - (3., 5.), # id 1 - (7., 11.), # id 2 - (9., 13.) # id 3 - ) + # Embedding variable. + embedding_dimension = 2 + embedding_values = ( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 11.), # id 2 + (9., 13.) # id 3 + ) - def _initializer(shape, dtype, partition_info=None): - if partition_variables: - self.assertEqual([vocabulary_size, embedding_dimension], - partition_info.full_shape) - self.assertAllEqual((2, embedding_dimension), shape) - else: - self.assertAllEqual((vocabulary_size, embedding_dimension), shape) - self.assertIsNone(partition_info) + def _initializer(shape, dtype, partition_info=None): + if partition_variables: + self.assertEqual([vocabulary_size, embedding_dimension], + partition_info.full_shape) + self.assertAllEqual((2, embedding_dimension), shape) + else: + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertIsNone(partition_info) - self.assertEqual(dtypes.float32, dtype) - return embedding_values + self.assertEqual(dtypes.float32, dtype) + return embedding_values - # Expected lookup result, using combiner='mean'. - expected_lookups_a = ( - # example 0: - (7., 11.), # ids [2], embedding = [7, 11] - # example 1: - (2., 3.5), # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] - ) - if use_safe_embedding_lookup: - expected_lookups_b = ( + # Expected lookup result, using combiner='mean'. + expected_lookups_a = ( # example 0: - (1., 2.), # ids [0], embedding = [1, 2] + (7., 11.), # ids [2], embedding = [7, 11] # example 1: - (0., 0.), # ids [], embedding = [0, 0] + (2., 3.5), # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5] + ) + if use_safe_embedding_lookup: + expected_lookups_b = ( + # example 0: + (1., 2.), # ids [0], embedding = [1, 2] + # example 1: + (0., 0.), # ids [], embedding = [0, 0] + ) + else: + expected_lookups_b = ( + # example 0: + (1., 2.), # ids [0], embedding = [1, 2] + ) + + # Build columns. 
+ categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=vocabulary_size) + + partitioner = None + if partition_variables: + partitioner = partitioned_variables.fixed_size_partitioner(2, axis=0) + + with variable_scope.variable_scope('vars', partitioner=partitioner): + embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2( + [categorical_column_a, categorical_column_b], + dimension=embedding_dimension, + initializer=_initializer, + use_safe_embedding_lookup=use_safe_embedding_lookup) + # Provide sparse input and get dense result. + embedding_lookup_a = embedding_column_a.get_dense_tensor( + fc.FeatureTransformationCache(input_features), None) + embedding_lookup_b = embedding_column_b.get_dense_tensor( + fc.FeatureTransformationCache(input_features), None) + + # Assert expected embedding variable and lookups. + global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + if partition_variables: + self.assertCountEqual(('vars/aaa_bbb_shared_embedding/part_0:0', + 'vars/aaa_bbb_shared_embedding/part_1:0'), + tuple([v.name for v in global_vars])) + else: + self.assertCountEqual(('vars/aaa_bbb_shared_embedding:0',), + tuple([v.name for v in global_vars])) + embedding_var = global_vars[0] + + self.evaluate(variables_lib.global_variables_initializer()) + self.evaluate(lookup_ops.tables_initializer()) + + self.assertAllEqual(embedding_values, self.evaluate(embedding_var)) + self.assertAllEqual(expected_lookups_a, self.evaluate(embedding_lookup_a)) + self.assertAllEqual(expected_lookups_b, self.evaluate(embedding_lookup_b)) + + if use_safe_embedding_lookup: + self.assertIn( + 'SparseFillEmptyRows', + [x.type for x in ops.get_default_graph().get_operations()]) + else: + self.assertNotIn( + 'SparseFillEmptyRows', + [x.type for x in ops.get_default_graph().get_operations()]) + + @parameterized.named_parameters( + { + 'testcase_name': 'use_safe_embedding_lookup', + 'use_safe_embedding_lookup': True + }, { + 'testcase_name': 'dont_use_safe_embedding_lookup', + 'use_safe_embedding_lookup': False + }) + def test_get_dense_tensor_valid(self, use_safe_embedding_lookup): + # SharedEmbeddingColumns are graph-only + with ops.Graph().as_default(): + # Inputs. + vocabulary_size = 3 + # -1 values are ignored. + input_a = np.array([ + [2, 1], # example 0, ids [2, 1] + [0, -1] + ]) # example 1, ids [0] + input_b = np.array([ + [1, -1], # example 0, ids [1] + [1, 2] + ]) # example 1, ids [1, 2] + input_features = {'aaa': input_a, 'bbb': input_b} + + # Embedding variable. + embedding_dimension = 2 + embedding_values = ( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 11.) # id 2 + ) + + def _initializer(shape, dtype, partition_info=None): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values + + # Expected lookup result, using combiner='mean'. + expected_lookups_a = ( + # example 0: + (5., 8.), # ids [2, 1], embedding = mean([3, 5] + [7, 11]) = [5, 8] + # example 1: + (1., 2), # ids [0], embedding = [1, 2] ) - else: expected_lookups_b = ( # example 0: - (1., 2.), # ids [0], embedding = [1, 2] + (3., 5.), # ids [1], embedding = [3, 5] + # example 1: + (5., 8.), # ids [1, 2], embedding = mean([3, 5] + [7, 11]) = [5, 8] ) - # Build columns. 
- categorical_column_a = fc.categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - categorical_column_b = fc.categorical_column_with_identity( - key='bbb', num_buckets=vocabulary_size) - - partitioner = None - if partition_variables: - partitioner = partitioned_variables.fixed_size_partitioner(2, axis=0) - - with variable_scope.variable_scope('vars', partitioner=partitioner): + # Build columns. + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=vocabulary_size) embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2( [categorical_column_a, categorical_column_b], dimension=embedding_dimension, initializer=_initializer, use_safe_embedding_lookup=use_safe_embedding_lookup) + # Provide sparse input and get dense result. embedding_lookup_a = embedding_column_a.get_dense_tensor( fc.FeatureTransformationCache(input_features), None) embedding_lookup_b = embedding_column_b.get_dense_tensor( fc.FeatureTransformationCache(input_features), None) - # Assert expected embedding variable and lookups. - global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - if partition_variables: - self.assertCountEqual(('vars/aaa_bbb_shared_embedding/part_0:0', - 'vars/aaa_bbb_shared_embedding/part_1:0'), + # Assert expected embedding variable and lookups. + global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertCountEqual(('aaa_bbb_shared_embedding:0',), tuple([v.name for v in global_vars])) - else: - self.assertCountEqual(('vars/aaa_bbb_shared_embedding:0',), - tuple([v.name for v in global_vars])) - embedding_var = global_vars[0] + embedding_var = global_vars[0] - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(lookup_ops.tables_initializer()) + self.evaluate(variables_lib.global_variables_initializer()) + self.evaluate(lookup_ops.tables_initializer()) - self.assertAllEqual(embedding_values, self.evaluate(embedding_var)) - self.assertAllEqual(expected_lookups_a, self.evaluate(embedding_lookup_a)) - self.assertAllEqual(expected_lookups_b, self.evaluate(embedding_lookup_b)) - - if use_safe_embedding_lookup: - self.assertIn('SparseFillEmptyRows', - [x.type for x in ops.get_default_graph().get_operations()]) - else: - self.assertNotIn( - 'SparseFillEmptyRows', - [x.type for x in ops.get_default_graph().get_operations()]) + self.assertAllEqual(embedding_values, self.evaluate(embedding_var)) + self.assertAllEqual(expected_lookups_a, self.evaluate(embedding_lookup_a)) + self.assertAllEqual(expected_lookups_b, self.evaluate(embedding_lookup_b)) + if use_safe_embedding_lookup: + self.assertIn( + 'SparseFillEmptyRows', + [x.type for x in ops.get_default_graph().get_operations()]) + else: + self.assertNotIn( + 'SparseFillEmptyRows', + [x.type for x in ops.get_default_graph().get_operations()]) @parameterized.named_parameters( { @@ -5870,186 +5905,99 @@ class SharedEmbeddingColumnTest(test.TestCase, parameterized.TestCase): 'testcase_name': 'dont_use_safe_embedding_lookup', 'use_safe_embedding_lookup': False }) - @test_util.run_deprecated_v1 - def test_get_dense_tensor_valid(self, use_safe_embedding_lookup): - # Inputs. - vocabulary_size = 3 - # -1 values are ignored. 
- input_a = np.array([ - [2, 1], # example 0, ids [2, 1] - [0, -1] - ]) # example 1, ids [0] - input_b = np.array([ - [1, -1], # example 0, ids [1] - [1, 2] - ]) # example 1, ids [1, 2] - input_features = {'aaa': input_a, 'bbb': input_b} - - # Embedding variable. - embedding_dimension = 2 - embedding_values = ( - (1., 2.), # id 0 - (3., 5.), # id 1 - (7., 11.) # id 2 - ) - - def _initializer(shape, dtype, partition_info=None): - self.assertAllEqual((vocabulary_size, embedding_dimension), shape) - self.assertEqual(dtypes.float32, dtype) - self.assertIsNone(partition_info) - return embedding_values - - # Expected lookup result, using combiner='mean'. - expected_lookups_a = ( - # example 0: - (5., 8.), # ids [2, 1], embedding = mean([3, 5] + [7, 11]) = [5, 8] - # example 1: - (1., 2), # ids [0], embedding = [1, 2] - ) - expected_lookups_b = ( - # example 0: - (3., 5.), # ids [1], embedding = [3, 5] - # example 1: - (5., 8.), # ids [1, 2], embedding = mean([3, 5] + [7, 11]) = [5, 8] - ) - - # Build columns. - categorical_column_a = fc.categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - categorical_column_b = fc.categorical_column_with_identity( - key='bbb', num_buckets=vocabulary_size) - embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2( - [categorical_column_a, categorical_column_b], - dimension=embedding_dimension, - initializer=_initializer, - use_safe_embedding_lookup=use_safe_embedding_lookup) - - # Provide sparse input and get dense result. - embedding_lookup_a = embedding_column_a.get_dense_tensor( - fc.FeatureTransformationCache(input_features), None) - embedding_lookup_b = embedding_column_b.get_dense_tensor( - fc.FeatureTransformationCache(input_features), None) - - # Assert expected embedding variable and lookups. - global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) - self.assertCountEqual(('aaa_bbb_shared_embedding:0',), - tuple([v.name for v in global_vars])) - embedding_var = global_vars[0] - - self.evaluate(variables_lib.global_variables_initializer()) - self.evaluate(lookup_ops.tables_initializer()) - - self.assertAllEqual(embedding_values, self.evaluate(embedding_var)) - self.assertAllEqual(expected_lookups_a, self.evaluate(embedding_lookup_a)) - self.assertAllEqual(expected_lookups_b, self.evaluate(embedding_lookup_b)) - if use_safe_embedding_lookup: - self.assertIn('SparseFillEmptyRows', - [x.type for x in ops.get_default_graph().get_operations()]) - else: - self.assertNotIn( - 'SparseFillEmptyRows', - [x.type for x in ops.get_default_graph().get_operations()]) - - @parameterized.named_parameters( - { - 'testcase_name': 'use_safe_embedding_lookup', - 'use_safe_embedding_lookup': True - }, { - 'testcase_name': 'dont_use_safe_embedding_lookup', - 'use_safe_embedding_lookup': False - }) - @test_util.run_deprecated_v1 def test_get_dense_tensor_placeholder_inputs(self, use_safe_embedding_lookup): - # Inputs. - vocabulary_size = 3 - # -1 values are ignored. - input_a = np.array([ - [2, -1, -1], # example 0, ids [2] - [0, 1, -1] - ]) # example 1, ids [0, 1] - input_b = np.array([ - [0, -1, -1], # example 0, ids [0] - [-1, -1, -1] - ]) # example 1, ids [] - # Specify shape, because dense input must have rank specified. 
- input_a_placeholder = array_ops.placeholder( - dtype=dtypes.int64, shape=[None, 3]) - input_b_placeholder = array_ops.placeholder( - dtype=dtypes.int64, shape=[None, 3]) - input_features = { - 'aaa': input_a_placeholder, - 'bbb': input_b_placeholder, - } - feed_dict = { - input_a_placeholder: input_a, - input_b_placeholder: input_b, - } + # SharedEmbeddingColumns are graph-only + with ops.Graph().as_default(): + # Inputs. + vocabulary_size = 3 + # -1 values are ignored. + input_a = np.array([ + [2, -1, -1], # example 0, ids [2] + [0, 1, -1] + ]) # example 1, ids [0, 1] + input_b = np.array([ + [0, -1, -1], # example 0, ids [0] + [-1, -1, -1] + ]) # example 1, ids [] + # Specify shape, because dense input must have rank specified. + input_a_placeholder = array_ops.placeholder( + dtype=dtypes.int64, shape=[None, 3]) + input_b_placeholder = array_ops.placeholder( + dtype=dtypes.int64, shape=[None, 3]) + input_features = { + 'aaa': input_a_placeholder, + 'bbb': input_b_placeholder, + } + feed_dict = { + input_a_placeholder: input_a, + input_b_placeholder: input_b, + } - # Embedding variable. - embedding_dimension = 2 - embedding_values = ( - (1., 2.), # id 0 - (3., 5.), # id 1 - (7., 11.) # id 2 - ) + # Embedding variable. + embedding_dimension = 2 + embedding_values = ( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 11.) # id 2 + ) - def _initializer(shape, dtype, partition_info=None): - self.assertAllEqual((vocabulary_size, embedding_dimension), shape) - self.assertEqual(dtypes.float32, dtype) - self.assertIsNone(partition_info) - return embedding_values + def _initializer(shape, dtype, partition_info=None): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values - # Build columns. - categorical_column_a = fc.categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - categorical_column_b = fc.categorical_column_with_identity( - key='bbb', num_buckets=vocabulary_size) - embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2( - [categorical_column_a, categorical_column_b], - dimension=embedding_dimension, - initializer=_initializer, - use_safe_embedding_lookup=use_safe_embedding_lookup) + # Build columns. + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=vocabulary_size) + embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2( + [categorical_column_a, categorical_column_b], + dimension=embedding_dimension, + initializer=_initializer, + use_safe_embedding_lookup=use_safe_embedding_lookup) - # Provide sparse input and get dense result. - embedding_lookup_a = embedding_column_a.get_dense_tensor( - fc.FeatureTransformationCache(input_features), None) - embedding_lookup_b = embedding_column_b.get_dense_tensor( - fc.FeatureTransformationCache(input_features), None) - if use_safe_embedding_lookup: - self.assertIn('SparseFillEmptyRows', - [x.type for x in ops.get_default_graph().get_operations()]) - else: - self.assertNotIn( - 'SparseFillEmptyRows', - [x.type for x in ops.get_default_graph().get_operations()]) + # Provide sparse input and get dense result. 
+ embedding_lookup_a = embedding_column_a.get_dense_tensor( + fc.FeatureTransformationCache(input_features), None) + embedding_lookup_b = embedding_column_b.get_dense_tensor( + fc.FeatureTransformationCache(input_features), None) + if use_safe_embedding_lookup: + self.assertIn( + 'SparseFillEmptyRows', + [x.type for x in ops.get_default_graph().get_operations()]) + else: + self.assertNotIn( + 'SparseFillEmptyRows', + [x.type for x in ops.get_default_graph().get_operations()]) - with _initialized_session() as sess: - sess.run([embedding_lookup_a, embedding_lookup_b], feed_dict=feed_dict) + with _initialized_session() as sess: + sess.run([embedding_lookup_a, embedding_lookup_b], feed_dict=feed_dict) - @test_util.run_deprecated_v1 def test_serialization(self): + # SharedEmbeddingColumns are graph-only + with ops.Graph().as_default(): + def _initializer(shape, dtype, partition_info=None): + del shape, dtype, partition_info + return ValueError('Not expected to be called') - def _initializer(shape, dtype, partition_info=None): - del shape, dtype, partition_info - return ValueError('Not expected to be called') + categorical_column_a = fc.categorical_column_with_identity( + key='aaa', num_buckets=3) + categorical_column_b = fc.categorical_column_with_identity( + key='bbb', num_buckets=3) + embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2( + [categorical_column_a, categorical_column_b], + dimension=2, + initializer=_initializer) - categorical_column_a = fc.categorical_column_with_identity( - key='aaa', num_buckets=3) - categorical_column_b = fc.categorical_column_with_identity( - key='bbb', num_buckets=3) - embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2( - [categorical_column_a, categorical_column_b], - dimension=2, - initializer=_initializer) - - self.assertEqual([categorical_column_a], embedding_column_a.parents) - self.assertEqual([categorical_column_b], embedding_column_b.parents) - # TODO(rohanj): Add tests for (from|get)_config once implemented + self.assertEqual([categorical_column_a], embedding_column_a.parents) + self.assertEqual([categorical_column_b], embedding_column_b.parents) + # TODO(rohanj): Add tests for (from|get)_config once implemented class WeightedCategoricalColumnTest(test.TestCase): - @test_util.run_deprecated_v1 def test_defaults(self): column = fc.weighted_categorical_column( categorical_column=fc.categorical_column_with_identity( @@ -6070,7 +6018,6 @@ class WeightedCategoricalColumnTest(test.TestCase): weight_feature_key='values') self.assertFalse(column._is_v2_column) - @test_util.run_deprecated_v1 def test_deep_copy(self): """Tests deepcopy of categorical_column_with_hash_bucket.""" original = fc.weighted_categorical_column( @@ -6136,7 +6083,6 @@ class WeightedCategoricalColumnTest(test.TestCase): 'values is not in features dictionary'): fc._transform_features_v2({'ids': inputs}, (column,), None) - @test_util.run_deprecated_v1 def test_parse_example(self): a = fc.categorical_column_with_vocabulary_list( key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) @@ -6171,7 +6117,6 @@ class WeightedCategoricalColumnTest(test.TestCase): values=np.array([1., 10.], dtype=np.float32), dense_shape=[1, 2]), self.evaluate(features['weights'])) - @test_util.run_deprecated_v1 def test_transform_features(self): column = fc.weighted_categorical_column( categorical_column=fc.categorical_column_with_identity( @@ -6204,7 +6149,6 @@ class WeightedCategoricalColumnTest(test.TestCase): values=np.array(weights.values, dtype=np.float32), 
dense_shape=weights.dense_shape), self.evaluate(weight_tensor)) - @test_util.run_deprecated_v1 def test_transform_features_dense_input(self): column = fc.weighted_categorical_column( categorical_column=fc.categorical_column_with_identity( @@ -6235,7 +6179,6 @@ class WeightedCategoricalColumnTest(test.TestCase): values=np.array(weights.values, dtype=np.float32), dense_shape=weights.dense_shape), self.evaluate(weight_tensor)) - @test_util.run_deprecated_v1 def test_transform_features_dense_weights(self): column = fc.weighted_categorical_column( categorical_column=fc.categorical_column_with_identity( @@ -6406,7 +6349,6 @@ class WeightedCategoricalColumnTest(test.TestCase): # TODO(ptucker): Add test with embedding of weighted categorical. - @test_util.run_deprecated_v1 def test_serialization(self): categorical_column = fc.categorical_column_with_identity( key='ids', num_buckets=3) From eb8f61f5f41c56b4cb267c549a192e8301eb7d30 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Jun 2020 11:23:45 -0700 Subject: [PATCH 128/178] Support variant of whitespace tokenizer PiperOrigin-RevId: 315522310 Change-Id: I44e3fa1563b5f46445602eb6495941c00d7ce4b8 --- .../compiler/mlir/lite/tests/fuse-tftext.mlir | 3196 ++++++++++++++++- .../compiler/mlir/lite/utils/tftext_utils.cc | 73 +- 2 files changed, 3237 insertions(+), 32 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/tests/fuse-tftext.mlir b/tensorflow/compiler/mlir/lite/tests/fuse-tftext.mlir index f08ac0e1027..db2cce197be 100644 --- a/tensorflow/compiler/mlir/lite/tests/fuse-tftext.mlir +++ b/tensorflow/compiler/mlir/lite/tests/fuse-tftext.mlir @@ -1,14 +1,3196 @@ // RUN: tf-opt -tfl-prepare-composite-funcs-tf -tfl-fuse-tftext=true %s -split-input-file | FileCheck %s --dump-input-on-failure module { - func @_whitespace_func(%arg0: tensor<1x!tf.string>) -> (tensor, tensor) attributes {tf._GrapplerSpecializedFunc = true, tf._input_shapes = [#tf.shape<1>], tf.api_implements = "tftext:WhitespaceTokenizer", tf.signature.is_stateful} { - %0 = "tf.op1"(%arg0) : (tensor<1x!tf.string>) -> (tensor) - %1 = "tf.Const"() {value = dense<-1> : tensor} : () -> tensor - %2:2 = "tf.op2"(%arg0, %1) : (tensor<1x!tf.string>, tensor) -> (tensor, tensor) - return %2#0, %2#1 : tensor, tensor + func @whitespace_tokenizer_rank1(%arg0: tensor<1x!tf.string> {tf._user_specified_name = "input"}) -> (tensor, tensor) attributes {tf._input_shapes = [#tf.shape<1>], tf.api_implements = "tftext:WhitespaceTokenizer", tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<[0, 1]> : tensor<2xi64>} : () -> tensor<2xi64> + %1 = "tf.Const"() {value = dense<[]> : tensor<0xi64>} : () -> tensor<0xi64> + %2 = "tf.Const"() {value = dense : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<-1> : tensor} : () -> tensor + %4 = "tf.Const"() {value = dense<[[0], [1]]> : tensor<2x1xi64>} : () -> tensor<2x1xi64> + %5 = "tf.Const"() {value = dense<-1> : tensor<1xi32>} : () -> tensor<1xi32> + %6 = "tf.Const"() {value = dense<2> : tensor<1xi32>} : () -> tensor<1xi32> + %7 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %8 = "tf.Const"() {value = dense<2> : tensor} : () -> tensor + %9 = "tf.Const"() {value = dense<[]> : tensor<0xi32>} : () -> tensor<0xi32> + %10 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %11 = "tf.Const"() {value = dense<0> : tensor<1xi64>} : () -> tensor<1xi64> + %12 = "tf.Const"() {value = dense<1> : tensor<1xi64>} : () -> tensor<1xi64> + %13 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %14 = "tf.Const"() 
{value = dense<0> : tensor} : () -> tensor + %15 = "tf.Const"() {value = dense<0> : tensor<1xi32>} : () -> tensor<1xi32> + %16 = "tf.Const"() {value = dense<1> : tensor<1xi32>} : () -> tensor<1xi32> + %17 = "tf.If"(%2, %2, %13, %13) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_false_3210, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_true_3200} : (tensor, tensor, tensor, tensor) -> tensor + %18 = "tf.Identity"(%17) {device = ""} : (tensor) -> tensor + %19 = "tf.StringLength"(%arg0) {device = "", unit = "BYTE"} : (tensor<1x!tf.string>) -> tensor<1xi32> + %20 = "tf.ExpandDims"(%19, %7) {device = ""} : (tensor<1xi32>, tensor) -> tensor<1x1xi32> + %21 = "tf.Cast"(%20) {Truncate = false, device = ""} : (tensor<1x1xi32>) -> tensor<1x1xi64> + %22 = "tf.Reshape"(%21, %12) {device = ""} : (tensor<1x1xi64>, tensor<1xi64>) -> tensor<1xi64> + %23 = "tf.Reshape"(%arg0, %5) {device = ""} : (tensor<1x!tf.string>, tensor<1xi32>) -> tensor<1x!tf.string> + %24:3 = "tf.UnicodeDecodeWithOffsets"(%23) {Tsplits = i64, device = "", errors = "replace", input_encoding = "UTF-8", replace_control_characters = false, replacement_char = 65533 : i64} : (tensor<1x!tf.string>) -> (tensor<2xi64>, tensor, tensor) + %25 = "tf.StridedSlice"(%24#0, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %26 = "tf.AddV2"(%25, %13) {device = ""} : (tensor<1xi64>, tensor) -> tensor<1xi64> + %27 = "tf.StridedSlice"(%24#0, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %28 = "tf.Minimum"(%26, %27) {device = ""} : (tensor<1xi64>, tensor<1xi64>) -> tensor<1xi64> + %29:2 = "tf.RaggedRange"(%28, %27, %13) {T = i64, Tsplits = i64, device = ""} : (tensor<1xi64>, tensor<1xi64>, tensor) -> (tensor<2xi64>, tensor) + %30 = "tf.StridedSlice"(%29#0, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %31 = "tf.AddV2"(%30, %12) {device = ""} : (tensor, tensor<1xi64>) -> tensor<1xi64> + %32 = "tf.ConcatV2"(%29#0, %31, %14) {device = ""} : (tensor<2xi64>, tensor<1xi64>, tensor) -> tensor<3xi64> + %33 = "tf.GatherV2"(%24#2, %29#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %34 = "tf.ConcatV2"(%33, %22, %14) {device = ""} : (tensor, tensor<1xi64>, tensor) -> tensor + %35:2 = "tf.RaggedGather"(%32, %34, %0) {OUTPUT_RAGGED_RANK = 1 : i64, PARAMS_RAGGED_RANK = 1 : i64, Tindices = i64, Tsplits = i64, Tvalues = i64, device = ""} : (tensor<3xi64>, tensor, tensor<2xi64>) -> (tensor, tensor) + %36:5 = "tf.WhitespaceTokenizeWithOffsets"(%24#1, %24#0) {Tsplits = i64, device = ""} : (tensor, tensor<2xi64>) -> (tensor, tensor, tensor, tensor, tensor) + %37 = "tf.StridedSlice"(%36#1, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %38 = 
"tf.Equal"(%37, %10) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %39 = "tf.All"(%38, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %40 = "tf.If"(%39, %39, %37, %10) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_3970, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_3960} : (tensor, tensor, tensor, tensor) -> tensor + %41 = "tf.Identity"(%40) {device = ""} : (tensor) -> tensor + %42 = "tf.StridedSlice"(%36#1, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %43 = "tf.StridedSlice"(%36#1, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %44 = "tf.Sub"(%42, %43) {device = ""} : (tensor, tensor) -> tensor + %45 = "tf.LessEqual"(%10, %44) {device = ""} : (tensor, tensor) -> tensor + %46 = "tf.All"(%45, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %47 = "tf.If"(%46, %46, %44) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_4330, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_4320} : (tensor, tensor, tensor) -> tensor + %48 = "tf.Identity"(%47) {device = ""} : (tensor) -> tensor + %49 = "tf.Identity"(%36#1) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %50 = "tf.StridedSlice"(%49, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %51 = "tf.Shape"(%36#0) {device = ""} : (tensor) -> tensor<1xi64> + %52 = "tf.StridedSlice"(%51, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %53 = "tf.Equal"(%50, %52) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %54 = "tf.All"(%53, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %55 = "tf.If"(%54, %54, %50, %52) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_4670, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_4660} : (tensor, tensor, tensor, tensor) -> tensor + %56 = "tf.Identity"(%55) {device = ""} : (tensor) -> tensor + %57 
= "tf.Identity"(%49) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %58 = "tf.Shape"(%57) {device = ""} : (tensor) -> tensor<1xi64> + %59 = "tf.StridedSlice"(%58, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %60 = "tf.Sub"(%59, %13) {device = ""} : (tensor, tensor) -> tensor + %61 = "tf.StridedSlice"(%36#4, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %62 = "tf.Equal"(%61, %10) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %63 = "tf.All"(%62, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %64 = "tf.If"(%63, %63, %61, %10) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_5040, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_5030} : (tensor, tensor, tensor, tensor) -> tensor + %65 = "tf.Identity"(%64) {device = ""} : (tensor) -> tensor + %66 = "tf.StridedSlice"(%36#4, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %67 = "tf.StridedSlice"(%36#4, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %68 = "tf.Sub"(%66, %67) {device = ""} : (tensor, tensor) -> tensor + %69 = "tf.LessEqual"(%10, %68) {device = ""} : (tensor, tensor) -> tensor + %70 = "tf.All"(%69, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %71 = "tf.If"(%70, %70, %68) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_5400, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_5390} : (tensor, tensor, tensor) -> tensor + %72 = "tf.Identity"(%71) {device = ""} : (tensor) -> tensor + %73 = "tf.Identity"(%36#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %74 = "tf.StridedSlice"(%73, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %75 = "tf.Equal"(%74, %60) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %76 = "tf.All"(%75, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %77 = "tf.If"(%76, %76, %74, %60) {_lower_using_switch_merge = true, 
_read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_false_5760, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_true_5750} : (tensor, tensor, tensor, tensor) -> tensor + %78 = "tf.Identity"(%77) {device = ""} : (tensor) -> tensor + %79 = "tf.Identity"(%73) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %80 = "tf.StridedSlice"(%36#4, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %81 = "tf.Equal"(%80, %10) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %82 = "tf.All"(%81, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %83 = "tf.If"(%82, %82, %80, %10) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_6110, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_6100} : (tensor, tensor, tensor, tensor) -> tensor + %84 = "tf.Identity"(%83) {device = ""} : (tensor) -> tensor + %85 = "tf.StridedSlice"(%36#4, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %86 = "tf.StridedSlice"(%36#4, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %87 = "tf.Sub"(%85, %86) {device = ""} : (tensor, tensor) -> tensor + %88 = "tf.LessEqual"(%10, %87) {device = ""} : (tensor, tensor) -> tensor + %89 = "tf.All"(%88, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %90 = "tf.If"(%89, %89, %87) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_6470, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_6460} : (tensor, tensor, tensor) -> tensor + %91 = "tf.Identity"(%90) {device = ""} : (tensor) -> tensor + %92 = "tf.Identity"(%36#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %93 = "tf.StridedSlice"(%92, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %94 = "tf.Shape"(%36#2) {device = ""} : (tensor) -> tensor<1xi64> + %95 = "tf.StridedSlice"(%94, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, 
new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %96 = "tf.Equal"(%93, %95) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %97 = "tf.All"(%96, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %98 = "tf.If"(%97, %97, %93, %95) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_6810, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_6800} : (tensor, tensor, tensor, tensor) -> tensor + %99 = "tf.Identity"(%98) {device = ""} : (tensor) -> tensor + %100 = "tf.Identity"(%92) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %101 = "tf.Shape"(%100) {device = ""} : (tensor) -> tensor<1xi64> + %102 = "tf.StridedSlice"(%101, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %103 = "tf.Sub"(%102, %13) {device = ""} : (tensor, tensor) -> tensor + %104 = "tf.Equal"(%103, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %105 = "tf.LogicalOr"(%104, %2) {device = ""} : (tensor, tensor) -> tensor + %106 = "tf.Equal"(%103, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %107 = "tf.LogicalOr"(%105, %106) {device = ""} : (tensor, tensor) -> tensor + %108 = "tf.StridedSlice"(%100, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %109 = "tf.StridedSlice"(%100, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %110 = "tf.Sub"(%108, %109) {device = ""} : (tensor, tensor) -> tensor + %111 = "tf.Shape"(%100) {device = ""} : (tensor) -> tensor<1xi64> + %112 = "tf.StridedSlice"(%111, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %113 = "tf.Sub"(%112, %13) {device = ""} : (tensor, tensor) -> tensor + %114 = "tf.Equal"(%113, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %115 = "tf.ExpandDims"(%100, %7) {device = ""} : (tensor, tensor) -> tensor + %116 = "tf.Shape"(%100) {device = ""} : (tensor) -> tensor<1xi32> + %117 = "tf.StridedSlice"(%116, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %118 = "tf.StridedSlice"(%116, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %119 = "tf.StridedSlice"(%116, %16, %15, %16) {begin_mask = 0 : 
i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %120 = "tf.StridedSlice"(%36#4, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %121 = "tf.Equal"(%120, %10) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %122 = "tf.All"(%121, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %123 = "tf.If"(%122, %122, %120, %10) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_7180, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_7170} : (tensor, tensor, tensor, tensor) -> tensor + %124 = "tf.Identity"(%123) {device = ""} : (tensor) -> tensor + %125 = "tf.StridedSlice"(%36#4, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %126 = "tf.StridedSlice"(%36#4, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %127 = "tf.Sub"(%125, %126) {device = ""} : (tensor, tensor) -> tensor + %128 = "tf.LessEqual"(%10, %127) {device = ""} : (tensor, tensor) -> tensor + %129 = "tf.All"(%128, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %130 = "tf.If"(%129, %129, %127) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_7540, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_7530} : (tensor, tensor, tensor) -> tensor + %131 = "tf.Identity"(%130) {device = ""} : (tensor) -> tensor + %132 = "tf.Identity"(%36#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %133 = "tf.StridedSlice"(%132, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %134 = "tf.Shape"(%36#3) {device = ""} : (tensor) -> tensor<1xi64> + %135 = "tf.StridedSlice"(%134, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %136 = "tf.Equal"(%133, %135) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %137 = "tf.All"(%136, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %138 = "tf.If"(%137, %137, %133, 
%135) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_7880, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_7870} : (tensor, tensor, tensor, tensor) -> tensor + %139 = "tf.Identity"(%138) {device = ""} : (tensor) -> tensor + %140 = "tf.Identity"(%132) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %141 = "tf.Shape"(%140) {device = ""} : (tensor) -> tensor<1xi64> + %142 = "tf.StridedSlice"(%141, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %143 = "tf.Sub"(%142, %13) {device = ""} : (tensor, tensor) -> tensor + %144 = "tf.Equal"(%143, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %145 = "tf.LogicalOr"(%144, %2) {device = ""} : (tensor, tensor) -> tensor + %146 = "tf.Equal"(%143, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %147 = "tf.LogicalOr"(%145, %146) {device = ""} : (tensor, tensor) -> tensor + %148 = "tf.StridedSlice"(%140, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %149 = "tf.StridedSlice"(%140, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %150 = "tf.Sub"(%148, %149) {device = ""} : (tensor, tensor) -> tensor + %151 = "tf.Shape"(%140) {device = ""} : (tensor) -> tensor<1xi64> + %152 = "tf.StridedSlice"(%151, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %153 = "tf.Sub"(%152, %13) {device = ""} : (tensor, tensor) -> tensor + %154 = "tf.Equal"(%153, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %155 = "tf.ExpandDims"(%140, %7) {device = ""} : (tensor, tensor) -> tensor + %156 = "tf.Shape"(%140) {device = ""} : (tensor) -> tensor<1xi32> + %157 = "tf.StridedSlice"(%156, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %158 = "tf.StridedSlice"(%156, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %159 = "tf.StridedSlice"(%156, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %160 = "tf.StridedSlice"(%140, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, 
shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %161 = "tf.Range"(%10, %160, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %162 = "tf.StridedSlice"(%140, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %163 = "tf.StridedSlice"(%140, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %164 = "tf.Sub"(%162, %163) {device = ""} : (tensor, tensor) -> tensor + %165 = "tf.If"(%107, %107, %13, %103) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedGather_Assert_AssertGuard_false_8680, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedGather_Assert_AssertGuard_true_8670} : (tensor, tensor, tensor, tensor) -> tensor + %166 = "tf.Identity"(%165) {device = ""} : (tensor) -> tensor + %167 = "tf.Equal"(%103, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %168 = "tf.Select"(%167, %13, %103) {device = ""} : (tensor, tensor, tensor) -> tensor + %169 = "tf.Equal"(%168, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %170 = "tf.LogicalOr"(%169, %2) {device = ""} : (tensor, tensor) -> tensor + %171 = "tf.Equal"(%168, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %172 = "tf.LogicalOr"(%170, %171) {device = ""} : (tensor, tensor) -> tensor + %173 = "tf.Select"(%114, %168, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %174 = "tf.Pack"(%173, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %175 = "tf.StridedSlice"(%174, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %176 = "tf.Cast"(%175) {Truncate = false, device = ""} : (tensor) -> tensor + %177 = "tf.Reshape"(%176, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %178 = "tf.Pack"(%7, %177) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %179 = "tf.Tile"(%115, %178) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %180 = "tf.Mul"(%177, %118) {device = ""} : (tensor, tensor) -> tensor + %181 = "tf.Pack"(%180) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %182 = "tf.ConcatV2"(%117, %181, %119, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %183 = "tf.Reshape"(%179, %182) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %184 = "tf.Shape"(%183) {device = ""} : (tensor) -> tensor<1xi64> + %185 = "tf.StridedSlice"(%184, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %186 = "tf.Pack"(%175) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %187 = "tf.StridedSlice"(%183, %186, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> 
tensor + %188 = "tf.Sub"(%185, %175) {device = ""} : (tensor, tensor) -> tensor + %189 = "tf.Pack"(%188) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %190 = "tf.StridedSlice"(%183, %11, %189, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %191:2 = "tf.RaggedRange"(%190, %187, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %192 = "tf.Select"(%2, %168, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %193 = "tf.Pack"(%192, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %194 = "tf.StridedSlice"(%193, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %195 = "tf.Cast"(%194) {Truncate = false, device = ""} : (tensor) -> tensor + %196 = "tf.Reshape"(%195, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %197 = "tf.Pack"(%7, %196) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %198 = "tf.Tile"(%4, %197) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %199 = "tf.Mul"(%196, %8) {device = ""} : (tensor, tensor) -> tensor + %200 = "tf.Pack"(%199) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %201 = "tf.ConcatV2"(%9, %200, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %202 = "tf.Reshape"(%198, %201) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %203 = "tf.Shape"(%202) {device = ""} : (tensor) -> tensor<1xi64> + %204 = "tf.StridedSlice"(%203, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %205 = "tf.Pack"(%194) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %206 = "tf.StridedSlice"(%202, %205, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %207 = "tf.Sub"(%204, %194) {device = ""} : (tensor, tensor) -> tensor + %208 = "tf.Pack"(%207) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %209 = "tf.StridedSlice"(%202, %11, %208, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %210:2 = "tf.RaggedRange"(%209, %206, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %211 = "tf.StridedSlice"(%193, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %212 = "tf.StridedSlice"(%193, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %213 = "tf.Mul"(%212, %12) {device = ""} : (tensor, tensor<1xi64>) -> tensor<1xi64> + %214 = "tf.Tile"(%213, %211) {device = ""} : 
(tensor<1xi64>, tensor<1xi64>) -> tensor + %215 = "tf.Cumsum"(%214, %14) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor + %216 = "tf.ConcatV2"(%11, %215, %3) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %217 = "tf.StridedSlice"(%216, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %218 = "tf.ExpandDims"(%217, %7) {device = ""} : (tensor, tensor) -> tensor + %219 = "tf.Shape"(%217) {device = ""} : (tensor) -> tensor<1xi32> + %220 = "tf.StridedSlice"(%219, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %221 = "tf.Pack"(%220) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %222 = "tf.StridedSlice"(%216, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %223 = "tf.ExpandDims"(%222, %7) {device = ""} : (tensor, tensor) -> tensor + %224 = "tf.Shape"(%222) {device = ""} : (tensor) -> tensor<1xi32> + %225 = "tf.StridedSlice"(%224, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %226 = "tf.Pack"(%225) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %227 = "tf.Equal"(%103, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %228 = "tf.Select"(%227, %168, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %229 = "tf.Cast"(%228) {Truncate = false, device = ""} : (tensor) -> tensor + %230 = "tf.Reshape"(%229, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %231 = "tf.Pack"(%7, %230) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %232 = "tf.Mul"(%230, %8) {device = ""} : (tensor, tensor) -> tensor + %233 = "tf.Pack"(%232) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %234 = "tf.ConcatV2"(%9, %233, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %235 = "tf.Pack"(%228) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %236 = "tf.Pack"(%10, %103) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %237 = "tf.ExpandDims"(%236, %7) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> + %238 = "tf.Tile"(%237, %231) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %239 = "tf.Reshape"(%238, %234) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %240 = "tf.Shape"(%239) {device = ""} : (tensor) -> tensor<1xi64> + %241 = "tf.StridedSlice"(%240, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %242 = "tf.Sub"(%241, %228) {device = ""} : (tensor, tensor) -> tensor + %243 = "tf.Pack"(%242) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %244 = "tf.StridedSlice"(%239, %11, %243, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, 
shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %245 = "tf.StridedSlice"(%239, %235, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %246:2 = "tf.RaggedRange"(%244, %245, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %247 = "tf.GatherV2"(%110, %246#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %248 = "tf.Cast"(%247) {Truncate = false, device = ""} : (tensor) -> tensor + %249 = "tf.BroadcastTo"(%248, %221) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %250 = "tf.Max"(%249, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %251 = "tf.Maximum"(%14, %250) {device = ""} : (tensor, tensor) -> tensor + %252 = "tf.Range"(%14, %251, %7) {device = ""} : (tensor, tensor, tensor) -> tensor + %253 = "tf.Pack"(%7, %251) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %254 = "tf.Tile"(%218, %253) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %255 = "tf.Shape"(%254) {device = ""} : (tensor) -> tensor<2xi32> + %256 = "tf.StridedSlice"(%255, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %257 = "tf.Prod"(%256, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %258 = "tf.Pack"(%257) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %259 = "tf.Shape"(%254) {device = ""} : (tensor) -> tensor<2xi32> + %260 = "tf.StridedSlice"(%259, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %261 = "tf.Shape"(%254) {device = ""} : (tensor) -> tensor<2xi32> + %262 = "tf.StridedSlice"(%261, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %263 = "tf.ConcatV2"(%260, %258, %262, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %264 = "tf.Reshape"(%254, %263) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %265 = "tf.ExpandDims"(%249, %3) {device = ""} : (tensor, tensor) -> tensor + %266 = "tf.Less"(%252, %265) {device = ""} : (tensor, tensor) -> tensor + %267 = "tf.Reshape"(%266, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %268 = "tf.Where"(%267) {device = ""} : (tensor) -> tensor + %269 = "tf.Squeeze"(%268) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %270 = "tf.GatherV2"(%264, %269, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %271 = "tf.Cast"(%247) {Truncate = false, device = ""} : (tensor) -> tensor + %272 = "tf.BroadcastTo"(%271, %226) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %273 = "tf.Max"(%272, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %274 = "tf.Maximum"(%14, %273) {device = ""} : (tensor, tensor) -> tensor + %275 = "tf.Range"(%14, %274, %7) {device = ""} : (tensor, tensor, tensor) -> tensor + %276 = "tf.Pack"(%7, %274) {axis = 0 : i64, device 
= ""} : (tensor, tensor) -> tensor<2xi32> + %277 = "tf.Tile"(%223, %276) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %278 = "tf.Shape"(%277) {device = ""} : (tensor) -> tensor<2xi32> + %279 = "tf.StridedSlice"(%278, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %280 = "tf.Prod"(%279, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %281 = "tf.Pack"(%280) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %282 = "tf.Shape"(%277) {device = ""} : (tensor) -> tensor<2xi32> + %283 = "tf.StridedSlice"(%282, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %284 = "tf.Shape"(%277) {device = ""} : (tensor) -> tensor<2xi32> + %285 = "tf.StridedSlice"(%284, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %286 = "tf.ConcatV2"(%283, %281, %285, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %287 = "tf.Reshape"(%277, %286) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %288 = "tf.ExpandDims"(%272, %3) {device = ""} : (tensor, tensor) -> tensor + %289 = "tf.Less"(%275, %288) {device = ""} : (tensor, tensor) -> tensor + %290 = "tf.Reshape"(%289, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %291 = "tf.Where"(%290) {device = ""} : (tensor) -> tensor + %292 = "tf.Squeeze"(%291) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %293 = "tf.GatherV2"(%287, %292, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %294:2 = "tf.RaggedRange"(%270, %293, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %295 = "tf.If"(%172, %172, %168, %13) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_false_9750, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_true_9740} : (tensor, tensor, tensor, tensor) -> tensor + %296 = "tf.Identity"(%295) {device = ""} : (tensor) -> tensor + %297 = "tf.Select"(%2, %168, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %298 = "tf.Pack"(%297) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %299 = "tf.ConcatV2"(%1, %298, %12, %14) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> + %300 = "tf.StridedSlice"(%299, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %301 = "tf.Equal"(%300, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %302 = "tf.StridedSlice"(%299, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %303 = "tf.StridedSlice"(%299, %16, %6, %16) {begin_mask 
= 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %304 = "tf.Equal"(%303, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %305 = "tf.If"(%304, %304, %303, %247) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_false_10240, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_true_10230} : (tensor, tensor, tensor, tensor) -> tensor + %306 = "tf.Identity"(%305) {device = ""} : (tensor) -> tensor + %307 = "tf.If"(%301, %301, %247, %302) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_false_10600, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_true_10590} : (tensor, tensor, tensor, tensor) -> tensor + %308 = "tf.If"(%147, %147, %13, %143) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_Assert_AssertGuard_false_15300, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_Assert_AssertGuard_true_15290} : (tensor, tensor, tensor, tensor) -> tensor + %309 = "tf.Identity"(%308) {device = ""} : (tensor) -> tensor + %310 = "tf.Equal"(%143, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %311 = "tf.Select"(%310, %13, %143) {device = ""} : (tensor, tensor, tensor) -> tensor + %312 = "tf.Equal"(%311, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %313 = "tf.LogicalOr"(%312, %2) {device = ""} : (tensor, tensor) -> tensor + %314 = "tf.Equal"(%311, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %315 = "tf.LogicalOr"(%313, %314) {device = ""} : (tensor, tensor) -> tensor + %316 = "tf.Select"(%154, %311, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %317 = "tf.Pack"(%316, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %318 = "tf.StridedSlice"(%317, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %319 = "tf.Cast"(%318) {Truncate = false, device = ""} : (tensor) -> tensor + %320 = "tf.Reshape"(%319, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %321 = "tf.Pack"(%7, %320) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %322 = "tf.Tile"(%155, %321) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %323 = "tf.Mul"(%320, %158) {device = ""} : (tensor, tensor) -> tensor + %324 = "tf.Pack"(%323) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %325 = "tf.ConcatV2"(%157, %324, %159, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %326 = "tf.Reshape"(%322, %325) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %327 = "tf.Shape"(%326) {device = ""} : (tensor) -> tensor<1xi64> + %328 = "tf.StridedSlice"(%327, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, 
tensor<1xi32>) -> tensor + %329 = "tf.Pack"(%318) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %330 = "tf.StridedSlice"(%326, %329, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %331 = "tf.Sub"(%328, %318) {device = ""} : (tensor, tensor) -> tensor + %332 = "tf.Pack"(%331) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %333 = "tf.StridedSlice"(%326, %11, %332, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %334:2 = "tf.RaggedRange"(%333, %330, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %335 = "tf.GatherV2"(%161, %334#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %336 = "tf.StridedSlice"(%317, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %337 = "tf.StridedSlice"(%317, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %338 = "tf.StridedSlice"(%317, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> + %339 = "tf.ConcatV2"(%337, %338, %14) {device = ""} : (tensor<1xi64>, tensor<0xi64>, tensor) -> tensor<1xi64> + %340 = "tf.StridedSlice"(%317, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %341 = "tf.Mul"(%164, %340) {device = ""} : (tensor, tensor) -> tensor + %342 = "tf.Tile"(%341, %336) {device = ""} : (tensor, tensor<1xi64>) -> tensor + %343 = "tf.Cumsum"(%342, %14) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor + %344 = "tf.ConcatV2"(%11, %343, %3) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %345 = "tf.Shape"(%344) {device = ""} : (tensor) -> tensor<1xi64> + %346 = "tf.StridedSlice"(%345, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %347 = "tf.Sub"(%346, %13) {device = ""} : (tensor, tensor) -> tensor + %348 = "tf.Equal"(%347, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %349 = "tf.LogicalOr"(%348, %2) {device = ""} : (tensor, tensor) -> tensor + %350 = "tf.Equal"(%347, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %351 = "tf.LogicalOr"(%349, %350) {device = ""} : (tensor, tensor) -> tensor + %352 = "tf.StridedSlice"(%344, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %353 = 
"tf.StridedSlice"(%344, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %354 = "tf.Sub"(%352, %353) {device = ""} : (tensor, tensor) -> tensor + %355 = "tf.Shape"(%344) {device = ""} : (tensor) -> tensor<1xi64> + %356 = "tf.StridedSlice"(%355, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %357 = "tf.Sub"(%356, %13) {device = ""} : (tensor, tensor) -> tensor + %358 = "tf.Equal"(%357, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %359 = "tf.ExpandDims"(%344, %7) {device = ""} : (tensor, tensor) -> tensor + %360 = "tf.Shape"(%344) {device = ""} : (tensor) -> tensor<1xi32> + %361 = "tf.StridedSlice"(%360, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %362 = "tf.StridedSlice"(%360, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %363 = "tf.StridedSlice"(%360, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %364 = "tf.Select"(%2, %311, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %365 = "tf.Pack"(%364, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %366 = "tf.StridedSlice"(%365, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %367 = "tf.Cast"(%366) {Truncate = false, device = ""} : (tensor) -> tensor + %368 = "tf.Reshape"(%367, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %369 = "tf.Pack"(%7, %368) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %370 = "tf.Tile"(%4, %369) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %371 = "tf.Mul"(%368, %8) {device = ""} : (tensor, tensor) -> tensor + %372 = "tf.Pack"(%371) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %373 = "tf.ConcatV2"(%9, %372, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %374 = "tf.Reshape"(%370, %373) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %375 = "tf.Shape"(%374) {device = ""} : (tensor) -> tensor<1xi64> + %376 = "tf.StridedSlice"(%375, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %377 = "tf.Pack"(%366) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %378 = "tf.StridedSlice"(%374, %377, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> 
tensor + %379 = "tf.Sub"(%376, %366) {device = ""} : (tensor, tensor) -> tensor + %380 = "tf.Pack"(%379) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %381 = "tf.StridedSlice"(%374, %11, %380, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %382:2 = "tf.RaggedRange"(%381, %378, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %383 = "tf.GatherV2"(%11, %382#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %384 = "tf.GatherV2"(%12, %383, %14) {batch_dims = 0 : i64, device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %385 = "tf.StridedSlice"(%365, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %386 = "tf.StridedSlice"(%365, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %387 = "tf.StridedSlice"(%365, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> + %388 = "tf.ConcatV2"(%386, %387, %14) {device = ""} : (tensor<1xi64>, tensor<0xi64>, tensor) -> tensor<1xi64> + %389 = "tf.Tile"(%384, %388) {device = ""} : (tensor, tensor<1xi64>) -> tensor + %390 = "tf.StridedSlice"(%365, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %391 = "tf.Mul"(%390, %12) {device = ""} : (tensor, tensor<1xi64>) -> tensor<1xi64> + %392 = "tf.Tile"(%391, %385) {device = ""} : (tensor<1xi64>, tensor<1xi64>) -> tensor + %393 = "tf.Cumsum"(%392, %14) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor + %394 = "tf.ConcatV2"(%11, %393, %3) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %395 = "tf.StridedSlice"(%394, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %396 = "tf.ExpandDims"(%395, %7) {device = ""} : (tensor, tensor) -> tensor + %397 = "tf.Shape"(%395) {device = ""} : (tensor) -> tensor<1xi32> + %398 = "tf.StridedSlice"(%397, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %399 = "tf.Pack"(%398) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %400 = "tf.StridedSlice"(%394, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %401 = "tf.ExpandDims"(%400, %7) {device = ""} : (tensor, tensor) -> tensor + %402 = "tf.Shape"(%400) {device = ""} : (tensor) -> tensor<1xi32> + %403 = "tf.StridedSlice"(%402, 
%15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %404 = "tf.Pack"(%403) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %405 = "tf.Equal"(%143, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %406 = "tf.Select"(%405, %311, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %407 = "tf.Cast"(%406) {Truncate = false, device = ""} : (tensor) -> tensor + %408 = "tf.Reshape"(%407, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %409 = "tf.Pack"(%7, %408) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %410 = "tf.Mul"(%408, %8) {device = ""} : (tensor, tensor) -> tensor + %411 = "tf.Pack"(%410) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %412 = "tf.ConcatV2"(%9, %411, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %413 = "tf.Pack"(%406) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %414 = "tf.Pack"(%10, %143) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %415 = "tf.ExpandDims"(%414, %7) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> + %416 = "tf.Tile"(%415, %409) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %417 = "tf.Reshape"(%416, %412) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %418 = "tf.Shape"(%417) {device = ""} : (tensor) -> tensor<1xi64> + %419 = "tf.StridedSlice"(%418, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %420 = "tf.Sub"(%419, %406) {device = ""} : (tensor, tensor) -> tensor + %421 = "tf.Pack"(%420) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %422 = "tf.StridedSlice"(%417, %11, %421, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %423 = "tf.StridedSlice"(%417, %413, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %424:2 = "tf.RaggedRange"(%422, %423, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %425 = "tf.GatherV2"(%150, %424#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %426 = "tf.Cast"(%425) {Truncate = false, device = ""} : (tensor) -> tensor + %427 = "tf.BroadcastTo"(%426, %399) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %428 = "tf.Max"(%427, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %429 = "tf.Maximum"(%14, %428) {device = ""} : (tensor, tensor) -> tensor + %430 = "tf.Range"(%14, %429, %7) {device = ""} : (tensor, tensor, tensor) -> tensor + %431 = "tf.Pack"(%7, %429) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %432 = "tf.Tile"(%396, %431) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %433 = "tf.Shape"(%432) {device = ""} : (tensor) -> tensor<2xi32> + %434 = "tf.StridedSlice"(%433, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, 
shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %435 = "tf.Prod"(%434, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %436 = "tf.Pack"(%435) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %437 = "tf.Shape"(%432) {device = ""} : (tensor) -> tensor<2xi32> + %438 = "tf.StridedSlice"(%437, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %439 = "tf.Shape"(%432) {device = ""} : (tensor) -> tensor<2xi32> + %440 = "tf.StridedSlice"(%439, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %441 = "tf.ConcatV2"(%438, %436, %440, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %442 = "tf.Reshape"(%432, %441) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %443 = "tf.ExpandDims"(%427, %3) {device = ""} : (tensor, tensor) -> tensor + %444 = "tf.Less"(%430, %443) {device = ""} : (tensor, tensor) -> tensor + %445 = "tf.Reshape"(%444, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %446 = "tf.Where"(%445) {device = ""} : (tensor) -> tensor + %447 = "tf.Squeeze"(%446) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %448 = "tf.GatherV2"(%442, %447, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %449 = "tf.Cast"(%425) {Truncate = false, device = ""} : (tensor) -> tensor + %450 = "tf.BroadcastTo"(%449, %404) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %451 = "tf.Max"(%450, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %452 = "tf.Maximum"(%14, %451) {device = ""} : (tensor, tensor) -> tensor + %453 = "tf.Range"(%14, %452, %7) {device = ""} : (tensor, tensor, tensor) -> tensor + %454 = "tf.Pack"(%7, %452) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %455 = "tf.Tile"(%401, %454) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %456 = "tf.Shape"(%455) {device = ""} : (tensor) -> tensor<2xi32> + %457 = "tf.StridedSlice"(%456, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %458 = "tf.Prod"(%457, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %459 = "tf.Pack"(%458) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %460 = "tf.Shape"(%455) {device = ""} : (tensor) -> tensor<2xi32> + %461 = "tf.StridedSlice"(%460, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %462 = "tf.Shape"(%455) {device = ""} : (tensor) -> tensor<2xi32> + %463 = "tf.StridedSlice"(%462, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %464 = "tf.ConcatV2"(%461, %459, %463, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, 
tensor) -> tensor<1xi32> + %465 = "tf.Reshape"(%455, %464) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %466 = "tf.ExpandDims"(%450, %3) {device = ""} : (tensor, tensor) -> tensor + %467 = "tf.Less"(%453, %466) {device = ""} : (tensor, tensor) -> tensor + %468 = "tf.Reshape"(%467, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %469 = "tf.Where"(%468) {device = ""} : (tensor) -> tensor + %470 = "tf.Squeeze"(%469) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %471 = "tf.GatherV2"(%465, %470, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %472:2 = "tf.RaggedRange"(%448, %471, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %473 = "tf.GatherV2"(%389, %472#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %474 = "tf.If"(%315, %315, %311, %13) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_Assert_1_AssertGuard_false_16370, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_Assert_1_AssertGuard_true_16360} : (tensor, tensor, tensor, tensor) -> tensor + %475 = "tf.Identity"(%474) {device = ""} : (tensor) -> tensor + %476 = "tf.Select"(%2, %311, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %477 = "tf.Pack"(%476) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %478 = "tf.ConcatV2"(%1, %477, %12, %14) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> + %479 = "tf.StridedSlice"(%478, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %480 = "tf.Equal"(%479, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %481 = "tf.StridedSlice"(%478, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %482 = "tf.StridedSlice"(%478, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %483 = "tf.Equal"(%482, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %484 = "tf.If"(%483, %483, %482, %425) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_Assert_2_AssertGuard_false_16860, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_Assert_2_AssertGuard_true_16850} : (tensor, tensor, tensor, tensor) -> tensor + %485 = "tf.Identity"(%484) {device = ""} : (tensor) -> tensor + %486 = "tf.If"(%480, %480, %425, %481) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_Assert_3_AssertGuard_false_17220, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_Assert_3_AssertGuard_true_17210} : (tensor, tensor, tensor, tensor) -> tensor + %487 = "tf.Identity"(%486) {device = ""} : (tensor) -> tensor + %488 = "tf.If"(%351, %351, %13, %347) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = 
@WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_false_21900, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_true_21890} : (tensor, tensor, tensor, tensor) -> tensor + %489 = "tf.Identity"(%488) {device = ""} : (tensor) -> tensor + %490 = "tf.Equal"(%347, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %491 = "tf.Select"(%490, %13, %347) {device = ""} : (tensor, tensor, tensor) -> tensor + %492 = "tf.Equal"(%491, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %493 = "tf.LogicalOr"(%492, %2) {device = ""} : (tensor, tensor) -> tensor + %494 = "tf.Equal"(%491, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %495 = "tf.LogicalOr"(%493, %494) {device = ""} : (tensor, tensor) -> tensor + %496 = "tf.Select"(%358, %491, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %497 = "tf.Pack"(%496, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %498 = "tf.StridedSlice"(%497, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %499 = "tf.Cast"(%498) {Truncate = false, device = ""} : (tensor) -> tensor + %500 = "tf.Reshape"(%499, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %501 = "tf.Pack"(%7, %500) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %502 = "tf.Tile"(%359, %501) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %503 = "tf.Mul"(%500, %362) {device = ""} : (tensor, tensor) -> tensor + %504 = "tf.Pack"(%503) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %505 = "tf.ConcatV2"(%361, %504, %363, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %506 = "tf.Reshape"(%502, %505) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %507 = "tf.Shape"(%506) {device = ""} : (tensor) -> tensor<1xi64> + %508 = "tf.StridedSlice"(%507, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %509 = "tf.Pack"(%498) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %510 = "tf.StridedSlice"(%506, %509, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %511 = "tf.Sub"(%508, %498) {device = ""} : (tensor, tensor) -> tensor + %512 = "tf.Pack"(%511) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %513 = "tf.StridedSlice"(%506, %11, %512, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %514:2 = "tf.RaggedRange"(%513, %510, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %515 = "tf.Select"(%2, %491, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %516 = "tf.Pack"(%515, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %517 = "tf.StridedSlice"(%516, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, 
shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %518 = "tf.Cast"(%517) {Truncate = false, device = ""} : (tensor) -> tensor + %519 = "tf.Reshape"(%518, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %520 = "tf.Pack"(%7, %519) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %521 = "tf.Tile"(%4, %520) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %522 = "tf.Mul"(%519, %8) {device = ""} : (tensor, tensor) -> tensor + %523 = "tf.Pack"(%522) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %524 = "tf.ConcatV2"(%9, %523, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %525 = "tf.Reshape"(%521, %524) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %526 = "tf.Shape"(%525) {device = ""} : (tensor) -> tensor<1xi64> + %527 = "tf.StridedSlice"(%526, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %528 = "tf.Pack"(%517) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %529 = "tf.StridedSlice"(%525, %528, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %530 = "tf.Sub"(%527, %517) {device = ""} : (tensor, tensor) -> tensor + %531 = "tf.Pack"(%530) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %532 = "tf.StridedSlice"(%525, %11, %531, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %533:2 = "tf.RaggedRange"(%532, %529, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %534 = "tf.StridedSlice"(%516, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %535 = "tf.StridedSlice"(%516, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %536 = "tf.Mul"(%535, %12) {device = ""} : (tensor, tensor<1xi64>) -> tensor<1xi64> + %537 = "tf.Tile"(%536, %534) {device = ""} : (tensor<1xi64>, tensor<1xi64>) -> tensor + %538 = "tf.Cumsum"(%537, %14) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor + %539 = "tf.ConcatV2"(%11, %538, %3) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %540 = "tf.StridedSlice"(%539, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %541 = "tf.ExpandDims"(%540, %7) {device = ""} : (tensor, tensor) -> tensor + %542 = "tf.Shape"(%540) {device = ""} : (tensor) -> tensor<1xi32> + %543 = "tf.StridedSlice"(%542, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) 
-> tensor + %544 = "tf.Pack"(%543) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %545 = "tf.StridedSlice"(%539, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %546 = "tf.ExpandDims"(%545, %7) {device = ""} : (tensor, tensor) -> tensor + %547 = "tf.Shape"(%545) {device = ""} : (tensor) -> tensor<1xi32> + %548 = "tf.StridedSlice"(%547, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %549 = "tf.Pack"(%548) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %550 = "tf.Equal"(%347, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %551 = "tf.Select"(%550, %491, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %552 = "tf.Cast"(%551) {Truncate = false, device = ""} : (tensor) -> tensor + %553 = "tf.Reshape"(%552, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %554 = "tf.Pack"(%7, %553) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %555 = "tf.Mul"(%553, %8) {device = ""} : (tensor, tensor) -> tensor + %556 = "tf.Pack"(%555) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %557 = "tf.ConcatV2"(%9, %556, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %558 = "tf.Pack"(%551) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %559 = "tf.Pack"(%10, %347) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %560 = "tf.ExpandDims"(%559, %7) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> + %561 = "tf.Tile"(%560, %554) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %562 = "tf.Reshape"(%561, %557) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %563 = "tf.Shape"(%562) {device = ""} : (tensor) -> tensor<1xi64> + %564 = "tf.StridedSlice"(%563, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %565 = "tf.Sub"(%564, %551) {device = ""} : (tensor, tensor) -> tensor + %566 = "tf.Pack"(%565) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %567 = "tf.StridedSlice"(%562, %11, %566, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %568 = "tf.StridedSlice"(%562, %558, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %569:2 = "tf.RaggedRange"(%567, %568, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %570 = "tf.GatherV2"(%354, %569#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %571 = "tf.Cast"(%570) {Truncate = false, device = ""} : (tensor) -> tensor + %572 = "tf.BroadcastTo"(%571, %544) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %573 = "tf.Max"(%572, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %574 = "tf.Maximum"(%14, %573) {device = 
""} : (tensor, tensor) -> tensor + %575 = "tf.Range"(%14, %574, %7) {device = ""} : (tensor, tensor, tensor) -> tensor + %576 = "tf.Pack"(%7, %574) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %577 = "tf.Tile"(%541, %576) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %578 = "tf.Shape"(%577) {device = ""} : (tensor) -> tensor<2xi32> + %579 = "tf.StridedSlice"(%578, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %580 = "tf.Prod"(%579, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %581 = "tf.Pack"(%580) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %582 = "tf.Shape"(%577) {device = ""} : (tensor) -> tensor<2xi32> + %583 = "tf.StridedSlice"(%582, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %584 = "tf.Shape"(%577) {device = ""} : (tensor) -> tensor<2xi32> + %585 = "tf.StridedSlice"(%584, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %586 = "tf.ConcatV2"(%583, %581, %585, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %587 = "tf.Reshape"(%577, %586) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %588 = "tf.ExpandDims"(%572, %3) {device = ""} : (tensor, tensor) -> tensor + %589 = "tf.Less"(%575, %588) {device = ""} : (tensor, tensor) -> tensor + %590 = "tf.Reshape"(%589, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %591 = "tf.Where"(%590) {device = ""} : (tensor) -> tensor + %592 = "tf.Squeeze"(%591) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %593 = "tf.GatherV2"(%587, %592, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %594 = "tf.Cast"(%570) {Truncate = false, device = ""} : (tensor) -> tensor + %595 = "tf.BroadcastTo"(%594, %549) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %596 = "tf.Max"(%595, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %597 = "tf.Maximum"(%14, %596) {device = ""} : (tensor, tensor) -> tensor + %598 = "tf.Range"(%14, %597, %7) {device = ""} : (tensor, tensor, tensor) -> tensor + %599 = "tf.Pack"(%7, %597) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %600 = "tf.Tile"(%546, %599) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %601 = "tf.Shape"(%600) {device = ""} : (tensor) -> tensor<2xi32> + %602 = "tf.StridedSlice"(%601, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %603 = "tf.Prod"(%602, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %604 = "tf.Pack"(%603) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %605 = "tf.Shape"(%600) {device = ""} : (tensor) -> tensor<2xi32> + %606 = "tf.StridedSlice"(%605, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : 
(tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %607 = "tf.Shape"(%600) {device = ""} : (tensor) -> tensor<2xi32> + %608 = "tf.StridedSlice"(%607, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %609 = "tf.ConcatV2"(%606, %604, %608, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %610 = "tf.Reshape"(%600, %609) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %611 = "tf.ExpandDims"(%595, %3) {device = ""} : (tensor, tensor) -> tensor + %612 = "tf.Less"(%598, %611) {device = ""} : (tensor, tensor) -> tensor + %613 = "tf.Reshape"(%612, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %614 = "tf.Where"(%613) {device = ""} : (tensor) -> tensor + %615 = "tf.Squeeze"(%614) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %616 = "tf.GatherV2"(%610, %615, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %617:2 = "tf.RaggedRange"(%593, %616, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %618 = "tf.If"(%495, %495, %491, %13) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_false_22970, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_true_22960} : (tensor, tensor, tensor, tensor) -> tensor + %619 = "tf.Identity"(%618) {device = ""} : (tensor) -> tensor + %620 = "tf.Select"(%2, %491, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %621 = "tf.Pack"(%620) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %622 = "tf.ConcatV2"(%1, %621, %12, %14) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> + %623 = "tf.StridedSlice"(%622, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %624 = "tf.Equal"(%623, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %625 = "tf.StridedSlice"(%622, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %626 = "tf.StridedSlice"(%622, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %627 = "tf.Equal"(%626, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %628 = "tf.If"(%627, %627, %626, %570) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_false_23460, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_true_23450} : (tensor, tensor, tensor, tensor) -> tensor + %629 = "tf.Identity"(%628) {device = ""} : (tensor) -> tensor + %630 = "tf.If"(%624, %624, %570, %625) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = 
@WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_false_23820, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_true_23810} : (tensor, tensor, tensor, tensor) -> tensor + %631 = "tf.Identity"(%79) {device = ""} : (tensor) -> tensor + %632 = "tf.Identity"(%630) {device = ""} : (tensor) -> tensor + %633 = "tf.Identity"(%307) {device = ""} : (tensor) -> tensor + %634 = "tf.Shape"(%36#2) {device = ""} : (tensor) -> tensor<1xi32> + %635 = "tf.StridedSlice"(%634, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %636 = "tf.Cast"(%635) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> + %637 = "tf.Identity"(%636) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> + %638 = "tf.Shape"(%36#3) {device = ""} : (tensor) -> tensor<1xi32> + %639 = "tf.StridedSlice"(%638, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %640 = "tf.Cast"(%639) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> + %641 = "tf.Identity"(%640) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> + %642 = "tf.GatherV2"(%36#3, %335, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %643 = "tf.Tile"(%642, %339) {device = ""} : (tensor, tensor<1xi64>) -> tensor + %644 = "tf.Sub"(%643, %473) {device = ""} : (tensor, tensor) -> tensor + %645 = "tf.Shape"(%644) {device = ""} : (tensor) -> tensor<1xi32> + %646 = "tf.StridedSlice"(%645, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %647 = "tf.Cast"(%646) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> + %648 = "tf.Identity"(%647) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> + %649 = "tf.UnicodeEncode"(%36#0, %57) {Tsplits = i64, device = "", errors = "replace", output_encoding = "UTF-8", replacement_char = 65533 : i64} : (tensor, tensor) -> tensor + %650 = "tf.Identity"(%649) {device = ""} : (tensor) -> tensor + return %650, %631 : tensor, tensor + } + func @WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_false_3210(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Input tensors have incompatible shapes."> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedConcat/RaggedFromTensor/Const:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/RaggedConcat/RaggedNRows/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func 
@WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_true_3200(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_3970(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_3960(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_4330(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () + %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor + return %4 : tensor + } + func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_4320(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_4670(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = 
[#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_4660(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_5040(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_5030(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_5400(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x 
(WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () + %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor + return %4 : tensor + } + func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_5390(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_false_5760(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RaggedNRows/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_true_5750(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_6110(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func 
@WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_6100(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_6470(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () + %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor + return %4 : tensor + } + func @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_6460(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_6810(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_6800(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_7180(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = 
[#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_7170(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_7540(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () + %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor + return %4 : tensor + } + func @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_7530(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_7880(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y 
(WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_7870(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_RaggedGather_Assert_AssertGuard_false_8680(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_RaggedGather_Assert_AssertGuard_true_8670(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_false_9750(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_true_9740(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_false_10240(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> 
tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_true_10230(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_false_10600(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_true_10590(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_Assert_AssertGuard_false_15300(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_Assert_AssertGuard_true_15290(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_Assert_1_AssertGuard_false_16370(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : 
tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_Assert_1_AssertGuard_true_16360(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_Assert_2_AssertGuard_false_16860(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_Assert_2_AssertGuard_true_16850(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_Assert_3_AssertGuard_false_17220(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_Assert_3_AssertGuard_true_17210(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_false_21900(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : 
() -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_true_21890(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_false_22970(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_true_22960(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_false_23460(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_true_23450(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_false_23820(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = 
dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_true_23810(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor } - // CHECK: func @_whitespace_func(%arg0: tensor<1x!tf.string>) -> (tensor, tensor) attributes {tf._GrapplerSpecializedFunc = true, tf._input_shapes = [#tf.shape<1>], tf.api_implements = "tftext:WhitespaceTokenizer", tf.signature.is_stateful} { - // CHECK: "tfl.custom"(%arg0) {custom_code = "tftext:WhitespaceTokenizer", custom_option = opaque<"tfl", "0x"> : tensor<0xi8>} : (tensor<1x!tf.string>) -> (tensor, tensor) + // CHECK: func @whitespace_tokenizer_rank1(%arg0: tensor<1x!tf.string> {tf._user_specified_name = "input"}) -> (tensor, tensor) attributes {tf._input_shapes = [#tf.shape<1>], tf.api_implements = "tftext:WhitespaceTokenizer", tf.signature.is_stateful} { + // CHECK: %0:2 = "tfl.custom"(%arg0) {custom_code = "tftext:WhitespaceTokenizer", custom_option = opaque<"tfl", "0x"> : tensor<0xi8>} : (tensor<1x!tf.string>) -> (tensor, tensor) // CHECK: return %0#0, %0#1 : tensor, tensor + + func @whitespace_tokenizer_rank2(%arg0: tensor {tf._user_specified_name = "input"}) -> (tensor, tensor, tensor) attributes {tf._input_shapes = [#tf.shape], tf.api_implements = "tftext:WhitespaceTokenizer", tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<[]> : tensor<0xi64>} : () -> tensor<0xi64> + %1 = "tf.Const"() {value = dense : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<-1> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<[[0], [1]]> : tensor<2x1xi64>} : () -> tensor<2x1xi64> + %4 = "tf.Const"() {value = dense<[2, -1]> : tensor<2xi32>} : () -> tensor<2xi32> + %5 = "tf.Const"() {value = dense<2> : tensor} : () -> tensor + %6 = "tf.Const"() {value = dense<-1> : tensor<1xi32>} : () -> tensor<1xi32> + %7 = "tf.Const"() {value = dense<2> : tensor<1xi32>} : () -> tensor<1xi32> + %8 = "tf.Const"() {value = dense<[1, 0]> : tensor<2xi32>} : () -> tensor<2xi32> + %9 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %10 = "tf.Const"() {value = dense<2> : tensor} : () -> tensor + %11 = "tf.Const"() {value = dense<[]> : tensor<0xi32>} : () -> tensor<0xi32> + %12 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %13 = "tf.Const"() {value = dense<0> : tensor<1xi64>} : () -> tensor<1xi64> + %14 = "tf.Const"() {value = dense<1> : tensor<1xi64>} : () -> tensor<1xi64> + %15 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %16 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %17 = "tf.Const"() {value = dense<0> : tensor<1xi32>} : () -> tensor<1xi32> + %18 = "tf.Const"() {value = dense<1> : tensor<1xi32>} : () -> tensor<1xi32> + %19 = "tf.Shape"(%arg0) {device = ""} : (tensor) -> tensor<2xi64> + %20 = 
"tf.StridedSlice"(%19, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %21 = "tf.StridedSlice"(%19, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %22 = "tf.Mul"(%20, %21) {device = ""} : (tensor, tensor) -> tensor + %23 = "tf.Pack"(%22) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %24 = "tf.StridedSlice"(%19, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> + %25 = "tf.ConcatV2"(%23, %24, %16) {device = ""} : (tensor<1xi64>, tensor<0xi64>, tensor) -> tensor<1xi64> + %26 = "tf.Reshape"(%arg0, %25) {device = ""} : (tensor, tensor<1xi64>) -> tensor + %27 = "tf.StringLength"(%26) {device = "", unit = "BYTE"} : (tensor) -> tensor + %28 = "tf.ExpandDims"(%27, %9) {device = ""} : (tensor, tensor) -> tensor + %29 = "tf.Cast"(%28) {Truncate = false, device = ""} : (tensor) -> tensor + %30 = "tf.Shape"(%29) {device = ""} : (tensor) -> tensor<2xi64> + %31 = "tf.StridedSlice"(%30, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %32 = "tf.StridedSlice"(%30, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %33 = "tf.Mul"(%31, %32) {device = ""} : (tensor, tensor) -> tensor + %34 = "tf.Pack"(%33) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %35 = "tf.StridedSlice"(%30, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> + %36 = "tf.ConcatV2"(%34, %35, %16) {device = ""} : (tensor<1xi64>, tensor<0xi64>, tensor) -> tensor<1xi64> + %37 = "tf.Reshape"(%29, %36) {device = ""} : (tensor, tensor<1xi64>) -> tensor + %38 = "tf.StridedSlice"(%30, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %39 = "tf.AddV2"(%38, %15) {device = ""} : (tensor, tensor) -> tensor + %40 = "tf.Range"(%12, %39, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %41 = "tf.Mul"(%40, %15) {device = ""} : (tensor, tensor) -> tensor + %42 = "tf.Reshape"(%26, %6) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %43:3 = "tf.UnicodeDecodeWithOffsets"(%42) {Tsplits = i64, device = "", errors = "replace", input_encoding = "UTF-8", replace_control_characters = false, replacement_char = 65533 : i64} : (tensor) -> (tensor, tensor, tensor) + %44 = "tf.StridedSlice"(%43#0, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %45 = 
"tf.Shape"(%44) {device = ""} : (tensor) -> tensor<1xi32> + %46 = "tf.ConcatV2"(%45, %18, %16) {device = ""} : (tensor<1xi32>, tensor<1xi32>, tensor) -> tensor<2xi32> + %47 = "tf.Reshape"(%44, %46) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %48 = "tf.Shape"(%47) {device = ""} : (tensor) -> tensor<2xi64> + %49 = "tf.StridedSlice"(%48, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %50 = "tf.AddV2"(%49, %15) {device = ""} : (tensor, tensor) -> tensor + %51 = "tf.Range"(%12, %50, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %52 = "tf.Mul"(%51, %15) {device = ""} : (tensor, tensor) -> tensor + %53 = "tf.ExpandDims"(%52, %9) {device = ""} : (tensor, tensor) -> tensor + %54 = "tf.Shape"(%52) {device = ""} : (tensor) -> tensor<1xi32> + %55 = "tf.StridedSlice"(%54, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %56 = "tf.StridedSlice"(%54, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %57 = "tf.StridedSlice"(%54, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %58 = "tf.StridedSlice"(%52, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %59 = "tf.StridedSlice"(%52, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %60 = "tf.Sub"(%58, %59) {device = ""} : (tensor, tensor) -> tensor + %61 = "tf.Shape"(%47) {device = ""} : (tensor) -> tensor<2xi32> + %62 = "tf.Cast"(%61) {Truncate = false, device = ""} : (tensor<2xi32>) -> tensor<2xi64> + %63 = "tf.StridedSlice"(%62, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %64 = "tf.Equal"(%63, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %65 = "tf.StridedSlice"(%62, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %66 = "tf.Equal"(%65, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %67 = "tf.StridedSlice"(%62, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %68 = "tf.Shape"(%47) {device = ""} : (tensor) -> tensor<2xi32> + %69 = "tf.Cast"(%68) {Truncate = false, device = ""} : (tensor<2xi32>) -> tensor<2xi64> + 
%70 = "tf.StridedSlice"(%69, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %71 = "tf.Equal"(%70, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %72 = "tf.StridedSlice"(%43#0, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %73 = "tf.AddV2"(%72, %15) {device = ""} : (tensor, tensor) -> tensor + %74 = "tf.StridedSlice"(%43#0, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %75 = "tf.Minimum"(%73, %74) {device = ""} : (tensor, tensor) -> tensor + %76:2 = "tf.RaggedRange"(%75, %74, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %77 = "tf.Shape"(%76#0) {device = ""} : (tensor) -> tensor<1xi64> + %78 = "tf.StridedSlice"(%77, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %79 = "tf.Sub"(%78, %15) {device = ""} : (tensor, tensor) -> tensor + %80 = "tf.Equal"(%38, %79) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %81 = "tf.All"(%80, %11) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %82 = "tf.If"(%81, %81, %38, %79) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_false_99640, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_true_99630} : (tensor, tensor, tensor, tensor) -> tensor + %83 = "tf.Identity"(%82) {device = ""} : (tensor) -> tensor + %84 = "tf.StridedSlice"(%41, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %85 = "tf.Mul"(%79, %5) {device = ""} : (tensor, tensor) -> tensor + %86 = "tf.Range"(%12, %85, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %87 = "tf.Reshape"(%86, %4) {device = ""} : (tensor, tensor<2xi32>) -> tensor<2x?xi64> + %88 = "tf.Transpose"(%87, %8) {device = ""} : (tensor<2x?xi64>, tensor<2xi32>) -> tensor + %89 = "tf.Reshape"(%88, %6) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %90 = "tf.StridedSlice"(%76#0, %6, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %91 = "tf.AddV2"(%84, %90) {device = ""} : (tensor, tensor) -> tensor + %92 = "tf.ConcatV2"(%76#0, %91, %16) {device = ""} : (tensor, tensor, tensor) -> tensor + %93 = "tf.GatherV2"(%43#2, %76#1, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %94 = "tf.ConcatV2"(%93, %37, %16) {device = ""} : (tensor, tensor, tensor) -> tensor + %95:2 = 
"tf.RaggedGather"(%92, %94, %89) {OUTPUT_RAGGED_RANK = 1 : i64, PARAMS_RAGGED_RANK = 1 : i64, Tindices = i64, Tsplits = i64, Tvalues = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %96 = "tf.StridedSlice"(%95#0, %17, %17, %7) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %97 = "tf.StridedSlice"(%96, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %98 = "tf.Shape"(%97) {device = ""} : (tensor) -> tensor<1xi32> + %99 = "tf.ConcatV2"(%98, %18, %16) {device = ""} : (tensor<1xi32>, tensor<1xi32>, tensor) -> tensor<2xi32> + %100 = "tf.Reshape"(%97, %99) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %101 = "tf.Shape"(%100) {device = ""} : (tensor) -> tensor<2xi64> + %102 = "tf.StridedSlice"(%101, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %103 = "tf.AddV2"(%102, %15) {device = ""} : (tensor, tensor) -> tensor + %104 = "tf.Range"(%12, %103, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %105 = "tf.Mul"(%104, %15) {device = ""} : (tensor, tensor) -> tensor + %106 = "tf.ExpandDims"(%105, %9) {device = ""} : (tensor, tensor) -> tensor + %107 = "tf.Shape"(%105) {device = ""} : (tensor) -> tensor<1xi32> + %108 = "tf.StridedSlice"(%107, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %109 = "tf.StridedSlice"(%107, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %110 = "tf.StridedSlice"(%107, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %111 = "tf.StridedSlice"(%105, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %112 = "tf.StridedSlice"(%105, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %113 = "tf.Sub"(%111, %112) {device = ""} : (tensor, tensor) -> tensor + %114 = "tf.Shape"(%100) {device = ""} : (tensor) -> tensor<2xi32> + %115 = "tf.Cast"(%114) {Truncate = false, device = ""} : (tensor<2xi32>) -> tensor<2xi64> + %116 = "tf.StridedSlice"(%115, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %117 = "tf.Equal"(%116, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %118 = "tf.StridedSlice"(%115, 
%17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %119 = "tf.Equal"(%118, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %120 = "tf.StridedSlice"(%115, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %121 = "tf.Shape"(%100) {device = ""} : (tensor) -> tensor<2xi32> + %122 = "tf.Cast"(%121) {Truncate = false, device = ""} : (tensor<2xi32>) -> tensor<2xi64> + %123 = "tf.StridedSlice"(%122, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %124 = "tf.Equal"(%123, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %125:5 = "tf.WhitespaceTokenizeWithOffsets"(%43#1, %43#0) {Tsplits = i64, device = ""} : (tensor, tensor) -> (tensor, tensor, tensor, tensor, tensor) + %126 = "tf.StridedSlice"(%125#1, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %127 = "tf.Equal"(%126, %12) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %128 = "tf.All"(%127, %11) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %129 = "tf.If"(%128, %128, %126, %12) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_100400, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_100390} : (tensor, tensor, tensor, tensor) -> tensor + %130 = "tf.Identity"(%129) {device = ""} : (tensor) -> tensor + %131 = "tf.StridedSlice"(%125#1, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %132 = "tf.StridedSlice"(%125#1, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %133 = "tf.Sub"(%131, %132) {device = ""} : (tensor, tensor) -> tensor + %134 = "tf.LessEqual"(%12, %133) {device = ""} : (tensor, tensor) -> tensor + %135 = "tf.All"(%134, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %136 = "tf.If"(%135, %135, %133) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_100760, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = 
@WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_100750} : (tensor, tensor, tensor) -> tensor + %137 = "tf.Identity"(%136) {device = ""} : (tensor) -> tensor + %138 = "tf.Identity"(%125#1) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %139 = "tf.StridedSlice"(%138, %6, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %140 = "tf.Shape"(%125#0) {device = ""} : (tensor) -> tensor<1xi64> + %141 = "tf.StridedSlice"(%140, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %142 = "tf.Equal"(%139, %141) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %143 = "tf.All"(%142, %11) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %144 = "tf.If"(%143, %143, %139, %141) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_101100, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_101090} : (tensor, tensor, tensor, tensor) -> tensor + %145 = "tf.Identity"(%144) {device = ""} : (tensor) -> tensor + %146 = "tf.Identity"(%138) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %147 = "tf.Shape"(%146) {device = ""} : (tensor) -> tensor<1xi64> + %148 = "tf.StridedSlice"(%147, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %149 = "tf.Sub"(%148, %15) {device = ""} : (tensor, tensor) -> tensor + %150 = "tf.StridedSlice"(%125#4, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %151 = "tf.Equal"(%150, %12) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %152 = "tf.All"(%151, %11) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %153 = "tf.If"(%152, %152, %150, %12) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_101470, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_101460} : (tensor, tensor, tensor, tensor) -> tensor + %154 = "tf.Identity"(%153) {device = ""} : (tensor) -> 
tensor + %155 = "tf.StridedSlice"(%125#4, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %156 = "tf.StridedSlice"(%125#4, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %157 = "tf.Sub"(%155, %156) {device = ""} : (tensor, tensor) -> tensor + %158 = "tf.LessEqual"(%12, %157) {device = ""} : (tensor, tensor) -> tensor + %159 = "tf.All"(%158, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %160 = "tf.If"(%159, %159, %157) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_101830, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_101820} : (tensor, tensor, tensor) -> tensor + %161 = "tf.Identity"(%160) {device = ""} : (tensor) -> tensor + %162 = "tf.Identity"(%125#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %163 = "tf.StridedSlice"(%162, %6, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %164 = "tf.Equal"(%163, %149) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %165 = "tf.All"(%164, %11) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %166 = "tf.If"(%165, %165, %163, %149) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_false_102190, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_true_102180} : (tensor, tensor, tensor, tensor) -> tensor + %167 = "tf.Identity"(%166) {device = ""} : (tensor) -> tensor + %168 = "tf.Identity"(%162) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %169 = "tf.StridedSlice"(%125#4, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %170 = "tf.Equal"(%169, %12) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %171 = "tf.All"(%170, %11) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %172 = "tf.If"(%171, %171, %169, %12) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = 
@WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_102540, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_102530} : (tensor, tensor, tensor, tensor) -> tensor + %173 = "tf.Identity"(%172) {device = ""} : (tensor) -> tensor + %174 = "tf.StridedSlice"(%125#4, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %175 = "tf.StridedSlice"(%125#4, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %176 = "tf.Sub"(%174, %175) {device = ""} : (tensor, tensor) -> tensor + %177 = "tf.LessEqual"(%12, %176) {device = ""} : (tensor, tensor) -> tensor + %178 = "tf.All"(%177, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %179 = "tf.If"(%178, %178, %176) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_102900, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_102890} : (tensor, tensor, tensor) -> tensor + %180 = "tf.Identity"(%179) {device = ""} : (tensor) -> tensor + %181 = "tf.Identity"(%125#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %182 = "tf.StridedSlice"(%181, %6, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %183 = "tf.Shape"(%125#2) {device = ""} : (tensor) -> tensor<1xi64> + %184 = "tf.StridedSlice"(%183, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %185 = "tf.Equal"(%182, %184) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %186 = "tf.All"(%185, %11) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %187 = "tf.If"(%186, %186, %182, %184) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_103240, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_103230} : (tensor, tensor, tensor, tensor) -> tensor + %188 = "tf.Identity"(%187) {device = ""} : (tensor) -> tensor 
+ %189 = "tf.Identity"(%181) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %190 = "tf.Shape"(%189) {device = ""} : (tensor) -> tensor<1xi64> + %191 = "tf.StridedSlice"(%190, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %192 = "tf.Sub"(%191, %15) {device = ""} : (tensor, tensor) -> tensor + %193 = "tf.Equal"(%192, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %194 = "tf.LogicalOr"(%64, %193) {device = ""} : (tensor, tensor) -> tensor + %195 = "tf.Equal"(%192, %63) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %196 = "tf.LogicalOr"(%194, %195) {device = ""} : (tensor, tensor) -> tensor + %197 = "tf.StridedSlice"(%189, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %198 = "tf.StridedSlice"(%189, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %199 = "tf.Sub"(%197, %198) {device = ""} : (tensor, tensor) -> tensor + %200 = "tf.Shape"(%189) {device = ""} : (tensor) -> tensor<1xi64> + %201 = "tf.StridedSlice"(%200, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %202 = "tf.Sub"(%201, %15) {device = ""} : (tensor, tensor) -> tensor + %203 = "tf.Equal"(%202, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %204 = "tf.ExpandDims"(%189, %9) {device = ""} : (tensor, tensor) -> tensor + %205 = "tf.Shape"(%189) {device = ""} : (tensor) -> tensor<1xi32> + %206 = "tf.StridedSlice"(%205, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %207 = "tf.StridedSlice"(%205, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %208 = "tf.StridedSlice"(%205, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %209 = "tf.StridedSlice"(%125#4, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %210 = "tf.Equal"(%209, %12) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %211 = "tf.All"(%210, %11) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %212 = "tf.If"(%211, %211, %209, %12) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = 
@WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_103610, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_103600} : (tensor, tensor, tensor, tensor) -> tensor + %213 = "tf.Identity"(%212) {device = ""} : (tensor) -> tensor + %214 = "tf.StridedSlice"(%125#4, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %215 = "tf.StridedSlice"(%125#4, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %216 = "tf.Sub"(%214, %215) {device = ""} : (tensor, tensor) -> tensor + %217 = "tf.LessEqual"(%12, %216) {device = ""} : (tensor, tensor) -> tensor + %218 = "tf.All"(%217, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %219 = "tf.If"(%218, %218, %216) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_103970, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_103960} : (tensor, tensor, tensor) -> tensor + %220 = "tf.Identity"(%219) {device = ""} : (tensor) -> tensor + %221 = "tf.Identity"(%125#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %222 = "tf.StridedSlice"(%221, %6, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %223 = "tf.Shape"(%125#3) {device = ""} : (tensor) -> tensor<1xi64> + %224 = "tf.StridedSlice"(%223, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %225 = "tf.Equal"(%222, %224) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %226 = "tf.All"(%225, %11) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %227 = "tf.If"(%226, %226, %222, %224) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_104310, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_104300} : (tensor, tensor, tensor, tensor) -> tensor + %228 = "tf.Identity"(%227) {device = ""} : (tensor) -> tensor 
+ %229 = "tf.Identity"(%221) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %230 = "tf.Shape"(%229) {device = ""} : (tensor) -> tensor<1xi64> + %231 = "tf.StridedSlice"(%230, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %232 = "tf.Sub"(%231, %15) {device = ""} : (tensor, tensor) -> tensor + %233 = "tf.Equal"(%232, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %234 = "tf.LogicalOr"(%233, %1) {device = ""} : (tensor, tensor) -> tensor + %235 = "tf.Equal"(%232, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %236 = "tf.LogicalOr"(%234, %235) {device = ""} : (tensor, tensor) -> tensor + %237 = "tf.StridedSlice"(%229, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %238 = "tf.StridedSlice"(%229, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %239 = "tf.Sub"(%237, %238) {device = ""} : (tensor, tensor) -> tensor + %240 = "tf.Shape"(%229) {device = ""} : (tensor) -> tensor<1xi64> + %241 = "tf.StridedSlice"(%240, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %242 = "tf.Sub"(%241, %15) {device = ""} : (tensor, tensor) -> tensor + %243 = "tf.Equal"(%242, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %244 = "tf.ExpandDims"(%229, %9) {device = ""} : (tensor, tensor) -> tensor + %245 = "tf.Shape"(%229) {device = ""} : (tensor) -> tensor<1xi32> + %246 = "tf.StridedSlice"(%245, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %247 = "tf.StridedSlice"(%245, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %248 = "tf.StridedSlice"(%245, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %249 = "tf.StridedSlice"(%229, %6, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %250 = "tf.Range"(%12, %249, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %251 = "tf.StridedSlice"(%229, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %252 = "tf.StridedSlice"(%229, %17, %6, 
%18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %253 = "tf.Sub"(%251, %252) {device = ""} : (tensor, tensor) -> tensor + %254 = "tf.If"(%196, %196, %63, %192) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_AssertGuard_false_105110, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_AssertGuard_true_105100} : (tensor, tensor, tensor, tensor) -> tensor + %255 = "tf.Identity"(%254) {device = ""} : (tensor) -> tensor + %256 = "tf.Equal"(%192, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %257 = "tf.Select"(%256, %63, %192) {device = ""} : (tensor, tensor, tensor) -> tensor + %258 = "tf.Equal"(%257, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %259 = "tf.LogicalOr"(%258, %66) {device = ""} : (tensor, tensor) -> tensor + %260 = "tf.Equal"(%65, %257) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %261 = "tf.LogicalOr"(%259, %260) {device = ""} : (tensor, tensor) -> tensor + %262 = "tf.Select"(%203, %257, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %263 = "tf.Pack"(%262, %15) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %264 = "tf.StridedSlice"(%263, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %265 = "tf.Cast"(%264) {Truncate = false, device = ""} : (tensor) -> tensor + %266 = "tf.Reshape"(%265, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %267 = "tf.Pack"(%9, %266) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %268 = "tf.Tile"(%204, %267) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %269 = "tf.Mul"(%266, %207) {device = ""} : (tensor, tensor) -> tensor + %270 = "tf.Pack"(%269) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %271 = "tf.ConcatV2"(%206, %270, %208, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %272 = "tf.Reshape"(%268, %271) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %273 = "tf.Shape"(%272) {device = ""} : (tensor) -> tensor<1xi64> + %274 = "tf.StridedSlice"(%273, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %275 = "tf.Pack"(%264) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %276 = "tf.StridedSlice"(%272, %275, %13, %14) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %277 = "tf.Sub"(%274, %264) {device = ""} : (tensor, tensor) -> tensor + %278 = "tf.Pack"(%277) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %279 = "tf.StridedSlice"(%272, %13, %278, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, 
tensor<1xi64>) -> tensor + %280:2 = "tf.RaggedRange"(%279, %276, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %281 = "tf.Select"(%71, %257, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %282 = "tf.Pack"(%281, %15) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %283 = "tf.StridedSlice"(%282, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %284 = "tf.Cast"(%283) {Truncate = false, device = ""} : (tensor) -> tensor + %285 = "tf.Reshape"(%284, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %286 = "tf.Pack"(%9, %285) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %287 = "tf.Tile"(%53, %286) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %288 = "tf.Mul"(%285, %56) {device = ""} : (tensor, tensor) -> tensor + %289 = "tf.Pack"(%288) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %290 = "tf.ConcatV2"(%55, %289, %57, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %291 = "tf.Reshape"(%287, %290) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %292 = "tf.Shape"(%291) {device = ""} : (tensor) -> tensor<1xi64> + %293 = "tf.StridedSlice"(%292, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %294 = "tf.Pack"(%283) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %295 = "tf.StridedSlice"(%291, %294, %13, %14) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %296 = "tf.Sub"(%293, %283) {device = ""} : (tensor, tensor) -> tensor + %297 = "tf.Pack"(%296) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %298 = "tf.StridedSlice"(%291, %13, %297, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %299:2 = "tf.RaggedRange"(%298, %295, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %300 = "tf.StridedSlice"(%282, %17, %18, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %301 = "tf.StridedSlice"(%282, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %302 = "tf.Mul"(%60, %301) {device = ""} : (tensor, tensor) -> tensor + %303 = "tf.Tile"(%302, %300) {device = ""} : (tensor, tensor<1xi64>) -> tensor + %304 = "tf.Cumsum"(%303, %16) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor + %305 = "tf.ConcatV2"(%13, %304, %2) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %306 = "tf.StridedSlice"(%305, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : 
(tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %307 = "tf.ExpandDims"(%306, %9) {device = ""} : (tensor, tensor) -> tensor + %308 = "tf.Shape"(%306) {device = ""} : (tensor) -> tensor<1xi32> + %309 = "tf.StridedSlice"(%308, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %310 = "tf.Pack"(%309) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %311 = "tf.StridedSlice"(%305, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %312 = "tf.ExpandDims"(%311, %9) {device = ""} : (tensor, tensor) -> tensor + %313 = "tf.Shape"(%311) {device = ""} : (tensor) -> tensor<1xi32> + %314 = "tf.StridedSlice"(%313, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %315 = "tf.Pack"(%314) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %316 = "tf.Equal"(%192, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %317 = "tf.Select"(%316, %257, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %318 = "tf.Cast"(%317) {Truncate = false, device = ""} : (tensor) -> tensor + %319 = "tf.Reshape"(%318, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %320 = "tf.Pack"(%9, %319) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %321 = "tf.Mul"(%319, %10) {device = ""} : (tensor, tensor) -> tensor + %322 = "tf.Pack"(%321) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %323 = "tf.ConcatV2"(%11, %322, %11, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %324 = "tf.Pack"(%317) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %325 = "tf.Pack"(%12, %192) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %326 = "tf.ExpandDims"(%325, %9) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> + %327 = "tf.Tile"(%326, %320) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %328 = "tf.Reshape"(%327, %323) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %329 = "tf.Shape"(%328) {device = ""} : (tensor) -> tensor<1xi64> + %330 = "tf.StridedSlice"(%329, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %331 = "tf.Sub"(%330, %317) {device = ""} : (tensor, tensor) -> tensor + %332 = "tf.Pack"(%331) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %333 = "tf.StridedSlice"(%328, %13, %332, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %334 = "tf.StridedSlice"(%328, %324, %13, %14) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %335:2 = "tf.RaggedRange"(%333, %334, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, 
tensor, tensor) -> (tensor, tensor) + %336 = "tf.GatherV2"(%199, %335#1, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %337 = "tf.Cast"(%336) {Truncate = false, device = ""} : (tensor) -> tensor + %338 = "tf.BroadcastTo"(%337, %310) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %339 = "tf.Max"(%338, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %340 = "tf.Maximum"(%16, %339) {device = ""} : (tensor, tensor) -> tensor + %341 = "tf.Range"(%16, %340, %9) {device = ""} : (tensor, tensor, tensor) -> tensor + %342 = "tf.Pack"(%9, %340) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %343 = "tf.Tile"(%307, %342) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %344 = "tf.Shape"(%343) {device = ""} : (tensor) -> tensor<2xi32> + %345 = "tf.StridedSlice"(%344, %17, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %346 = "tf.Prod"(%345, %17) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %347 = "tf.Pack"(%346) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %348 = "tf.Shape"(%343) {device = ""} : (tensor) -> tensor<2xi32> + %349 = "tf.StridedSlice"(%348, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %350 = "tf.Shape"(%343) {device = ""} : (tensor) -> tensor<2xi32> + %351 = "tf.StridedSlice"(%350, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %352 = "tf.ConcatV2"(%349, %347, %351, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %353 = "tf.Reshape"(%343, %352) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %354 = "tf.ExpandDims"(%338, %2) {device = ""} : (tensor, tensor) -> tensor + %355 = "tf.Less"(%341, %354) {device = ""} : (tensor, tensor) -> tensor + %356 = "tf.Reshape"(%355, %6) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %357 = "tf.Where"(%356) {device = ""} : (tensor) -> tensor + %358 = "tf.Squeeze"(%357) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %359 = "tf.GatherV2"(%353, %358, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %360 = "tf.Cast"(%336) {Truncate = false, device = ""} : (tensor) -> tensor + %361 = "tf.BroadcastTo"(%360, %315) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %362 = "tf.Max"(%361, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %363 = "tf.Maximum"(%16, %362) {device = ""} : (tensor, tensor) -> tensor + %364 = "tf.Range"(%16, %363, %9) {device = ""} : (tensor, tensor, tensor) -> tensor + %365 = "tf.Pack"(%9, %363) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %366 = "tf.Tile"(%312, %365) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %367 = "tf.Shape"(%366) {device = ""} : (tensor) -> tensor<2xi32> + %368 = "tf.StridedSlice"(%367, %17, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> 
tensor<2xi32> + %369 = "tf.Prod"(%368, %17) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %370 = "tf.Pack"(%369) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %371 = "tf.Shape"(%366) {device = ""} : (tensor) -> tensor<2xi32> + %372 = "tf.StridedSlice"(%371, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %373 = "tf.Shape"(%366) {device = ""} : (tensor) -> tensor<2xi32> + %374 = "tf.StridedSlice"(%373, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %375 = "tf.ConcatV2"(%372, %370, %374, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %376 = "tf.Reshape"(%366, %375) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %377 = "tf.ExpandDims"(%361, %2) {device = ""} : (tensor, tensor) -> tensor + %378 = "tf.Less"(%364, %377) {device = ""} : (tensor, tensor) -> tensor + %379 = "tf.Reshape"(%378, %6) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %380 = "tf.Where"(%379) {device = ""} : (tensor) -> tensor + %381 = "tf.Squeeze"(%380) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %382 = "tf.GatherV2"(%376, %381, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %383:2 = "tf.RaggedRange"(%359, %382, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %384 = "tf.If"(%261, %261, %257, %67) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_false_106180, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_true_106170} : (tensor, tensor, tensor, tensor) -> tensor + %385 = "tf.Identity"(%384) {device = ""} : (tensor) -> tensor + %386 = "tf.StridedSlice"(%62, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %387 = "tf.Equal"(%386, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %388 = "tf.Select"(%387, %257, %386) {device = ""} : (tensor, tensor, tensor) -> tensor + %389 = "tf.Pack"(%388) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %390 = "tf.StridedSlice"(%62, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> + %391 = "tf.StridedSlice"(%62, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %392 = "tf.ConcatV2"(%390, %389, %391, %16) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> + %393 = "tf.StridedSlice"(%392, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 
0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %394 = "tf.Equal"(%393, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %395 = "tf.StridedSlice"(%392, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %396 = "tf.StridedSlice"(%392, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %397 = "tf.Equal"(%396, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %398 = "tf.If"(%397, %397, %396, %336) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_false_106670, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_true_106660} : (tensor, tensor, tensor, tensor) -> tensor + %399 = "tf.Identity"(%398) {device = ""} : (tensor) -> tensor + %400 = "tf.If"(%394, %394, %336, %395) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_false_107030, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_true_107020} : (tensor, tensor, tensor, tensor) -> tensor + %401 = "tf.If"(%236, %236, %15, %232) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_AssertGuard_false_111870, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_AssertGuard_true_111860} : (tensor, tensor, tensor, tensor) -> tensor + %402 = "tf.Identity"(%401) {device = ""} : (tensor) -> tensor + %403 = "tf.Equal"(%232, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %404 = "tf.Select"(%403, %15, %232) {device = ""} : (tensor, tensor, tensor) -> tensor + %405 = "tf.Equal"(%404, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %406 = "tf.LogicalOr"(%405, %1) {device = ""} : (tensor, tensor) -> tensor + %407 = "tf.Equal"(%404, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %408 = "tf.LogicalOr"(%406, %407) {device = ""} : (tensor, tensor) -> tensor + %409 = "tf.Select"(%243, %404, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %410 = "tf.Pack"(%409, %15) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %411 = "tf.StridedSlice"(%410, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %412 = "tf.Cast"(%411) {Truncate = false, device = ""} : (tensor) -> tensor + %413 = "tf.Reshape"(%412, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %414 = "tf.Pack"(%9, %413) {axis = 0 : i64, device = ""} : (tensor, tensor) -> 
tensor<2xi32> + %415 = "tf.Tile"(%244, %414) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %416 = "tf.Mul"(%413, %247) {device = ""} : (tensor, tensor) -> tensor + %417 = "tf.Pack"(%416) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %418 = "tf.ConcatV2"(%246, %417, %248, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %419 = "tf.Reshape"(%415, %418) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %420 = "tf.Shape"(%419) {device = ""} : (tensor) -> tensor<1xi64> + %421 = "tf.StridedSlice"(%420, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %422 = "tf.Pack"(%411) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %423 = "tf.StridedSlice"(%419, %422, %13, %14) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %424 = "tf.Sub"(%421, %411) {device = ""} : (tensor, tensor) -> tensor + %425 = "tf.Pack"(%424) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %426 = "tf.StridedSlice"(%419, %13, %425, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %427:2 = "tf.RaggedRange"(%426, %423, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %428 = "tf.GatherV2"(%250, %427#1, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %429 = "tf.StridedSlice"(%410, %17, %18, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %430 = "tf.StridedSlice"(%410, %17, %18, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %431 = "tf.StridedSlice"(%410, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> + %432 = "tf.ConcatV2"(%430, %431, %16) {device = ""} : (tensor<1xi64>, tensor<0xi64>, tensor) -> tensor<1xi64> + %433 = "tf.StridedSlice"(%410, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %434 = "tf.Mul"(%253, %433) {device = ""} : (tensor, tensor) -> tensor + %435 = "tf.Tile"(%434, %429) {device = ""} : (tensor, tensor<1xi64>) -> tensor + %436 = "tf.Cumsum"(%435, %16) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor + %437 = "tf.ConcatV2"(%13, %436, %2) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %438 = "tf.Shape"(%437) {device = ""} : (tensor) -> tensor<1xi64> + %439 = "tf.StridedSlice"(%438, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, 
tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %440 = "tf.Sub"(%439, %15) {device = ""} : (tensor, tensor) -> tensor + %441 = "tf.Equal"(%440, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %442 = "tf.LogicalOr"(%117, %441) {device = ""} : (tensor, tensor) -> tensor + %443 = "tf.Equal"(%440, %116) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %444 = "tf.LogicalOr"(%442, %443) {device = ""} : (tensor, tensor) -> tensor + %445 = "tf.StridedSlice"(%437, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %446 = "tf.StridedSlice"(%437, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %447 = "tf.Sub"(%445, %446) {device = ""} : (tensor, tensor) -> tensor + %448 = "tf.Shape"(%437) {device = ""} : (tensor) -> tensor<1xi64> + %449 = "tf.StridedSlice"(%448, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %450 = "tf.Sub"(%449, %15) {device = ""} : (tensor, tensor) -> tensor + %451 = "tf.Equal"(%450, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %452 = "tf.ExpandDims"(%437, %9) {device = ""} : (tensor, tensor) -> tensor + %453 = "tf.Shape"(%437) {device = ""} : (tensor) -> tensor<1xi32> + %454 = "tf.StridedSlice"(%453, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %455 = "tf.StridedSlice"(%453, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %456 = "tf.StridedSlice"(%453, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %457 = "tf.Select"(%1, %404, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %458 = "tf.Pack"(%457, %15) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %459 = "tf.StridedSlice"(%458, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %460 = "tf.Cast"(%459) {Truncate = false, device = ""} : (tensor) -> tensor + %461 = "tf.Reshape"(%460, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %462 = "tf.Pack"(%9, %461) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %463 = "tf.Tile"(%3, %462) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %464 = "tf.Mul"(%461, %10) {device = ""} : (tensor, tensor) -> tensor + %465 = "tf.Pack"(%464) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %466 = "tf.ConcatV2"(%11, %465, %11, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) 
-> tensor<1xi32> + %467 = "tf.Reshape"(%463, %466) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %468 = "tf.Shape"(%467) {device = ""} : (tensor) -> tensor<1xi64> + %469 = "tf.StridedSlice"(%468, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %470 = "tf.Pack"(%459) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %471 = "tf.StridedSlice"(%467, %470, %13, %14) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %472 = "tf.Sub"(%469, %459) {device = ""} : (tensor, tensor) -> tensor + %473 = "tf.Pack"(%472) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %474 = "tf.StridedSlice"(%467, %13, %473, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %475:2 = "tf.RaggedRange"(%474, %471, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %476 = "tf.GatherV2"(%13, %475#1, %16) {batch_dims = 0 : i64, device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %477 = "tf.GatherV2"(%14, %476, %16) {batch_dims = 0 : i64, device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %478 = "tf.StridedSlice"(%458, %17, %18, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %479 = "tf.StridedSlice"(%458, %17, %18, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %480 = "tf.StridedSlice"(%458, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> + %481 = "tf.ConcatV2"(%479, %480, %16) {device = ""} : (tensor<1xi64>, tensor<0xi64>, tensor) -> tensor<1xi64> + %482 = "tf.Tile"(%477, %481) {device = ""} : (tensor, tensor<1xi64>) -> tensor + %483 = "tf.StridedSlice"(%458, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %484 = "tf.Mul"(%483, %14) {device = ""} : (tensor, tensor<1xi64>) -> tensor<1xi64> + %485 = "tf.Tile"(%484, %478) {device = ""} : (tensor<1xi64>, tensor<1xi64>) -> tensor + %486 = "tf.Cumsum"(%485, %16) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor + %487 = "tf.ConcatV2"(%13, %486, %2) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %488 = "tf.StridedSlice"(%487, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %489 = "tf.ExpandDims"(%488, %9) {device = ""} : (tensor, tensor) -> tensor + %490 = "tf.Shape"(%488) {device = ""} : (tensor) -> tensor<1xi32> + %491 = 
"tf.StridedSlice"(%490, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %492 = "tf.Pack"(%491) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %493 = "tf.StridedSlice"(%487, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %494 = "tf.ExpandDims"(%493, %9) {device = ""} : (tensor, tensor) -> tensor + %495 = "tf.Shape"(%493) {device = ""} : (tensor) -> tensor<1xi32> + %496 = "tf.StridedSlice"(%495, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %497 = "tf.Pack"(%496) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %498 = "tf.Equal"(%232, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %499 = "tf.Select"(%498, %404, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %500 = "tf.Cast"(%499) {Truncate = false, device = ""} : (tensor) -> tensor + %501 = "tf.Reshape"(%500, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %502 = "tf.Pack"(%9, %501) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %503 = "tf.Mul"(%501, %10) {device = ""} : (tensor, tensor) -> tensor + %504 = "tf.Pack"(%503) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %505 = "tf.ConcatV2"(%11, %504, %11, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %506 = "tf.Pack"(%499) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %507 = "tf.Pack"(%12, %232) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %508 = "tf.ExpandDims"(%507, %9) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> + %509 = "tf.Tile"(%508, %502) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %510 = "tf.Reshape"(%509, %505) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %511 = "tf.Shape"(%510) {device = ""} : (tensor) -> tensor<1xi64> + %512 = "tf.StridedSlice"(%511, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %513 = "tf.Sub"(%512, %499) {device = ""} : (tensor, tensor) -> tensor + %514 = "tf.Pack"(%513) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %515 = "tf.StridedSlice"(%510, %13, %514, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %516 = "tf.StridedSlice"(%510, %506, %13, %14) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %517:2 = "tf.RaggedRange"(%515, %516, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %518 = "tf.GatherV2"(%239, %517#1, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %519 = "tf.Cast"(%518) {Truncate = false, device = ""} : (tensor) 
-> tensor + %520 = "tf.BroadcastTo"(%519, %492) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %521 = "tf.Max"(%520, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %522 = "tf.Maximum"(%16, %521) {device = ""} : (tensor, tensor) -> tensor + %523 = "tf.Range"(%16, %522, %9) {device = ""} : (tensor, tensor, tensor) -> tensor + %524 = "tf.Pack"(%9, %522) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %525 = "tf.Tile"(%489, %524) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %526 = "tf.Shape"(%525) {device = ""} : (tensor) -> tensor<2xi32> + %527 = "tf.StridedSlice"(%526, %17, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %528 = "tf.Prod"(%527, %17) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %529 = "tf.Pack"(%528) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %530 = "tf.Shape"(%525) {device = ""} : (tensor) -> tensor<2xi32> + %531 = "tf.StridedSlice"(%530, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %532 = "tf.Shape"(%525) {device = ""} : (tensor) -> tensor<2xi32> + %533 = "tf.StridedSlice"(%532, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %534 = "tf.ConcatV2"(%531, %529, %533, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %535 = "tf.Reshape"(%525, %534) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %536 = "tf.ExpandDims"(%520, %2) {device = ""} : (tensor, tensor) -> tensor + %537 = "tf.Less"(%523, %536) {device = ""} : (tensor, tensor) -> tensor + %538 = "tf.Reshape"(%537, %6) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %539 = "tf.Where"(%538) {device = ""} : (tensor) -> tensor + %540 = "tf.Squeeze"(%539) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %541 = "tf.GatherV2"(%535, %540, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %542 = "tf.Cast"(%518) {Truncate = false, device = ""} : (tensor) -> tensor + %543 = "tf.BroadcastTo"(%542, %497) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %544 = "tf.Max"(%543, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %545 = "tf.Maximum"(%16, %544) {device = ""} : (tensor, tensor) -> tensor + %546 = "tf.Range"(%16, %545, %9) {device = ""} : (tensor, tensor, tensor) -> tensor + %547 = "tf.Pack"(%9, %545) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %548 = "tf.Tile"(%494, %547) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %549 = "tf.Shape"(%548) {device = ""} : (tensor) -> tensor<2xi32> + %550 = "tf.StridedSlice"(%549, %17, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %551 = "tf.Prod"(%550, %17) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %552 = "tf.Pack"(%551) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %553 = 
"tf.Shape"(%548) {device = ""} : (tensor) -> tensor<2xi32> + %554 = "tf.StridedSlice"(%553, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %555 = "tf.Shape"(%548) {device = ""} : (tensor) -> tensor<2xi32> + %556 = "tf.StridedSlice"(%555, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %557 = "tf.ConcatV2"(%554, %552, %556, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %558 = "tf.Reshape"(%548, %557) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %559 = "tf.ExpandDims"(%543, %2) {device = ""} : (tensor, tensor) -> tensor + %560 = "tf.Less"(%546, %559) {device = ""} : (tensor, tensor) -> tensor + %561 = "tf.Reshape"(%560, %6) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %562 = "tf.Where"(%561) {device = ""} : (tensor) -> tensor + %563 = "tf.Squeeze"(%562) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %564 = "tf.GatherV2"(%558, %563, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %565:2 = "tf.RaggedRange"(%541, %564, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %566 = "tf.GatherV2"(%482, %565#1, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %567 = "tf.If"(%408, %408, %404, %15) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_1_AssertGuard_false_112940, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_1_AssertGuard_true_112930} : (tensor, tensor, tensor, tensor) -> tensor + %568 = "tf.Identity"(%567) {device = ""} : (tensor) -> tensor + %569 = "tf.Select"(%1, %404, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %570 = "tf.Pack"(%569) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %571 = "tf.ConcatV2"(%0, %570, %14, %16) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> + %572 = "tf.StridedSlice"(%571, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %573 = "tf.Equal"(%572, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %574 = "tf.StridedSlice"(%571, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %575 = "tf.StridedSlice"(%571, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %576 = "tf.Equal"(%575, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %577 = "tf.If"(%576, %576, %575, %518) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = 
@WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_2_AssertGuard_false_113430, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_2_AssertGuard_true_113420} : (tensor, tensor, tensor, tensor) -> tensor + %578 = "tf.Identity"(%577) {device = ""} : (tensor) -> tensor + %579 = "tf.If"(%573, %573, %518, %574) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_3_AssertGuard_false_113790, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_3_AssertGuard_true_113780} : (tensor, tensor, tensor, tensor) -> tensor + %580 = "tf.Identity"(%579) {device = ""} : (tensor) -> tensor + %581 = "tf.If"(%444, %444, %116, %440) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_false_118470, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_true_118460} : (tensor, tensor, tensor, tensor) -> tensor + %582 = "tf.Identity"(%581) {device = ""} : (tensor) -> tensor + %583 = "tf.Equal"(%440, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %584 = "tf.Select"(%583, %116, %440) {device = ""} : (tensor, tensor, tensor) -> tensor + %585 = "tf.Equal"(%584, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %586 = "tf.LogicalOr"(%585, %119) {device = ""} : (tensor, tensor) -> tensor + %587 = "tf.Equal"(%118, %584) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %588 = "tf.LogicalOr"(%586, %587) {device = ""} : (tensor, tensor) -> tensor + %589 = "tf.Select"(%451, %584, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %590 = "tf.Pack"(%589, %15) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %591 = "tf.StridedSlice"(%590, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %592 = "tf.Cast"(%591) {Truncate = false, device = ""} : (tensor) -> tensor + %593 = "tf.Reshape"(%592, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %594 = "tf.Pack"(%9, %593) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %595 = "tf.Tile"(%452, %594) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %596 = "tf.Mul"(%593, %455) {device = ""} : (tensor, tensor) -> tensor + %597 = "tf.Pack"(%596) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %598 = "tf.ConcatV2"(%454, %597, %456, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %599 = "tf.Reshape"(%595, %598) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %600 = "tf.Shape"(%599) {device = ""} : (tensor) -> tensor<1xi64> + %601 = "tf.StridedSlice"(%600, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %602 = "tf.Pack"(%591) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %603 = "tf.StridedSlice"(%599, %602, %13, %14) 
{begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %604 = "tf.Sub"(%601, %591) {device = ""} : (tensor, tensor) -> tensor + %605 = "tf.Pack"(%604) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %606 = "tf.StridedSlice"(%599, %13, %605, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %607:2 = "tf.RaggedRange"(%606, %603, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %608 = "tf.Select"(%124, %584, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %609 = "tf.Pack"(%608, %15) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %610 = "tf.StridedSlice"(%609, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %611 = "tf.Cast"(%610) {Truncate = false, device = ""} : (tensor) -> tensor + %612 = "tf.Reshape"(%611, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %613 = "tf.Pack"(%9, %612) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %614 = "tf.Tile"(%106, %613) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %615 = "tf.Mul"(%612, %109) {device = ""} : (tensor, tensor) -> tensor + %616 = "tf.Pack"(%615) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %617 = "tf.ConcatV2"(%108, %616, %110, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %618 = "tf.Reshape"(%614, %617) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %619 = "tf.Shape"(%618) {device = ""} : (tensor) -> tensor<1xi64> + %620 = "tf.StridedSlice"(%619, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %621 = "tf.Pack"(%610) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %622 = "tf.StridedSlice"(%618, %621, %13, %14) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %623 = "tf.Sub"(%620, %610) {device = ""} : (tensor, tensor) -> tensor + %624 = "tf.Pack"(%623) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %625 = "tf.StridedSlice"(%618, %13, %624, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %626:2 = "tf.RaggedRange"(%625, %622, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %627 = "tf.StridedSlice"(%609, %17, %18, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %628 = "tf.StridedSlice"(%609, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, 
tensor<1xi32>, tensor<1xi32>) -> tensor + %629 = "tf.Mul"(%113, %628) {device = ""} : (tensor, tensor) -> tensor + %630 = "tf.Tile"(%629, %627) {device = ""} : (tensor, tensor<1xi64>) -> tensor + %631 = "tf.Cumsum"(%630, %16) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor + %632 = "tf.ConcatV2"(%13, %631, %2) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %633 = "tf.StridedSlice"(%632, %17, %6, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %634 = "tf.ExpandDims"(%633, %9) {device = ""} : (tensor, tensor) -> tensor + %635 = "tf.Shape"(%633) {device = ""} : (tensor) -> tensor<1xi32> + %636 = "tf.StridedSlice"(%635, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %637 = "tf.Pack"(%636) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %638 = "tf.StridedSlice"(%632, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %639 = "tf.ExpandDims"(%638, %9) {device = ""} : (tensor, tensor) -> tensor + %640 = "tf.Shape"(%638) {device = ""} : (tensor) -> tensor<1xi32> + %641 = "tf.StridedSlice"(%640, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %642 = "tf.Pack"(%641) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %643 = "tf.Equal"(%440, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %644 = "tf.Select"(%643, %584, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %645 = "tf.Cast"(%644) {Truncate = false, device = ""} : (tensor) -> tensor + %646 = "tf.Reshape"(%645, %11) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %647 = "tf.Pack"(%9, %646) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %648 = "tf.Mul"(%646, %10) {device = ""} : (tensor, tensor) -> tensor + %649 = "tf.Pack"(%648) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %650 = "tf.ConcatV2"(%11, %649, %11, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %651 = "tf.Pack"(%644) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %652 = "tf.Pack"(%12, %440) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %653 = "tf.ExpandDims"(%652, %9) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> + %654 = "tf.Tile"(%653, %647) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %655 = "tf.Reshape"(%654, %650) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %656 = "tf.Shape"(%655) {device = ""} : (tensor) -> tensor<1xi64> + %657 = "tf.StridedSlice"(%656, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %658 = "tf.Sub"(%657, %644) {device = ""} : (tensor, tensor) -> tensor + %659 = "tf.Pack"(%658) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %660 = 
"tf.StridedSlice"(%655, %13, %659, %14) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %661 = "tf.StridedSlice"(%655, %651, %13, %14) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %662:2 = "tf.RaggedRange"(%660, %661, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %663 = "tf.GatherV2"(%447, %662#1, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %664 = "tf.Cast"(%663) {Truncate = false, device = ""} : (tensor) -> tensor + %665 = "tf.BroadcastTo"(%664, %637) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %666 = "tf.Max"(%665, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %667 = "tf.Maximum"(%16, %666) {device = ""} : (tensor, tensor) -> tensor + %668 = "tf.Range"(%16, %667, %9) {device = ""} : (tensor, tensor, tensor) -> tensor + %669 = "tf.Pack"(%9, %667) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %670 = "tf.Tile"(%634, %669) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %671 = "tf.Shape"(%670) {device = ""} : (tensor) -> tensor<2xi32> + %672 = "tf.StridedSlice"(%671, %17, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %673 = "tf.Prod"(%672, %17) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %674 = "tf.Pack"(%673) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %675 = "tf.Shape"(%670) {device = ""} : (tensor) -> tensor<2xi32> + %676 = "tf.StridedSlice"(%675, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %677 = "tf.Shape"(%670) {device = ""} : (tensor) -> tensor<2xi32> + %678 = "tf.StridedSlice"(%677, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %679 = "tf.ConcatV2"(%676, %674, %678, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %680 = "tf.Reshape"(%670, %679) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %681 = "tf.ExpandDims"(%665, %2) {device = ""} : (tensor, tensor) -> tensor + %682 = "tf.Less"(%668, %681) {device = ""} : (tensor, tensor) -> tensor + %683 = "tf.Reshape"(%682, %6) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %684 = "tf.Where"(%683) {device = ""} : (tensor) -> tensor + %685 = "tf.Squeeze"(%684) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %686 = "tf.GatherV2"(%680, %685, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %687 = "tf.Cast"(%663) {Truncate = false, device = ""} : (tensor) -> tensor + %688 = "tf.BroadcastTo"(%687, %642) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %689 = "tf.Max"(%688, %17) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %690 = "tf.Maximum"(%16, %689) {device = ""} : (tensor, tensor) -> 
tensor + %691 = "tf.Range"(%16, %690, %9) {device = ""} : (tensor, tensor, tensor) -> tensor + %692 = "tf.Pack"(%9, %690) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %693 = "tf.Tile"(%639, %692) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %694 = "tf.Shape"(%693) {device = ""} : (tensor) -> tensor<2xi32> + %695 = "tf.StridedSlice"(%694, %17, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %696 = "tf.Prod"(%695, %17) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %697 = "tf.Pack"(%696) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %698 = "tf.Shape"(%693) {device = ""} : (tensor) -> tensor<2xi32> + %699 = "tf.StridedSlice"(%698, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %700 = "tf.Shape"(%693) {device = ""} : (tensor) -> tensor<2xi32> + %701 = "tf.StridedSlice"(%700, %7, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %702 = "tf.ConcatV2"(%699, %697, %701, %16) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %703 = "tf.Reshape"(%693, %702) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %704 = "tf.ExpandDims"(%688, %2) {device = ""} : (tensor, tensor) -> tensor + %705 = "tf.Less"(%691, %704) {device = ""} : (tensor, tensor) -> tensor + %706 = "tf.Reshape"(%705, %6) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %707 = "tf.Where"(%706) {device = ""} : (tensor) -> tensor + %708 = "tf.Squeeze"(%707) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %709 = "tf.GatherV2"(%703, %708, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %710:2 = "tf.RaggedRange"(%686, %709, %15) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %711 = "tf.If"(%588, %588, %584, %120) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_false_119540, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_true_119530} : (tensor, tensor, tensor, tensor) -> tensor + %712 = "tf.Identity"(%711) {device = ""} : (tensor) -> tensor + %713 = "tf.StridedSlice"(%115, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %714 = "tf.Equal"(%713, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %715 = "tf.Select"(%714, %584, %713) {device = ""} : (tensor, tensor, tensor) -> tensor + %716 = "tf.Pack"(%715) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %717 = "tf.StridedSlice"(%115, %17, %17, %18) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, 
tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> + %718 = "tf.StridedSlice"(%115, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %719 = "tf.ConcatV2"(%717, %716, %718, %16) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> + %720 = "tf.StridedSlice"(%719, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %721 = "tf.Equal"(%720, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %722 = "tf.StridedSlice"(%719, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %723 = "tf.StridedSlice"(%719, %18, %7, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %724 = "tf.Equal"(%723, %15) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %725 = "tf.If"(%724, %724, %723, %663) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_false_120030, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_true_120020} : (tensor, tensor, tensor, tensor) -> tensor + %726 = "tf.Identity"(%725) {device = ""} : (tensor) -> tensor + %727 = "tf.If"(%721, %721, %663, %722) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_false_120390, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_true_120380} : (tensor, tensor, tensor, tensor) -> tensor + %728 = "tf.Identity"(%168) {device = ""} : (tensor) -> tensor + %729 = "tf.Identity"(%727) {device = ""} : (tensor) -> tensor + %730 = "tf.Identity"(%400) {device = ""} : (tensor) -> tensor + %731 = "tf.Shape"(%125#2) {device = ""} : (tensor) -> tensor<1xi32> + %732 = "tf.StridedSlice"(%731, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %733 = "tf.Cast"(%732) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> + %734 = "tf.Identity"(%733) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> + %735 = "tf.Shape"(%125#3) {device = ""} : (tensor) -> tensor<1xi32> + %736 = "tf.StridedSlice"(%735, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %737 = "tf.Cast"(%736) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> + %738 = 
"tf.Identity"(%737) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> + %739 = "tf.GatherV2"(%125#3, %428, %16) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %740 = "tf.Tile"(%739, %432) {device = ""} : (tensor, tensor<1xi64>) -> tensor + %741 = "tf.Sub"(%740, %566) {device = ""} : (tensor, tensor) -> tensor + %742 = "tf.Shape"(%741) {device = ""} : (tensor) -> tensor<1xi32> + %743 = "tf.StridedSlice"(%742, %18, %17, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %744 = "tf.Cast"(%743) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> + %745 = "tf.Identity"(%744) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> + %746 = "tf.UnicodeEncode"(%125#0, %146) {Tsplits = i64, device = "", errors = "replace", output_encoding = "UTF-8", replacement_char = 65533 : i64} : (tensor, tensor) -> tensor + %747 = "tf.Identity"(%746) {device = ""} : (tensor) -> tensor + %748 = "tf.StridedSlice"(%19, %17, %18, %18) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %749 = "tf.AddV2"(%748, %15) {device = ""} : (tensor, tensor) -> tensor + %750 = "tf.Range"(%12, %749, %15) {device = ""} : (tensor, tensor, tensor) -> tensor + %751 = "tf.Mul"(%750, %15) {device = ""} : (tensor, tensor) -> tensor + %752 = "tf.Identity"(%751) {device = ""} : (tensor) -> tensor + return %747, %752, %728 : tensor, tensor, tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_false_99640(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Input tensors have incompatible shapes."> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedConcat/RaggedFromTensor/strided_slice_4:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedConcat/RaggedNRows/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_true_99630(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_100400(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = 
"tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_100390(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_100760(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () + %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor + return %4 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_100750(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_101100(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x 
(WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_101090(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_101470(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_101460(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_101830(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x 
(WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () + %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor + return %4 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_101820(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_false_102190(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RaggedNRows/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_true_102180(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_102540(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} 
: () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_102530(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_102900(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () + %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor + return %4 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_102890(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_103240(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func 
@WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_103230(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_103610(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_103600(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_103970(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () + %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor + return %4 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_103960(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} 
{ + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_104310(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_104300(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_AssertGuard_false_105110(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_AssertGuard_true_105100(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_false_106180(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + 
%2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_true_106170(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_false_106670(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_true_106660(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_false_107030(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_true_107020(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_AssertGuard_false_111870(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes 
{tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_AssertGuard_true_111860(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_1_AssertGuard_false_112940(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_1_AssertGuard_true_112930(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_2_AssertGuard_false_113430(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_2_AssertGuard_true_113420(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = 
""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_3_AssertGuard_false_113790(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_Assert_3_AssertGuard_true_113780(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_false_118470(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_true_118460(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_false_119540(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func 
@WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_true_119530(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} {
+ %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor
+ %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor
+ return %1 : tensor
+ }
+ func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_false_120030(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape], tf.signature.is_stateful} {
+ %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor
+ %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor
+ %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor
+ %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor
+ "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> ()
+ %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor
+ %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor
+ return %5 : tensor
+ }
+ func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_true_120020(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} {
+ %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor
+ %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor
+ return %1 : tensor
+ }
+ func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_false_120390(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], tf.signature.is_stateful} {
+ %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor
+ %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor
+ %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor
+ %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor
+ "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> ()
+ %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor
+ %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor
+ return %5 : tensor
+ }
+ func @WhitespaceTokenize_WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_true_120380(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} {
+ %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor
+ %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor
+ return %1 : tensor
+ }
+
+
+ // CHECK: func @whitespace_tokenizer_rank2(%arg0: tensor {tf._user_specified_name = "input"}) -> (tensor, tensor, tensor) attributes {tf._input_shapes = [#tf.shape], tf.api_implements = "tftext:WhitespaceTokenizer", tf.signature.is_stateful} {
+ // CHECK: %0:3 = "tfl.custom"(%arg0) {custom_code = "tftext:WhitespaceTokenizer", custom_option = opaque<"tfl", "0x"> : tensor<0xi8>} : (tensor) -> (tensor, tensor, tensor)
+ // CHECK: return %0#0, %0#1, %0#2 : tensor, tensor, tensor
+
+ func @whitespace_tokenizer_rank0(%arg0: tensor {tf._user_specified_name = "input"}) -> tensor attributes
{tf._input_shapes = [#tf.shape<>], tf.api_implements = "tftext:WhitespaceTokenizer", tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<[0, 1]> : tensor<2xi64>} : () -> tensor<2xi64> + %1 = "tf.Const"() {value = dense<[]> : tensor<0xi64>} : () -> tensor<0xi64> + %2 = "tf.Const"() {value = dense : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<-1> : tensor} : () -> tensor + %4 = "tf.Const"() {value = dense<[[0], [1]]> : tensor<2x1xi64>} : () -> tensor<2x1xi64> + %5 = "tf.Const"() {value = dense<-1> : tensor<1xi32>} : () -> tensor<1xi32> + %6 = "tf.Const"() {value = dense<2> : tensor<1xi32>} : () -> tensor<1xi32> + %7 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %8 = "tf.Const"() {value = dense<2> : tensor} : () -> tensor + %9 = "tf.Const"() {value = dense<[]> : tensor<0xi32>} : () -> tensor<0xi32> + %10 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %11 = "tf.Const"() {value = dense<0> : tensor<1xi64>} : () -> tensor<1xi64> + %12 = "tf.Const"() {value = dense<1> : tensor<1xi64>} : () -> tensor<1xi64> + %13 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %14 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %15 = "tf.Const"() {value = dense<0> : tensor<1xi32>} : () -> tensor<1xi32> + %16 = "tf.Const"() {value = dense<1> : tensor<1xi32>} : () -> tensor<1xi32> + %17 = "tf.If"(%2, %2, %13, %13) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_false_3220, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_true_3210} : (tensor, tensor, tensor, tensor) -> tensor + %18 = "tf.Identity"(%17) {device = ""} : (tensor) -> tensor + %19 = "tf.Pack"(%arg0) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1x!tf.string> + %20 = "tf.StringLength"(%19) {device = "", unit = "BYTE"} : (tensor<1x!tf.string>) -> tensor<1xi32> + %21 = "tf.ExpandDims"(%20, %7) {device = ""} : (tensor<1xi32>, tensor) -> tensor<1x1xi32> + %22 = "tf.Cast"(%21) {Truncate = false, device = ""} : (tensor<1x1xi32>) -> tensor<1x1xi64> + %23 = "tf.Reshape"(%22, %12) {device = ""} : (tensor<1x1xi64>, tensor<1xi64>) -> tensor<1xi64> + %24 = "tf.Reshape"(%19, %5) {device = ""} : (tensor<1x!tf.string>, tensor<1xi32>) -> tensor<1x!tf.string> + %25:3 = "tf.UnicodeDecodeWithOffsets"(%24) {Tsplits = i64, device = "", errors = "replace", input_encoding = "UTF-8", replace_control_characters = false, replacement_char = 65533 : i64} : (tensor<1x!tf.string>) -> (tensor<2xi64>, tensor, tensor) + %26 = "tf.StridedSlice"(%25#0, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %27 = "tf.AddV2"(%26, %13) {device = ""} : (tensor<1xi64>, tensor) -> tensor<1xi64> + %28 = "tf.StridedSlice"(%25#0, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %29 = "tf.Minimum"(%27, %28) {device = ""} : (tensor<1xi64>, tensor<1xi64>) -> tensor<1xi64> + %30:2 = "tf.RaggedRange"(%29, %28, %13) {T = i64, Tsplits = i64, device = ""} : (tensor<1xi64>, tensor<1xi64>, tensor) -> (tensor<2xi64>, tensor) + %31 = 
"tf.StridedSlice"(%30#0, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %32 = "tf.AddV2"(%31, %12) {device = ""} : (tensor, tensor<1xi64>) -> tensor<1xi64> + %33 = "tf.ConcatV2"(%30#0, %32, %14) {device = ""} : (tensor<2xi64>, tensor<1xi64>, tensor) -> tensor<3xi64> + %34 = "tf.GatherV2"(%25#2, %30#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %35 = "tf.ConcatV2"(%34, %23, %14) {device = ""} : (tensor, tensor<1xi64>, tensor) -> tensor + %36:2 = "tf.RaggedGather"(%33, %35, %0) {OUTPUT_RAGGED_RANK = 1 : i64, PARAMS_RAGGED_RANK = 1 : i64, Tindices = i64, Tsplits = i64, Tvalues = i64, device = ""} : (tensor<3xi64>, tensor, tensor<2xi64>) -> (tensor, tensor) + %37:5 = "tf.WhitespaceTokenizeWithOffsets"(%25#1, %25#0) {Tsplits = i64, device = ""} : (tensor, tensor<2xi64>) -> (tensor, tensor, tensor, tensor, tensor) + %38 = "tf.StridedSlice"(%37#1, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %39 = "tf.Equal"(%38, %10) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %40 = "tf.All"(%39, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %41 = "tf.If"(%40, %40, %38, %10) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_3980, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_3970} : (tensor, tensor, tensor, tensor) -> tensor + %42 = "tf.Identity"(%41) {device = ""} : (tensor) -> tensor + %43 = "tf.StridedSlice"(%37#1, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %44 = "tf.StridedSlice"(%37#1, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %45 = "tf.Sub"(%43, %44) {device = ""} : (tensor, tensor) -> tensor + %46 = "tf.LessEqual"(%10, %45) {device = ""} : (tensor, tensor) -> tensor + %47 = "tf.All"(%46, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %48 = "tf.If"(%47, %47, %45) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_4340, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_4330} : (tensor, tensor, tensor) -> tensor + %49 = "tf.Identity"(%48) {device = ""} : (tensor) -> tensor + %50 = "tf.Identity"(%37#1) {_class = 
["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %51 = "tf.StridedSlice"(%50, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %52 = "tf.Shape"(%37#0) {device = ""} : (tensor) -> tensor<1xi64> + %53 = "tf.StridedSlice"(%52, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %54 = "tf.Equal"(%51, %53) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %55 = "tf.All"(%54, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %56 = "tf.If"(%55, %55, %51, %53) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_4680, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_4670} : (tensor, tensor, tensor, tensor) -> tensor + %57 = "tf.Identity"(%56) {device = ""} : (tensor) -> tensor + %58 = "tf.Identity"(%50) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %59 = "tf.Shape"(%58) {device = ""} : (tensor) -> tensor<1xi64> + %60 = "tf.StridedSlice"(%59, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %61 = "tf.Sub"(%60, %13) {device = ""} : (tensor, tensor) -> tensor + %62 = "tf.StridedSlice"(%37#4, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %63 = "tf.Equal"(%62, %10) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %64 = "tf.All"(%63, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %65 = "tf.If"(%64, %64, %62, %10) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_5050, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_5040} : (tensor, tensor, tensor, tensor) -> tensor + %66 = "tf.Identity"(%65) {device = ""} : (tensor) -> tensor + %67 = "tf.StridedSlice"(%37#4, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %68 = "tf.StridedSlice"(%37#4, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %69 = 
"tf.Sub"(%67, %68) {device = ""} : (tensor, tensor) -> tensor + %70 = "tf.LessEqual"(%10, %69) {device = ""} : (tensor, tensor) -> tensor + %71 = "tf.All"(%70, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %72 = "tf.If"(%71, %71, %69) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_5410, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_5400} : (tensor, tensor, tensor) -> tensor + %73 = "tf.Identity"(%72) {device = ""} : (tensor) -> tensor + %74 = "tf.Identity"(%37#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %75 = "tf.StridedSlice"(%74, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %76 = "tf.Equal"(%75, %61) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %77 = "tf.All"(%76, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %78 = "tf.If"(%77, %77, %75, %61) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_false_5770, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_true_5760} : (tensor, tensor, tensor, tensor) -> tensor + %79 = "tf.Identity"(%78) {device = ""} : (tensor) -> tensor + %80 = "tf.Identity"(%74) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %81 = "tf.StridedSlice"(%37#4, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %82 = "tf.Equal"(%81, %10) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %83 = "tf.All"(%82, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %84 = "tf.If"(%83, %83, %81, %10) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_6120, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_6110} : (tensor, tensor, tensor, tensor) -> tensor + %85 = "tf.Identity"(%84) {device = ""} : (tensor) -> tensor + %86 = "tf.StridedSlice"(%37#4, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %87 = "tf.StridedSlice"(%37#4, 
%15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %88 = "tf.Sub"(%86, %87) {device = ""} : (tensor, tensor) -> tensor + %89 = "tf.LessEqual"(%10, %88) {device = ""} : (tensor, tensor) -> tensor + %90 = "tf.All"(%89, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %91 = "tf.If"(%90, %90, %88) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_6480, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_6470} : (tensor, tensor, tensor) -> tensor + %92 = "tf.Identity"(%91) {device = ""} : (tensor) -> tensor + %93 = "tf.Identity"(%37#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %94 = "tf.StridedSlice"(%93, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %95 = "tf.Shape"(%37#2) {device = ""} : (tensor) -> tensor<1xi64> + %96 = "tf.StridedSlice"(%95, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %97 = "tf.Equal"(%94, %96) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %98 = "tf.All"(%97, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %99 = "tf.If"(%98, %98, %94, %96) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_6820, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_6810} : (tensor, tensor, tensor, tensor) -> tensor + %100 = "tf.Identity"(%99) {device = ""} : (tensor) -> tensor + %101 = "tf.Identity"(%93) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %102 = "tf.Shape"(%101) {device = ""} : (tensor) -> tensor<1xi64> + %103 = "tf.StridedSlice"(%102, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %104 = "tf.Sub"(%103, %13) {device = ""} : (tensor, tensor) -> tensor + %105 = "tf.Equal"(%104, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %106 = "tf.LogicalOr"(%105, %2) {device = ""} : (tensor, tensor) -> tensor + %107 = "tf.Equal"(%104, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %108 = "tf.LogicalOr"(%106, %107) {device = ""} : (tensor, tensor) -> tensor + %109 = "tf.StridedSlice"(%101, %16, 
%15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %110 = "tf.StridedSlice"(%101, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %111 = "tf.Sub"(%109, %110) {device = ""} : (tensor, tensor) -> tensor + %112 = "tf.Shape"(%101) {device = ""} : (tensor) -> tensor<1xi64> + %113 = "tf.StridedSlice"(%112, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %114 = "tf.Sub"(%113, %13) {device = ""} : (tensor, tensor) -> tensor + %115 = "tf.Equal"(%114, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %116 = "tf.ExpandDims"(%101, %7) {device = ""} : (tensor, tensor) -> tensor + %117 = "tf.Shape"(%101) {device = ""} : (tensor) -> tensor<1xi32> + %118 = "tf.StridedSlice"(%117, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %119 = "tf.StridedSlice"(%117, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %120 = "tf.StridedSlice"(%117, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %121 = "tf.StridedSlice"(%37#4, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %122 = "tf.Equal"(%121, %10) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %123 = "tf.All"(%122, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %124 = "tf.If"(%123, %123, %121, %10) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_7190, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_7180} : (tensor, tensor, tensor, tensor) -> tensor + %125 = "tf.Identity"(%124) {device = ""} : (tensor) -> tensor + %126 = "tf.StridedSlice"(%37#4, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %127 = "tf.StridedSlice"(%37#4, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %128 = 
"tf.Sub"(%126, %127) {device = ""} : (tensor, tensor) -> tensor + %129 = "tf.LessEqual"(%10, %128) {device = ""} : (tensor, tensor) -> tensor + %130 = "tf.All"(%129, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %131 = "tf.If"(%130, %130, %128) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_7550, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_7540} : (tensor, tensor, tensor) -> tensor + %132 = "tf.Identity"(%131) {device = ""} : (tensor) -> tensor + %133 = "tf.Identity"(%37#4) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %134 = "tf.StridedSlice"(%133, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %135 = "tf.Shape"(%37#3) {device = ""} : (tensor) -> tensor<1xi64> + %136 = "tf.StridedSlice"(%135, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %137 = "tf.Equal"(%134, %136) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %138 = "tf.All"(%137, %9) {device = "", keep_dims = false} : (tensor, tensor<0xi32>) -> tensor + %139 = "tf.If"(%138, %138, %134, %136) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_7890, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_7880} : (tensor, tensor, tensor, tensor) -> tensor + %140 = "tf.Identity"(%139) {device = ""} : (tensor) -> tensor + %141 = "tf.Identity"(%133) {_class = ["loc:@WhitespaceTokenize/WhitespaceTokenize/WhitespaceTokenizeWithOffsets"], device = ""} : (tensor) -> tensor + %142 = "tf.Shape"(%141) {device = ""} : (tensor) -> tensor<1xi64> + %143 = "tf.StridedSlice"(%142, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %144 = "tf.Sub"(%143, %13) {device = ""} : (tensor, tensor) -> tensor + %145 = "tf.Equal"(%144, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %146 = "tf.LogicalOr"(%145, %2) {device = ""} : (tensor, tensor) -> tensor + %147 = "tf.Equal"(%144, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %148 = "tf.LogicalOr"(%146, %147) {device = ""} : (tensor, tensor) -> tensor + %149 = "tf.StridedSlice"(%141, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, 
tensor<1xi32>) -> tensor + %150 = "tf.StridedSlice"(%141, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %151 = "tf.Sub"(%149, %150) {device = ""} : (tensor, tensor) -> tensor + %152 = "tf.Shape"(%141) {device = ""} : (tensor) -> tensor<1xi64> + %153 = "tf.StridedSlice"(%152, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %154 = "tf.Sub"(%153, %13) {device = ""} : (tensor, tensor) -> tensor + %155 = "tf.Equal"(%154, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %156 = "tf.ExpandDims"(%141, %7) {device = ""} : (tensor, tensor) -> tensor + %157 = "tf.Shape"(%141) {device = ""} : (tensor) -> tensor<1xi32> + %158 = "tf.StridedSlice"(%157, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %159 = "tf.StridedSlice"(%157, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %160 = "tf.StridedSlice"(%157, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %161 = "tf.StridedSlice"(%141, %5, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %162 = "tf.Range"(%10, %161, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %163 = "tf.StridedSlice"(%141, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %164 = "tf.StridedSlice"(%141, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %165 = "tf.Sub"(%163, %164) {device = ""} : (tensor, tensor) -> tensor + %166 = "tf.If"(%108, %108, %13, %104) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_AssertGuard_false_8690, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_AssertGuard_true_8680} : (tensor, tensor, tensor, tensor) -> tensor + %167 = "tf.Identity"(%166) {device = ""} : (tensor) -> tensor + %168 = "tf.Equal"(%104, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %169 = "tf.Select"(%168, %13, %104) {device = ""} : (tensor, tensor, tensor) -> tensor + %170 = "tf.Equal"(%169, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %171 = "tf.LogicalOr"(%170, %2) {device = ""} : (tensor, tensor) -> tensor + %172 = 
"tf.Equal"(%169, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %173 = "tf.LogicalOr"(%171, %172) {device = ""} : (tensor, tensor) -> tensor + %174 = "tf.Select"(%115, %169, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %175 = "tf.Pack"(%174, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %176 = "tf.StridedSlice"(%175, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %177 = "tf.Cast"(%176) {Truncate = false, device = ""} : (tensor) -> tensor + %178 = "tf.Reshape"(%177, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %179 = "tf.Pack"(%7, %178) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %180 = "tf.Tile"(%116, %179) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %181 = "tf.Mul"(%178, %119) {device = ""} : (tensor, tensor) -> tensor + %182 = "tf.Pack"(%181) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %183 = "tf.ConcatV2"(%118, %182, %120, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %184 = "tf.Reshape"(%180, %183) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %185 = "tf.Shape"(%184) {device = ""} : (tensor) -> tensor<1xi64> + %186 = "tf.StridedSlice"(%185, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %187 = "tf.Pack"(%176) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %188 = "tf.StridedSlice"(%184, %187, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %189 = "tf.Sub"(%186, %176) {device = ""} : (tensor, tensor) -> tensor + %190 = "tf.Pack"(%189) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %191 = "tf.StridedSlice"(%184, %11, %190, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %192:2 = "tf.RaggedRange"(%191, %188, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %193 = "tf.Select"(%2, %169, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %194 = "tf.Pack"(%193, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %195 = "tf.StridedSlice"(%194, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %196 = "tf.Cast"(%195) {Truncate = false, device = ""} : (tensor) -> tensor + %197 = "tf.Reshape"(%196, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %198 = "tf.Pack"(%7, %197) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %199 = "tf.Tile"(%4, %198) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %200 = "tf.Mul"(%197, %8) {device = ""} : (tensor, tensor) -> tensor + %201 = "tf.Pack"(%200) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %202 = "tf.ConcatV2"(%9, %201, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, 
tensor<0xi32>, tensor) -> tensor<1xi32> + %203 = "tf.Reshape"(%199, %202) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %204 = "tf.Shape"(%203) {device = ""} : (tensor) -> tensor<1xi64> + %205 = "tf.StridedSlice"(%204, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %206 = "tf.Pack"(%195) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %207 = "tf.StridedSlice"(%203, %206, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %208 = "tf.Sub"(%205, %195) {device = ""} : (tensor, tensor) -> tensor + %209 = "tf.Pack"(%208) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %210 = "tf.StridedSlice"(%203, %11, %209, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %211:2 = "tf.RaggedRange"(%210, %207, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %212 = "tf.StridedSlice"(%194, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %213 = "tf.StridedSlice"(%194, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %214 = "tf.Mul"(%213, %12) {device = ""} : (tensor, tensor<1xi64>) -> tensor<1xi64> + %215 = "tf.Tile"(%214, %212) {device = ""} : (tensor<1xi64>, tensor<1xi64>) -> tensor + %216 = "tf.Cumsum"(%215, %14) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor + %217 = "tf.ConcatV2"(%11, %216, %3) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %218 = "tf.StridedSlice"(%217, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %219 = "tf.ExpandDims"(%218, %7) {device = ""} : (tensor, tensor) -> tensor + %220 = "tf.Shape"(%218) {device = ""} : (tensor) -> tensor<1xi32> + %221 = "tf.StridedSlice"(%220, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %222 = "tf.Pack"(%221) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %223 = "tf.StridedSlice"(%217, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %224 = "tf.ExpandDims"(%223, %7) {device = ""} : (tensor, tensor) -> tensor + %225 = "tf.Shape"(%223) {device = ""} : (tensor) -> tensor<1xi32> + %226 = "tf.StridedSlice"(%225, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, 
tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %227 = "tf.Pack"(%226) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %228 = "tf.Equal"(%104, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %229 = "tf.Select"(%228, %169, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %230 = "tf.Cast"(%229) {Truncate = false, device = ""} : (tensor) -> tensor + %231 = "tf.Reshape"(%230, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %232 = "tf.Pack"(%7, %231) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %233 = "tf.Mul"(%231, %8) {device = ""} : (tensor, tensor) -> tensor + %234 = "tf.Pack"(%233) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %235 = "tf.ConcatV2"(%9, %234, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %236 = "tf.Pack"(%229) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %237 = "tf.Pack"(%10, %104) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %238 = "tf.ExpandDims"(%237, %7) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> + %239 = "tf.Tile"(%238, %232) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %240 = "tf.Reshape"(%239, %235) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %241 = "tf.Shape"(%240) {device = ""} : (tensor) -> tensor<1xi64> + %242 = "tf.StridedSlice"(%241, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %243 = "tf.Sub"(%242, %229) {device = ""} : (tensor, tensor) -> tensor + %244 = "tf.Pack"(%243) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %245 = "tf.StridedSlice"(%240, %11, %244, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %246 = "tf.StridedSlice"(%240, %236, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %247:2 = "tf.RaggedRange"(%245, %246, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %248 = "tf.GatherV2"(%111, %247#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %249 = "tf.Cast"(%248) {Truncate = false, device = ""} : (tensor) -> tensor + %250 = "tf.BroadcastTo"(%249, %222) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %251 = "tf.Max"(%250, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %252 = "tf.Maximum"(%14, %251) {device = ""} : (tensor, tensor) -> tensor + %253 = "tf.Range"(%14, %252, %7) {device = ""} : (tensor, tensor, tensor) -> tensor + %254 = "tf.Pack"(%7, %252) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %255 = "tf.Tile"(%219, %254) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %256 = "tf.Shape"(%255) {device = ""} : (tensor) -> tensor<2xi32> + %257 = "tf.StridedSlice"(%256, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %258 = "tf.Prod"(%257, %15) {device = "", keep_dims = false} : 
(tensor<2xi32>, tensor<1xi32>) -> tensor + %259 = "tf.Pack"(%258) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %260 = "tf.Shape"(%255) {device = ""} : (tensor) -> tensor<2xi32> + %261 = "tf.StridedSlice"(%260, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %262 = "tf.Shape"(%255) {device = ""} : (tensor) -> tensor<2xi32> + %263 = "tf.StridedSlice"(%262, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %264 = "tf.ConcatV2"(%261, %259, %263, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %265 = "tf.Reshape"(%255, %264) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %266 = "tf.ExpandDims"(%250, %3) {device = ""} : (tensor, tensor) -> tensor + %267 = "tf.Less"(%253, %266) {device = ""} : (tensor, tensor) -> tensor + %268 = "tf.Reshape"(%267, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %269 = "tf.Where"(%268) {device = ""} : (tensor) -> tensor + %270 = "tf.Squeeze"(%269) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %271 = "tf.GatherV2"(%265, %270, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %272 = "tf.Cast"(%248) {Truncate = false, device = ""} : (tensor) -> tensor + %273 = "tf.BroadcastTo"(%272, %227) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %274 = "tf.Max"(%273, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %275 = "tf.Maximum"(%14, %274) {device = ""} : (tensor, tensor) -> tensor + %276 = "tf.Range"(%14, %275, %7) {device = ""} : (tensor, tensor, tensor) -> tensor + %277 = "tf.Pack"(%7, %275) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %278 = "tf.Tile"(%224, %277) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %279 = "tf.Shape"(%278) {device = ""} : (tensor) -> tensor<2xi32> + %280 = "tf.StridedSlice"(%279, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %281 = "tf.Prod"(%280, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %282 = "tf.Pack"(%281) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %283 = "tf.Shape"(%278) {device = ""} : (tensor) -> tensor<2xi32> + %284 = "tf.StridedSlice"(%283, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %285 = "tf.Shape"(%278) {device = ""} : (tensor) -> tensor<2xi32> + %286 = "tf.StridedSlice"(%285, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %287 = "tf.ConcatV2"(%284, %282, %286, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %288 = "tf.Reshape"(%278, %287) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %289 = "tf.ExpandDims"(%273, %3) {device = ""} : (tensor, tensor) 
-> tensor + %290 = "tf.Less"(%276, %289) {device = ""} : (tensor, tensor) -> tensor + %291 = "tf.Reshape"(%290, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %292 = "tf.Where"(%291) {device = ""} : (tensor) -> tensor + %293 = "tf.Squeeze"(%292) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %294 = "tf.GatherV2"(%288, %293, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %295:2 = "tf.RaggedRange"(%271, %294, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %296 = "tf.If"(%173, %173, %169, %13) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_false_9760, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_true_9750} : (tensor, tensor, tensor, tensor) -> tensor + %297 = "tf.Identity"(%296) {device = ""} : (tensor) -> tensor + %298 = "tf.Select"(%2, %169, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %299 = "tf.Pack"(%298) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %300 = "tf.ConcatV2"(%1, %299, %12, %14) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> + %301 = "tf.StridedSlice"(%300, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %302 = "tf.Equal"(%301, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %303 = "tf.StridedSlice"(%300, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %304 = "tf.StridedSlice"(%300, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %305 = "tf.Equal"(%304, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %306 = "tf.If"(%305, %305, %304, %248) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_false_10250, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_true_10240} : (tensor, tensor, tensor, tensor) -> tensor + %307 = "tf.Identity"(%306) {device = ""} : (tensor) -> tensor + %308 = "tf.If"(%302, %302, %248, %303) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_false_10610, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_true_10600} : (tensor, tensor, tensor, tensor) -> tensor + %309 = "tf.If"(%148, %148, %13, %144) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_Assert_AssertGuard_false_15310, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_Assert_AssertGuard_true_15300} 
: (tensor, tensor, tensor, tensor) -> tensor + %310 = "tf.Identity"(%309) {device = ""} : (tensor) -> tensor + %311 = "tf.Equal"(%144, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %312 = "tf.Select"(%311, %13, %144) {device = ""} : (tensor, tensor, tensor) -> tensor + %313 = "tf.Equal"(%312, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %314 = "tf.LogicalOr"(%313, %2) {device = ""} : (tensor, tensor) -> tensor + %315 = "tf.Equal"(%312, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %316 = "tf.LogicalOr"(%314, %315) {device = ""} : (tensor, tensor) -> tensor + %317 = "tf.Select"(%155, %312, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %318 = "tf.Pack"(%317, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %319 = "tf.StridedSlice"(%318, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %320 = "tf.Cast"(%319) {Truncate = false, device = ""} : (tensor) -> tensor + %321 = "tf.Reshape"(%320, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %322 = "tf.Pack"(%7, %321) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %323 = "tf.Tile"(%156, %322) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %324 = "tf.Mul"(%321, %159) {device = ""} : (tensor, tensor) -> tensor + %325 = "tf.Pack"(%324) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %326 = "tf.ConcatV2"(%158, %325, %160, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %327 = "tf.Reshape"(%323, %326) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %328 = "tf.Shape"(%327) {device = ""} : (tensor) -> tensor<1xi64> + %329 = "tf.StridedSlice"(%328, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %330 = "tf.Pack"(%319) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %331 = "tf.StridedSlice"(%327, %330, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %332 = "tf.Sub"(%329, %319) {device = ""} : (tensor, tensor) -> tensor + %333 = "tf.Pack"(%332) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %334 = "tf.StridedSlice"(%327, %11, %333, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %335:2 = "tf.RaggedRange"(%334, %331, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %336 = "tf.GatherV2"(%162, %335#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %337 = "tf.StridedSlice"(%318, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %338 = "tf.StridedSlice"(%318, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, 
shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %339 = "tf.StridedSlice"(%318, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> + %340 = "tf.ConcatV2"(%338, %339, %14) {device = ""} : (tensor<1xi64>, tensor<0xi64>, tensor) -> tensor<1xi64> + %341 = "tf.StridedSlice"(%318, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %342 = "tf.Mul"(%165, %341) {device = ""} : (tensor, tensor) -> tensor + %343 = "tf.Tile"(%342, %337) {device = ""} : (tensor, tensor<1xi64>) -> tensor + %344 = "tf.Cumsum"(%343, %14) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor + %345 = "tf.ConcatV2"(%11, %344, %3) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %346 = "tf.Shape"(%345) {device = ""} : (tensor) -> tensor<1xi64> + %347 = "tf.StridedSlice"(%346, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %348 = "tf.Sub"(%347, %13) {device = ""} : (tensor, tensor) -> tensor + %349 = "tf.Equal"(%348, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %350 = "tf.LogicalOr"(%349, %2) {device = ""} : (tensor, tensor) -> tensor + %351 = "tf.Equal"(%348, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %352 = "tf.LogicalOr"(%350, %351) {device = ""} : (tensor, tensor) -> tensor + %353 = "tf.StridedSlice"(%345, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %354 = "tf.StridedSlice"(%345, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %355 = "tf.Sub"(%353, %354) {device = ""} : (tensor, tensor) -> tensor + %356 = "tf.Shape"(%345) {device = ""} : (tensor) -> tensor<1xi64> + %357 = "tf.StridedSlice"(%356, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %358 = "tf.Sub"(%357, %13) {device = ""} : (tensor, tensor) -> tensor + %359 = "tf.Equal"(%358, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %360 = "tf.ExpandDims"(%345, %7) {device = ""} : (tensor, tensor) -> tensor + %361 = "tf.Shape"(%345) {device = ""} : (tensor) -> tensor<1xi32> + %362 = "tf.StridedSlice"(%361, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %363 = "tf.StridedSlice"(%361, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, 
tensor<1xi32>, tensor<1xi32>) -> tensor + %364 = "tf.StridedSlice"(%361, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %365 = "tf.Select"(%2, %312, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %366 = "tf.Pack"(%365, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %367 = "tf.StridedSlice"(%366, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %368 = "tf.Cast"(%367) {Truncate = false, device = ""} : (tensor) -> tensor + %369 = "tf.Reshape"(%368, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %370 = "tf.Pack"(%7, %369) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %371 = "tf.Tile"(%4, %370) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %372 = "tf.Mul"(%369, %8) {device = ""} : (tensor, tensor) -> tensor + %373 = "tf.Pack"(%372) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %374 = "tf.ConcatV2"(%9, %373, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %375 = "tf.Reshape"(%371, %374) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %376 = "tf.Shape"(%375) {device = ""} : (tensor) -> tensor<1xi64> + %377 = "tf.StridedSlice"(%376, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %378 = "tf.Pack"(%367) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %379 = "tf.StridedSlice"(%375, %378, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %380 = "tf.Sub"(%377, %367) {device = ""} : (tensor, tensor) -> tensor + %381 = "tf.Pack"(%380) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %382 = "tf.StridedSlice"(%375, %11, %381, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %383:2 = "tf.RaggedRange"(%382, %379, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %384 = "tf.GatherV2"(%11, %383#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %385 = "tf.GatherV2"(%12, %384, %14) {batch_dims = 0 : i64, device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %386 = "tf.StridedSlice"(%366, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %387 = "tf.StridedSlice"(%366, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %388 = "tf.StridedSlice"(%366, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, 
new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi64> + %389 = "tf.ConcatV2"(%387, %388, %14) {device = ""} : (tensor<1xi64>, tensor<0xi64>, tensor) -> tensor<1xi64> + %390 = "tf.Tile"(%385, %389) {device = ""} : (tensor, tensor<1xi64>) -> tensor + %391 = "tf.StridedSlice"(%366, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %392 = "tf.Mul"(%391, %12) {device = ""} : (tensor, tensor<1xi64>) -> tensor<1xi64> + %393 = "tf.Tile"(%392, %386) {device = ""} : (tensor<1xi64>, tensor<1xi64>) -> tensor + %394 = "tf.Cumsum"(%393, %14) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor + %395 = "tf.ConcatV2"(%11, %394, %3) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %396 = "tf.StridedSlice"(%395, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %397 = "tf.ExpandDims"(%396, %7) {device = ""} : (tensor, tensor) -> tensor + %398 = "tf.Shape"(%396) {device = ""} : (tensor) -> tensor<1xi32> + %399 = "tf.StridedSlice"(%398, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %400 = "tf.Pack"(%399) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %401 = "tf.StridedSlice"(%395, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %402 = "tf.ExpandDims"(%401, %7) {device = ""} : (tensor, tensor) -> tensor + %403 = "tf.Shape"(%401) {device = ""} : (tensor) -> tensor<1xi32> + %404 = "tf.StridedSlice"(%403, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %405 = "tf.Pack"(%404) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %406 = "tf.Equal"(%144, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %407 = "tf.Select"(%406, %312, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %408 = "tf.Cast"(%407) {Truncate = false, device = ""} : (tensor) -> tensor + %409 = "tf.Reshape"(%408, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %410 = "tf.Pack"(%7, %409) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %411 = "tf.Mul"(%409, %8) {device = ""} : (tensor, tensor) -> tensor + %412 = "tf.Pack"(%411) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %413 = "tf.ConcatV2"(%9, %412, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %414 = "tf.Pack"(%407) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %415 = "tf.Pack"(%10, %144) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %416 = "tf.ExpandDims"(%415, %7) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> + %417 = "tf.Tile"(%416, %410) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %418 = 
"tf.Reshape"(%417, %413) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %419 = "tf.Shape"(%418) {device = ""} : (tensor) -> tensor<1xi64> + %420 = "tf.StridedSlice"(%419, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %421 = "tf.Sub"(%420, %407) {device = ""} : (tensor, tensor) -> tensor + %422 = "tf.Pack"(%421) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %423 = "tf.StridedSlice"(%418, %11, %422, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %424 = "tf.StridedSlice"(%418, %414, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %425:2 = "tf.RaggedRange"(%423, %424, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %426 = "tf.GatherV2"(%151, %425#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %427 = "tf.Cast"(%426) {Truncate = false, device = ""} : (tensor) -> tensor + %428 = "tf.BroadcastTo"(%427, %400) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %429 = "tf.Max"(%428, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %430 = "tf.Maximum"(%14, %429) {device = ""} : (tensor, tensor) -> tensor + %431 = "tf.Range"(%14, %430, %7) {device = ""} : (tensor, tensor, tensor) -> tensor + %432 = "tf.Pack"(%7, %430) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %433 = "tf.Tile"(%397, %432) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %434 = "tf.Shape"(%433) {device = ""} : (tensor) -> tensor<2xi32> + %435 = "tf.StridedSlice"(%434, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %436 = "tf.Prod"(%435, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %437 = "tf.Pack"(%436) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %438 = "tf.Shape"(%433) {device = ""} : (tensor) -> tensor<2xi32> + %439 = "tf.StridedSlice"(%438, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %440 = "tf.Shape"(%433) {device = ""} : (tensor) -> tensor<2xi32> + %441 = "tf.StridedSlice"(%440, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %442 = "tf.ConcatV2"(%439, %437, %441, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %443 = "tf.Reshape"(%433, %442) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %444 = "tf.ExpandDims"(%428, %3) {device = ""} : (tensor, tensor) -> tensor + %445 = "tf.Less"(%431, %444) {device = ""} : (tensor, tensor) -> tensor + %446 = "tf.Reshape"(%445, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %447 = 
"tf.Where"(%446) {device = ""} : (tensor) -> tensor + %448 = "tf.Squeeze"(%447) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %449 = "tf.GatherV2"(%443, %448, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %450 = "tf.Cast"(%426) {Truncate = false, device = ""} : (tensor) -> tensor + %451 = "tf.BroadcastTo"(%450, %405) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %452 = "tf.Max"(%451, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %453 = "tf.Maximum"(%14, %452) {device = ""} : (tensor, tensor) -> tensor + %454 = "tf.Range"(%14, %453, %7) {device = ""} : (tensor, tensor, tensor) -> tensor + %455 = "tf.Pack"(%7, %453) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %456 = "tf.Tile"(%402, %455) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %457 = "tf.Shape"(%456) {device = ""} : (tensor) -> tensor<2xi32> + %458 = "tf.StridedSlice"(%457, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %459 = "tf.Prod"(%458, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %460 = "tf.Pack"(%459) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %461 = "tf.Shape"(%456) {device = ""} : (tensor) -> tensor<2xi32> + %462 = "tf.StridedSlice"(%461, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %463 = "tf.Shape"(%456) {device = ""} : (tensor) -> tensor<2xi32> + %464 = "tf.StridedSlice"(%463, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %465 = "tf.ConcatV2"(%462, %460, %464, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %466 = "tf.Reshape"(%456, %465) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %467 = "tf.ExpandDims"(%451, %3) {device = ""} : (tensor, tensor) -> tensor + %468 = "tf.Less"(%454, %467) {device = ""} : (tensor, tensor) -> tensor + %469 = "tf.Reshape"(%468, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %470 = "tf.Where"(%469) {device = ""} : (tensor) -> tensor + %471 = "tf.Squeeze"(%470) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %472 = "tf.GatherV2"(%466, %471, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %473:2 = "tf.RaggedRange"(%449, %472, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %474 = "tf.GatherV2"(%390, %473#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %475 = "tf.If"(%316, %316, %312, %13) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_Assert_1_AssertGuard_false_16380, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_Assert_1_AssertGuard_true_16370} : (tensor, tensor, tensor, tensor) -> tensor + %476 = "tf.Identity"(%475) {device = ""} : (tensor) -> tensor + %477 = "tf.Select"(%2, %312, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %478 = "tf.Pack"(%477) {axis 
= 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %479 = "tf.ConcatV2"(%1, %478, %12, %14) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> + %480 = "tf.StridedSlice"(%479, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %481 = "tf.Equal"(%480, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %482 = "tf.StridedSlice"(%479, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %483 = "tf.StridedSlice"(%479, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %484 = "tf.Equal"(%483, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %485 = "tf.If"(%484, %484, %483, %426) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_Assert_2_AssertGuard_false_16870, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_Assert_2_AssertGuard_true_16860} : (tensor, tensor, tensor, tensor) -> tensor + %486 = "tf.Identity"(%485) {device = ""} : (tensor) -> tensor + %487 = "tf.If"(%481, %481, %426, %482) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_Assert_3_AssertGuard_false_17230, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_Assert_3_AssertGuard_true_17220} : (tensor, tensor, tensor, tensor) -> tensor + %488 = "tf.Identity"(%487) {device = ""} : (tensor) -> tensor + %489 = "tf.If"(%352, %352, %13, %348) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_false_21910, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_true_21900} : (tensor, tensor, tensor, tensor) -> tensor + %490 = "tf.Identity"(%489) {device = ""} : (tensor) -> tensor + %491 = "tf.Equal"(%348, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %492 = "tf.Select"(%491, %13, %348) {device = ""} : (tensor, tensor, tensor) -> tensor + %493 = "tf.Equal"(%492, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %494 = "tf.LogicalOr"(%493, %2) {device = ""} : (tensor, tensor) -> tensor + %495 = "tf.Equal"(%492, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %496 = "tf.LogicalOr"(%494, %495) {device = ""} : (tensor, tensor) -> tensor + %497 = "tf.Select"(%359, %492, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %498 = "tf.Pack"(%497, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %499 = "tf.StridedSlice"(%498, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> 
tensor + %500 = "tf.Cast"(%499) {Truncate = false, device = ""} : (tensor) -> tensor + %501 = "tf.Reshape"(%500, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %502 = "tf.Pack"(%7, %501) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %503 = "tf.Tile"(%360, %502) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %504 = "tf.Mul"(%501, %363) {device = ""} : (tensor, tensor) -> tensor + %505 = "tf.Pack"(%504) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %506 = "tf.ConcatV2"(%362, %505, %364, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %507 = "tf.Reshape"(%503, %506) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %508 = "tf.Shape"(%507) {device = ""} : (tensor) -> tensor<1xi64> + %509 = "tf.StridedSlice"(%508, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %510 = "tf.Pack"(%499) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %511 = "tf.StridedSlice"(%507, %510, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %512 = "tf.Sub"(%509, %499) {device = ""} : (tensor, tensor) -> tensor + %513 = "tf.Pack"(%512) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %514 = "tf.StridedSlice"(%507, %11, %513, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %515:2 = "tf.RaggedRange"(%514, %511, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %516 = "tf.Select"(%2, %492, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %517 = "tf.Pack"(%516, %13) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %518 = "tf.StridedSlice"(%517, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %519 = "tf.Cast"(%518) {Truncate = false, device = ""} : (tensor) -> tensor + %520 = "tf.Reshape"(%519, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %521 = "tf.Pack"(%7, %520) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %522 = "tf.Tile"(%4, %521) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %523 = "tf.Mul"(%520, %8) {device = ""} : (tensor, tensor) -> tensor + %524 = "tf.Pack"(%523) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %525 = "tf.ConcatV2"(%9, %524, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %526 = "tf.Reshape"(%522, %525) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %527 = "tf.Shape"(%526) {device = ""} : (tensor) -> tensor<1xi64> + %528 = "tf.StridedSlice"(%527, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %529 = "tf.Pack"(%518) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %530 = "tf.StridedSlice"(%526, %529, %11, %12) {begin_mask = 0 : i64, 
device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %531 = "tf.Sub"(%528, %518) {device = ""} : (tensor, tensor) -> tensor + %532 = "tf.Pack"(%531) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %533 = "tf.StridedSlice"(%526, %11, %532, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %534:2 = "tf.RaggedRange"(%533, %530, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %535 = "tf.StridedSlice"(%517, %15, %16, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<1xi64> + %536 = "tf.StridedSlice"(%517, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %537 = "tf.Mul"(%536, %12) {device = ""} : (tensor, tensor<1xi64>) -> tensor<1xi64> + %538 = "tf.Tile"(%537, %535) {device = ""} : (tensor<1xi64>, tensor<1xi64>) -> tensor + %539 = "tf.Cumsum"(%538, %14) {device = "", exclusive = false, reverse = false} : (tensor, tensor) -> tensor + %540 = "tf.ConcatV2"(%11, %539, %3) {device = ""} : (tensor<1xi64>, tensor, tensor) -> tensor + %541 = "tf.StridedSlice"(%540, %15, %5, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %542 = "tf.ExpandDims"(%541, %7) {device = ""} : (tensor, tensor) -> tensor + %543 = "tf.Shape"(%541) {device = ""} : (tensor) -> tensor<1xi32> + %544 = "tf.StridedSlice"(%543, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %545 = "tf.Pack"(%544) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %546 = "tf.StridedSlice"(%540, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %547 = "tf.ExpandDims"(%546, %7) {device = ""} : (tensor, tensor) -> tensor + %548 = "tf.Shape"(%546) {device = ""} : (tensor) -> tensor<1xi32> + %549 = "tf.StridedSlice"(%548, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %550 = "tf.Pack"(%549) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %551 = "tf.Equal"(%348, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %552 = "tf.Select"(%551, %492, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %553 = "tf.Cast"(%552) {Truncate = false, device = ""} : (tensor) -> tensor + %554 = "tf.Reshape"(%553, %9) {device = ""} : (tensor, tensor<0xi32>) -> tensor + %555 = "tf.Pack"(%7, %554) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %556 = "tf.Mul"(%554, %8) 
{device = ""} : (tensor, tensor) -> tensor + %557 = "tf.Pack"(%556) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %558 = "tf.ConcatV2"(%9, %557, %9, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %559 = "tf.Pack"(%552) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %560 = "tf.Pack"(%10, %348) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi64> + %561 = "tf.ExpandDims"(%560, %7) {device = ""} : (tensor<2xi64>, tensor) -> tensor<2x1xi64> + %562 = "tf.Tile"(%561, %555) {device = ""} : (tensor<2x1xi64>, tensor<2xi32>) -> tensor<2x?xi64> + %563 = "tf.Reshape"(%562, %558) {device = ""} : (tensor<2x?xi64>, tensor<1xi32>) -> tensor + %564 = "tf.Shape"(%563) {device = ""} : (tensor) -> tensor<1xi64> + %565 = "tf.StridedSlice"(%564, %15, %16, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<1xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %566 = "tf.Sub"(%565, %552) {device = ""} : (tensor, tensor) -> tensor + %567 = "tf.Pack"(%566) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %568 = "tf.StridedSlice"(%563, %11, %567, %12) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %569 = "tf.StridedSlice"(%563, %559, %11, %12) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor + %570:2 = "tf.RaggedRange"(%568, %569, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %571 = "tf.GatherV2"(%355, %570#1, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %572 = "tf.Cast"(%571) {Truncate = false, device = ""} : (tensor) -> tensor + %573 = "tf.BroadcastTo"(%572, %545) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %574 = "tf.Max"(%573, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %575 = "tf.Maximum"(%14, %574) {device = ""} : (tensor, tensor) -> tensor + %576 = "tf.Range"(%14, %575, %7) {device = ""} : (tensor, tensor, tensor) -> tensor + %577 = "tf.Pack"(%7, %575) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %578 = "tf.Tile"(%542, %577) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %579 = "tf.Shape"(%578) {device = ""} : (tensor) -> tensor<2xi32> + %580 = "tf.StridedSlice"(%579, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %581 = "tf.Prod"(%580, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %582 = "tf.Pack"(%581) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %583 = "tf.Shape"(%578) {device = ""} : (tensor) -> tensor<2xi32> + %584 = "tf.StridedSlice"(%583, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %585 = "tf.Shape"(%578) {device = ""} : (tensor) -> tensor<2xi32> + %586 = "tf.StridedSlice"(%585, %6, %15, %16) {begin_mask = 0 : i64, device = "", 
ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %587 = "tf.ConcatV2"(%584, %582, %586, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %588 = "tf.Reshape"(%578, %587) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %589 = "tf.ExpandDims"(%573, %3) {device = ""} : (tensor, tensor) -> tensor + %590 = "tf.Less"(%576, %589) {device = ""} : (tensor, tensor) -> tensor + %591 = "tf.Reshape"(%590, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %592 = "tf.Where"(%591) {device = ""} : (tensor) -> tensor + %593 = "tf.Squeeze"(%592) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %594 = "tf.GatherV2"(%588, %593, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %595 = "tf.Cast"(%571) {Truncate = false, device = ""} : (tensor) -> tensor + %596 = "tf.BroadcastTo"(%595, %550) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %597 = "tf.Max"(%596, %15) {device = "", keep_dims = false} : (tensor, tensor<1xi32>) -> tensor + %598 = "tf.Maximum"(%14, %597) {device = ""} : (tensor, tensor) -> tensor + %599 = "tf.Range"(%14, %598, %7) {device = ""} : (tensor, tensor, tensor) -> tensor + %600 = "tf.Pack"(%7, %598) {axis = 0 : i64, device = ""} : (tensor, tensor) -> tensor<2xi32> + %601 = "tf.Tile"(%547, %600) {device = ""} : (tensor, tensor<2xi32>) -> tensor + %602 = "tf.Shape"(%601) {device = ""} : (tensor) -> tensor<2xi32> + %603 = "tf.StridedSlice"(%602, %15, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + %604 = "tf.Prod"(%603, %15) {device = "", keep_dims = false} : (tensor<2xi32>, tensor<1xi32>) -> tensor + %605 = "tf.Pack"(%604) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi32> + %606 = "tf.Shape"(%601) {device = ""} : (tensor) -> tensor<2xi32> + %607 = "tf.StridedSlice"(%606, %15, %15, %16) {begin_mask = 1 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %608 = "tf.Shape"(%601) {device = ""} : (tensor) -> tensor<2xi32> + %609 = "tf.StridedSlice"(%608, %6, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<2xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %610 = "tf.ConcatV2"(%607, %605, %609, %14) {device = ""} : (tensor<0xi32>, tensor<1xi32>, tensor<0xi32>, tensor) -> tensor<1xi32> + %611 = "tf.Reshape"(%601, %610) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %612 = "tf.ExpandDims"(%596, %3) {device = ""} : (tensor, tensor) -> tensor + %613 = "tf.Less"(%599, %612) {device = ""} : (tensor, tensor) -> tensor + %614 = "tf.Reshape"(%613, %5) {device = ""} : (tensor, tensor<1xi32>) -> tensor + %615 = "tf.Where"(%614) {device = ""} : (tensor) -> tensor + %616 = "tf.Squeeze"(%615) {device = "", squeeze_dims = [1]} : (tensor) -> tensor + %617 = "tf.GatherV2"(%611, %616, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %618:2 = "tf.RaggedRange"(%594, %617, %13) {T = i64, Tsplits = i64, device = ""} : (tensor, tensor, tensor) -> (tensor, tensor) + %619 = "tf.If"(%496, %496, %492, %13) 
{_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_false_22980, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_true_22970} : (tensor, tensor, tensor, tensor) -> tensor + %620 = "tf.Identity"(%619) {device = ""} : (tensor) -> tensor + %621 = "tf.Select"(%2, %492, %13) {device = ""} : (tensor, tensor, tensor) -> tensor + %622 = "tf.Pack"(%621) {axis = 0 : i64, device = ""} : (tensor) -> tensor<1xi64> + %623 = "tf.ConcatV2"(%1, %622, %12, %14) {device = ""} : (tensor<0xi64>, tensor<1xi64>, tensor<1xi64>, tensor) -> tensor<2xi64> + %624 = "tf.StridedSlice"(%623, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %625 = "tf.Equal"(%624, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %626 = "tf.StridedSlice"(%623, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %627 = "tf.StridedSlice"(%623, %16, %6, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 0 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 1 : i64} : (tensor<2xi64>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %628 = "tf.Equal"(%627, %13) {device = "", incompatible_shape_error = true} : (tensor, tensor) -> tensor + %629 = "tf.If"(%628, %628, %627, %571) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_false_23470, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_true_23460} : (tensor, tensor, tensor, tensor) -> tensor + %630 = "tf.Identity"(%629) {device = ""} : (tensor) -> tensor + %631 = "tf.If"(%625, %625, %571, %626) {_lower_using_switch_merge = true, _read_only_resource_inputs = [], device = "", else_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_false_23830, is_stateless = false, output_shapes = [#tf.shape<>], then_branch = @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_true_23820} : (tensor, tensor, tensor, tensor) -> tensor + %632 = "tf.Identity"(%631) {device = ""} : (tensor) -> tensor + %633 = "tf.Identity"(%308) {device = ""} : (tensor) -> tensor + %634 = "tf.Shape"(%37#2) {device = ""} : (tensor) -> tensor<1xi32> + %635 = "tf.StridedSlice"(%634, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %636 = "tf.Cast"(%635) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> + %637 = "tf.Identity"(%636) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> + %638 = "tf.Shape"(%37#3) {device = ""} : (tensor) -> tensor<1xi32> + %639 = "tf.StridedSlice"(%638, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, 
tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %640 = "tf.Cast"(%639) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> + %641 = "tf.Identity"(%640) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> + %642 = "tf.GatherV2"(%37#3, %336, %14) {batch_dims = 0 : i64, device = ""} : (tensor, tensor, tensor) -> tensor + %643 = "tf.Tile"(%642, %340) {device = ""} : (tensor, tensor<1xi64>) -> tensor + %644 = "tf.Sub"(%643, %474) {device = ""} : (tensor, tensor) -> tensor + %645 = "tf.Shape"(%644) {device = ""} : (tensor) -> tensor<1xi32> + %646 = "tf.StridedSlice"(%645, %16, %15, %16) {begin_mask = 0 : i64, device = "", ellipsis_mask = 0 : i64, end_mask = 1 : i64, new_axis_mask = 0 : i64, shrink_axis_mask = 0 : i64} : (tensor<1xi32>, tensor<1xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<0xi32> + %647 = "tf.Cast"(%646) {Truncate = false, device = ""} : (tensor<0xi32>) -> tensor<0xi64> + %648 = "tf.Identity"(%647) {device = ""} : (tensor<0xi64>) -> tensor<0xi64> + %649 = "tf.UnicodeEncode"(%37#0, %58) {Tsplits = i64, device = "", errors = "replace", output_encoding = "UTF-8", replacement_char = 65533 : i64} : (tensor, tensor) -> tensor + %650 = "tf.Identity"(%649) {device = ""} : (tensor) -> tensor + return %650 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_false_3220(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Input tensors have incompatible shapes."> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedConcat/RaggedFromTensor/Const:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/RaggedConcat/RaggedNRows/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedConcat_assert_equal_1_Assert_AssertGuard_true_3210(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_3980(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y 
(WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_3970(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_4340(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () + %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor + return %4 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_4330(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_4680(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func 
@WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_4670(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_5050(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_5040(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_5410(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () + %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor + return %4 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_5400(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func 
@WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_false_5770(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits/RaggedFromRowSplits_1/RaggedNRows/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_RaggedFromRowSplits_1_assert_equal_1_Assert_AssertGuard_true_5760(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_6120(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_6110(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_6480(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], 
tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () + %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor + return %4 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_6470(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_6820(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_1/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_1_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_6810(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_false_7190(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:zero"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y 
(WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/Const:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_equal_1_Assert_AssertGuard_true_7180(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_false_7550(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to from_row_splits do not form a valid RaggedTensor:monotonic"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x >= 0 did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/RowPartitionFromRowSplits/sub:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor) -> () + %3 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %4 = "tf.Identity"(%3) {device = ""} : (tensor) -> tensor + return %4 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_RowPartitionFromRowSplits_assert_non_negative_assert_less_equal_Assert_AssertGuard_true_7540(%arg0: tensor, %arg1: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_false_7890(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Arguments to _from_row_partition do not form a valid RaggedTensor"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<"Condition x == y did not hold element-wise:"> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"x (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/strided_slice_1:0) = "> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"y (WhitespaceTokenize/WhitespaceTokenize/RaggedFromNestedRowSplits_2/RaggedFromRowSplits/strided_slice:0) = "> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 3 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func 
@WhitespaceTokenize_WhitespaceTokenize_RaggedFromNestedRowSplits_2_RaggedFromRowSplits_assert_equal_1_Assert_AssertGuard_true_7880(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_AssertGuard_false_8690(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_AssertGuard_true_8680(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_false_9760(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_1_AssertGuard_true_9750(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_false_10250(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : 
(tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_2_AssertGuard_true_10240(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_false_10610(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_Assert_3_AssertGuard_true_10600(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_Assert_AssertGuard_false_15310(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_Assert_AssertGuard_true_15300(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_Assert_1_AssertGuard_false_16380(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = 
dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_Assert_1_AssertGuard_true_16370(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_Assert_2_AssertGuard_false_16870(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_Assert_2_AssertGuard_true_16860(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_Assert_3_AssertGuard_false_17230(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_Assert_3_AssertGuard_true_17220(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_false_21910(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> 
tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_AssertGuard_true_21900(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_false_22980(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_1_AssertGuard_true_22970(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_false_23470(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape], tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_2_AssertGuard_true_23460(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape<>, #tf.shape]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_false_23830(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>], 
tf.signature.is_stateful} { + %0 = "tf.Const"() {value = dense<"Unable to broadcast: dimension size mismatch in dimension"> : tensor} : () -> tensor + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + %2 = "tf.Const"() {value = dense<"lengths="> : tensor} : () -> tensor + %3 = "tf.Const"() {value = dense<"dim_size="> : tensor} : () -> tensor + "tf.Assert"(%arg0, %0, %1, %2, %arg1, %3, %arg2) {device = "", summarize = 10 : i64} : (tensor, tensor, tensor, tensor, tensor, tensor, tensor) -> () + %4 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %5 = "tf.Identity"(%4) {device = ""} : (tensor) -> tensor + return %5 : tensor + } + func @WhitespaceTokenize_WhitespaceTokenize_RaggedGather_1_Assert_3_AssertGuard_true_23820(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor attributes {tf._input_shapes = [#tf.shape<>, #tf.shape, #tf.shape<>]} { + %0 = "tf.Identity"(%arg0) {device = ""} : (tensor) -> tensor + %1 = "tf.Identity"(%0) {device = ""} : (tensor) -> tensor + return %1 : tensor + } + + // CHECK: func @whitespace_tokenizer_rank0(%arg0: tensor {tf._user_specified_name = "input"}) -> tensor attributes {tf._input_shapes = [#tf.shape<>], tf.api_implements = "tftext:WhitespaceTokenizer", tf.signature.is_stateful} { + // CHECK: %0 = "tfl.custom"(%arg0) {custom_code = "tftext:WhitespaceTokenizer", custom_option = opaque<"tfl", "0x"> : tensor<0xi8>} : (tensor) -> tensor + // CHECK: return %0 : tensor } diff --git a/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc b/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc index 12929152d1e..cb671c7cd70 100644 --- a/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc +++ b/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc @@ -67,27 +67,57 @@ inline RankedTensorType getResultType(mlir::FuncOp func, int idx) { } LogicalResult VerifyWhitespaceTokenizer(mlir::FuncOp func) { - if (func.getNumResults() != 2) { - return failure(); - } - if (func.getNumArguments() != 1) { - return failure(); - } + // In the case of an input tensor with rank 0, + // Whitespace tokenizer generates 1 output: + // * String tensor for tokens. + // + // In the case of a 1-D input tensor, + // Whitespace tokenizer generates 2 outputs to make up a ragged tensor: + // * 1st output is the value of ragged tensor; + // * 2nd output is the offset. + // + // In the case of a batched input tensor, + // Whitespace tokenizer generates 3 outputs to make up a nested ragged tensor: + // * 1st output is the value of ragged tensor; + // * 2nd output is the inner offset; + // * 3rd output is the outer offset.
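// For illustration, with hypothetical shapes (not taken from this change),
// the three accepted signatures might look roughly like:
//   rank 0: (tensor<!tf.string>) -> tensor<?x!tf.string>
//   rank 1: (tensor<?x!tf.string>) -> (tensor<?x!tf.string>, tensor<?xi64>)
//   rank 2: (tensor<?x?x!tf.string>) -> (tensor<?x!tf.string>, tensor<?xi64>, tensor<?xi64>)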
auto input_type = getInputType(func, 0); - if (!input_type || input_type.getRank() != 1 || - !input_type.getElementType().isa()) { - return failure(); + if (!input_type || !input_type.getElementType().isa() || + !input_type.hasRank()) { + return func.emitError() << "Input should be a string tensor"; } + + const std::vector kValidNumOfOutput = {1, 2, 3}; + if (input_type.getRank() >= kValidNumOfOutput.size()) { + return func.emitError() + << "Unrecognized input rank: " << input_type.getRank(); + } + if (func.getNumResults() != kValidNumOfOutput[input_type.getRank()]) { + return func.emitError() + << "Expect " << kValidNumOfOutput[input_type.getRank()] + << "output(s) when input has rank " << input_type.getRank(); + } + auto value_type = getResultType(func, 0); - if (!value_type || value_type.getRank() != 1 || + if (!value_type || !value_type.hasRank() || value_type.getRank() != 1 || !value_type.getElementType().isa()) { - return failure(); + return func.emitError() << "1st output should be string tensor"; } - auto offset_type = getResultType(func, 1); - if (offset_type.getRank() != 1 || - !offset_type.getElementType().isInteger(64)) { - return failure(); + if (func.getNumResults() > 1) { + auto offset_type = getResultType(func, 1); + if (!offset_type || !offset_type.hasRank() || offset_type.getRank() != 1 || + !offset_type.getElementType().isInteger(64)) { + return func.emitError() << "2nd output should be int64 tensor"; + } } + if (func.getNumResults() > 2) { + auto offset_type = getResultType(func, 2); + if (!offset_type || !offset_type.hasRank() || offset_type.getRank() != 1 || + !offset_type.getElementType().isInteger(64)) { + return func.emitError() << "3rd output should be int64 tensor"; + } + } + return success(); } @@ -96,19 +126,12 @@ LogicalResult ConvertWhitespaceTokenizer(mlir::FuncOp func, func.eraseBody(); func.addEntryBlock(); func.setAttr(kTFAPIImplements, StringAttr::get(api, func.getContext())); - Value text = func.getArgument(0); - auto output_type = func.getType().getResult(0); - auto offset_type = func.getType().getResult(1); - SmallVector shape = {output_type, offset_type}; - ArrayRef output_types(shape); - OpBuilder builder(func.getBody()); - auto op = builder.create(func.getLoc(), output_types, - ValueRange(text), api, - emptyCustomOption(&builder)); - + auto op = builder.create( + func.getLoc(), func.getType().getResults(), ValueRange(text), api, + emptyCustomOption(&builder)); builder.create(func.getLoc(), op.getResults()); return success(); } From b10aa217a4b1451b832b83b59b3c56e5e0109485 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Tue, 9 Jun 2020 12:02:18 -0700 Subject: [PATCH 129/178] [NFC] Fix formatting, comments, and a spelling - Also move the implementation in while_loop_outline.cc into an anonymous namespace RELNOTES:n/a PiperOrigin-RevId: 315530618 Change-Id: I4cc67b73d3556407a828a95035ead03dcda71e23 --- .../mlir/lite/transforms/while_loop_outline.cc | 6 +++--- .../transforms/functional_control_flow_to_cfg.cc | 10 +--------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/tensorflow/compiler/mlir/lite/transforms/while_loop_outline.cc b/tensorflow/compiler/mlir/lite/transforms/while_loop_outline.cc index 707f4aba881..106b0f9af83 100644 --- a/tensorflow/compiler/mlir/lite/transforms/while_loop_outline.cc +++ b/tensorflow/compiler/mlir/lite/transforms/while_loop_outline.cc @@ -54,7 +54,6 @@ class WhileOutlinePass tensorflow::OpOrArgLocNameMapper mapper_; }; -} // namespace std::string WhileOutlinePass::GetName(Operation* op, StringRef 
suffix) { return (mapper_.GetUniqueName(op) + suffix).str(); @@ -62,7 +61,7 @@ std::string WhileOutlinePass::GetName(Operation* op, StringRef suffix) { // Returns whether the WhileOp is already outlined (e.g., only consists of calls // to functions). -static bool IsAlreadyOutlinedd(WhileOp while_op) { +bool IsAlreadyOutlined(WhileOp while_op) { auto just_call = [](Region& region) { auto it = region.front().begin(); if (!isa(*it)) return false; @@ -120,7 +119,7 @@ void WhileOutlinePass::OutlineWhile(WhileOp while_op) { } // Skip if already just calls. - if (extra_operands.empty() && IsAlreadyOutlinedd(while_op)) return; + if (extra_operands.empty() && IsAlreadyOutlined(while_op)) return; // Collect new types. SmallVector types; @@ -238,6 +237,7 @@ void WhileOutlinePass::runOnOperation() { getOperation().walk( [&](mlir::TFL::WhileOp while_op) { OutlineWhile(while_op); }); } +} // namespace // Creates an instance of the TensorFlow Lite dialect WhileOp outline pass. std::unique_ptr> CreateWhileOutlinePass() { diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_cfg.cc b/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_cfg.cc index 91bbac235e9..a0be88cc564 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_cfg.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_cfg.cc @@ -14,7 +14,7 @@ limitations under the License. ==============================================================================*/ // This transformation pass transforms functional control flow operations in the -// standard TensorFlow dialect to MLIR Control Flow Graph (CFG) form. +// TensorFlow dialect to MLIR Control Flow Graph (CFG) form. #include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project #include "mlir/IR/Attributes.h" // from @llvm-project @@ -52,7 +52,6 @@ static Value LowerCondition(Location loc, Value value, OpBuilder* builder) { // // Requires the function to provide arguments for each of the `fn` operands // that is compatible for tensor cast. -// static Operation* CallFn(Location loc, const std::function& get_arg, FuncOp fn, OpBuilder* builder) { FunctionType fn_type = fn.getType(); @@ -113,7 +112,6 @@ static void JumpToBlock(Location loc, const std::function& get_arg, // Requires that the block has same number of arguments as number of results of // the operation and either they have same types or are more generic types and // it is possible to cast them to results' types. -// static void ReplaceOpResultWithBlockArgs(Location loc, Operation* op, Block* block, OpBuilder* builder) { assert(op->getNumResults() == block->getNumArguments()); @@ -132,9 +130,6 @@ static void ReplaceOpResultWithBlockArgs(Location loc, Operation* op, // Given a functional IfOp, transforms the enclosing code to eliminate it // completely from the IR, breaking it into operations to evaluate the condition // as a bool, plus some branches. -// -// This returns true on failure. -// static LogicalResult LowerIfOp(IfOp op) { Operation* op_inst = op.getOperation(); Location loc = op_inst->getLoc(); @@ -193,9 +188,6 @@ static LogicalResult LowerIfOp(IfOp op) { // Given a functional WhileOp, transforms the enclosing code to eliminate it // completely from the IR, breaking it into operations to execute the loop body // repeatedly while the loop condition is true. -// -// This returns true on failure. 
-// static LogicalResult LowerWhileOp(WhileOp op) { Operation* op_inst = op.getOperation(); Location loc = op_inst->getLoc(); From cdaa8b5c2c57f320c8633d01b437d9c150a04afc Mon Sep 17 00:00:00 2001 From: Advait Jain Date: Tue, 9 Jun 2020 12:19:13 -0700 Subject: [PATCH 130/178] Remove dependency from delegates/gpu to micro. PiperOrigin-RevId: 315534587 Change-Id: I0afba418e420c5d5b8a0bb3cc69c8ec96e4d152f --- tensorflow/lite/delegates/gpu/common/BUILD | 1 - .../gpu/common/quantization_util_test.cc | 136 +++++++++++++++--- 2 files changed, 115 insertions(+), 22 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/common/BUILD b/tensorflow/lite/delegates/gpu/common/BUILD index 333c54f145f..e9877b63fb3 100644 --- a/tensorflow/lite/delegates/gpu/common/BUILD +++ b/tensorflow/lite/delegates/gpu/common/BUILD @@ -222,7 +222,6 @@ cc_test( deps = [ ":quantization_util", "//tensorflow/lite:util", - "//tensorflow/lite/micro/testing:micro_test", "@com_google_googletest//:gtest_main", ], ) diff --git a/tensorflow/lite/delegates/gpu/common/quantization_util_test.cc b/tensorflow/lite/delegates/gpu/common/quantization_util_test.cc index 1ca6922dfe4..064a2a2e6b2 100644 --- a/tensorflow/lite/delegates/gpu/common/quantization_util_test.cc +++ b/tensorflow/lite/delegates/gpu/common/quantization_util_test.cc @@ -17,7 +17,6 @@ limitations under the License. #include #include -#include "tensorflow/lite/micro/testing/test_utils.h" #include "tensorflow/lite/util.h" using ::testing::Eq; @@ -36,22 +35,120 @@ std::unique_ptr BuildTfLiteIntArray( return result; } +// TODO(b/158578883): this function is copied from the Micro codebase. Consider +// moving to a shared location. +void PopulateContext(std::vector& tensors, + TfLiteContext& context) { + context.tensors_size = tensors.size(); + context.tensors = tensors.data(); + context.recommended_num_threads = 1; +} + +// TODO(b/158578883): this function is copied from the Micro codebase. Consider +// moving to a shared location. +int ElementCount(const TfLiteIntArray& dims) { + int result = 1; + for (int i = 0; i < dims.size; ++i) { + result *= dims.data[i]; + } + return result; +} + +// TODO(b/158578883): this function is copied from the Micro codebase. Consider +// moving to a shared location. +template +inline float ScaleFromMinMax(const float min, const float max) { + return (max - min) / ((std::numeric_limits::max() * 1.0) - + std::numeric_limits::min()); +} + +// TODO(b/158578883): this function is copied from the Micro codebase. Consider +// moving to a shared location. +template +inline int ZeroPointFromMinMax(const float min, const float max) { + return static_cast(std::numeric_limits::min()) + + static_cast(-min / ScaleFromMinMax(min, max) + 0.5f); +} + +// TODO(b/158578883): this function is copied from the Micro codebase. Consider +// moving to a shared location. +TfLiteTensor CreateQuantizedTensor(const int8_t* data, TfLiteIntArray* dims, + const char* name, float min, float max, + bool is_variable) { + TfLiteTensor result; + result.type = kTfLiteInt8; + result.data.int8 = const_cast(data); + result.dims = dims; + result.params = {ScaleFromMinMax(min, max), + ZeroPointFromMinMax(min, max)}; + result.allocation_type = kTfLiteMemNone; + result.bytes = ElementCount(*dims) * sizeof(int8_t); + result.allocation = nullptr; + result.name = name; + result.is_variable = is_variable; + return result; +} + +// TODO(b/158578883): this function is copied from the Micro codebase. Consider +// moving to a shared location. 
+TfLiteTensor CreateQuantizedTensor(const uint8_t* data, TfLiteIntArray* dims, + const char* name, float min, float max, + bool is_variable) { + TfLiteTensor result; + result.type = kTfLiteUInt8; + result.data.uint8 = const_cast(data); + result.dims = dims; + result.params = {ScaleFromMinMax(min, max), + ZeroPointFromMinMax(min, max)}; + result.allocation_type = kTfLiteMemNone; + result.bytes = ElementCount(*dims) * sizeof(uint8_t); + result.allocation = nullptr; + result.name = name; + result.is_variable = false; + return result; +} + +// TODO(b/158578883): this function is copied from the Micro codebase. Consider +// moving to a shared location. +TfLiteTensor CreateTensor(TfLiteIntArray* dims, const char* name, + bool is_variable) { + TfLiteTensor result; + result.dims = dims; + result.name = name; + result.params = {}; + result.quantization = {kTfLiteNoQuantization, nullptr}; + result.is_variable = is_variable; + result.allocation_type = kTfLiteMemNone; + result.allocation = nullptr; + return result; +} + +// TODO(b/158578883): this function is copied from the Micro codebase. Consider +// moving to a shared location. +TfLiteTensor CreateFloatTensor(const float* data, TfLiteIntArray* dims, + const char* name, bool is_variable) { + TfLiteTensor result = CreateTensor(dims, name, is_variable); + result.type = kTfLiteFloat32; + result.data.f = const_cast(data); + result.bytes = ElementCount(*dims) * sizeof(float); + return result; +} + TEST(DequantizeInputs, Int8) { TfLiteContext context; auto input_dims = BuildTfLiteIntArray({1, 3, 2, 1}); std::vector data = {-3, -2, -1, 1, 2, 3}; std::vector dequantized_data(data.size()); - TfLiteTensor input = tflite::testing::CreateQuantizedTensor( + TfLiteTensor input = CreateQuantizedTensor( data.data(), input_dims.get(), "input", /*min=*/-12.8f, /*max=*/12.7f, /*is_variable=*/false); - TfLiteTensor dequantized_input = tflite::testing::CreateFloatTensor( + TfLiteTensor dequantized_input = CreateFloatTensor( dequantized_data.data(), input_dims.get(), "input_dequant", /*is_variable=*/true); std::vector tensors{input, dequantized_input}; - tflite::testing::PopulateContext(tensors.data(), tensors.size(), - /*error_reporter=*/nullptr, &context); + PopulateContext(tensors, context); std::vector input_indices = {1}; std::unordered_map quant_conversion_map = {{1, 0}}; @@ -68,16 +165,15 @@ TEST(DequantizeInputs, UInt8) { std::vector data = {0, 1, 2, 3, 4, 5}; std::vector dequantized_data(data.size()); - TfLiteTensor input = tflite::testing::CreateQuantizedTensor( - data.data(), input_dims.get(), "input", - /*min=*/0.0f, /*max=*/25.5f, /*is_variable=*/false); - TfLiteTensor dequantized_input = tflite::testing::CreateFloatTensor( + TfLiteTensor input = + CreateQuantizedTensor(data.data(), input_dims.get(), "input", + /*min=*/0.0f, /*max=*/25.5f, /*is_variable=*/false); + TfLiteTensor dequantized_input = CreateFloatTensor( dequantized_data.data(), input_dims.get(), "input_dequant", /*is_variable=*/true); std::vector tensors{input, dequantized_input}; - tflite::testing::PopulateContext(tensors.data(), tensors.size(), - /*error_reporter=*/nullptr, &context); + PopulateContext(tensors, context); std::vector input_indices = {1}; std::unordered_map quant_conversion_map = {{1, 0}}; @@ -93,15 +189,14 @@ TEST(QuantizeOutputs, Int8) { auto input_dims = BuildTfLiteIntArray({1, 3, 2, 1}); std::vector data = {-0.3, -0.2, -0.1, 0.1, 0.2, 0.3}; std::vector quantized_data(data.size()); - TfLiteTensor output = tflite::testing::CreateFloatTensor( - data.data(), input_dims.get(), 
"output", /*is_variable=*/false); - TfLiteTensor quantized_output = tflite::testing::CreateQuantizedTensor( + TfLiteTensor output = CreateFloatTensor(data.data(), input_dims.get(), + "output", /*is_variable=*/false); + TfLiteTensor quantized_output = CreateQuantizedTensor( quantized_data.data(), input_dims.get(), "output_quant", /*min=*/-12.8f, /*max=*/12.7f, /*is_variable=*/true); std::vector tensors{output, quantized_output}; - tflite::testing::PopulateContext(tensors.data(), tensors.size(), - /*error_reporter=*/nullptr, &context); + PopulateContext(tensors, context); std::vector output_indices = {0}; std::unordered_map quant_conversion_map = {{0, 1}}; @@ -116,15 +211,14 @@ TEST(QuantizeOutputs, UInt8) { auto input_dims = BuildTfLiteIntArray({1, 3, 2, 1}); std::vector data = {0.0, 0.1, 0.2, 0.3, 0.4, 0.5}; std::vector quantized_data(data.size()); - TfLiteTensor output = tflite::testing::CreateFloatTensor( - data.data(), input_dims.get(), "output", /*is_variable=*/false); - TfLiteTensor quantized_output = tflite::testing::CreateQuantizedTensor( + TfLiteTensor output = CreateFloatTensor(data.data(), input_dims.get(), + "output", /*is_variable=*/false); + TfLiteTensor quantized_output = CreateQuantizedTensor( quantized_data.data(), input_dims.get(), "output_quant", /*min=*/0.0f, /*max=*/25.5f, /*is_variable=*/true); std::vector tensors{output, quantized_output}; - tflite::testing::PopulateContext(tensors.data(), tensors.size(), - /*error_reporter=*/nullptr, &context); + PopulateContext(tensors, context); std::vector output_indices = {0}; std::unordered_map quant_conversion_map = {{0, 1}}; From 099b5eeeadd0b97128a55aa0c4e7eaeee552bcb9 Mon Sep 17 00:00:00 2001 From: Jose Baiocchi Date: Tue, 9 Jun 2020 12:22:36 -0700 Subject: [PATCH 131/178] Make TraceMeEncode compile with C++14 PiperOrigin-RevId: 315535324 Change-Id: I2d6bebd93a283d36b09f9b29fd84503402b4a5ac --- tensorflow/core/profiler/lib/BUILD | 14 ++++- tensorflow/core/profiler/lib/traceme_encode.h | 43 +++++++------ .../core/profiler/lib/traceme_encode_test.cc | 63 +++++++++++++++++++ 3 files changed, 101 insertions(+), 19 deletions(-) create mode 100644 tensorflow/core/profiler/lib/traceme_encode_test.cc diff --git a/tensorflow/core/profiler/lib/BUILD b/tensorflow/core/profiler/lib/BUILD index e80b9fc9766..2e32552e076 100644 --- a/tensorflow/core/profiler/lib/BUILD +++ b/tensorflow/core/profiler/lib/BUILD @@ -1,5 +1,5 @@ load("//tensorflow/core/platform:build_config_root.bzl", "if_static") -load("//tensorflow:tensorflow.bzl", "if_not_android", "tf_cuda_library") +load("//tensorflow:tensorflow.bzl", "if_not_android", "tf_cc_test", "tf_cuda_library") load("//tensorflow:tensorflow.bzl", "tf_pybind_cc_library_wrapper") package( @@ -113,6 +113,18 @@ cc_library( ], ) +tf_cc_test( + name = "traceme_encode_test", + srcs = ["traceme_encode_test.cc"], + deps = [ + ":traceme_encode", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core/platform", + "@com_google_absl//absl/strings", + ], +) + cc_library( name = "annotated_traceme", hdrs = ["annotated_traceme.h"], diff --git a/tensorflow/core/profiler/lib/traceme_encode.h b/tensorflow/core/profiler/lib/traceme_encode.h index 2e23c6d878b..91b23740fc3 100644 --- a/tensorflow/core/profiler/lib/traceme_encode.h +++ b/tensorflow/core/profiler/lib/traceme_encode.h @@ -19,7 +19,6 @@ limitations under the License. #include #include -#include #include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" @@ -28,6 +27,19 @@ limitations under the License. 
namespace tensorflow { namespace profiler { + +// An argument passed to TraceMeEncode. +struct TraceMeArg { + // This constructor is required because absl::AlphaNum is non-copyable. + template + TraceMeArg(absl::string_view k, Value v) : key(k), value(v) {} + + TF_DISALLOW_COPY_AND_ASSIGN(TraceMeArg); + + absl::string_view key; + absl::AlphaNum value; +}; + namespace traceme_internal { // Copies the contents of str to the address pointed by out. @@ -45,23 +57,21 @@ TF_ATTRIBUTE_ALWAYS_INLINE inline char* Append(char* out, // Appends args encoded as TraceMe metadata to name. TF_ATTRIBUTE_ALWAYS_INLINE inline std::string AppendArgs( - std::string name, - const std::initializer_list>& - args) { + std::string name, std::initializer_list args) { if (TF_PREDICT_TRUE(args.size() > 0)) { const auto old_size = name.size(); auto new_size = old_size + args.size() * 2 + 1; for (const auto& arg : args) { - new_size += arg.first.size() + arg.second.size(); + new_size += arg.key.size() + arg.value.size(); } name.resize(new_size); char* const begin = &name[0]; char* out = begin + old_size; *out++ = '#'; for (const auto& arg : args) { - out = Append(out, arg.first); + out = Append(out, arg.key); *out++ = '='; - out = Append(out, arg.second.Piece()); + out = Append(out, arg.value.Piece()); *out++ = ','; } *(out - 1) = '#'; @@ -92,19 +102,16 @@ TF_ATTRIBUTE_ALWAYS_INLINE inline void AppendMetadata( // TraceMe trace_me([value1]() { // return TraceMeEncode("my_trace", {{"key1", value1}, {"key2", 42}}); // }); -inline std::string TraceMeEncode( - std::string name, - std::initializer_list> args) { +TF_ATTRIBUTE_ALWAYS_INLINE inline std::string TraceMeEncode( + std::string name, std::initializer_list args) { return traceme_internal::AppendArgs(std::move(name), args); } -inline std::string TraceMeEncode( - absl::string_view name, - std::initializer_list> args) { +TF_ATTRIBUTE_ALWAYS_INLINE inline std::string TraceMeEncode( + absl::string_view name, std::initializer_list args) { return traceme_internal::AppendArgs(std::string(name), args); } -inline std::string TraceMeEncode( - const char* name, - std::initializer_list> args) { +TF_ATTRIBUTE_ALWAYS_INLINE inline std::string TraceMeEncode( + const char* name, std::initializer_list args) { return traceme_internal::AppendArgs(std::string(name), args); } @@ -116,8 +123,8 @@ inline std::string TraceMeEncode( // trace_me.AppendMetadata([value1]() { // return TraceMeEncode({{"key1", value1}, {"key2", 42}}); // }); -inline std::string TraceMeEncode( - std::initializer_list> args) { +TF_ATTRIBUTE_ALWAYS_INLINE inline std::string TraceMeEncode( + std::initializer_list args) { return traceme_internal::AppendArgs(std::string(), args); } diff --git a/tensorflow/core/profiler/lib/traceme_encode_test.cc b/tensorflow/core/profiler/lib/traceme_encode_test.cc new file mode 100644 index 00000000000..0cd84e31a56 --- /dev/null +++ b/tensorflow/core/profiler/lib/traceme_encode_test.cc @@ -0,0 +1,63 @@ +/* Copyright 2020 The TensorFlow Authors All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/core/profiler/lib/traceme_encode.h" + +#include + +#include "absl/strings/str_cat.h" +#include "tensorflow/core/platform/platform.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace profiler { +namespace { + +TEST(TraceMeEncodeTest, NoArgTest) { + EXPECT_EQ(TraceMeEncode("Hello!", {}), "Hello!"); +} + +TEST(TraceMeEncodeTest, OneArgTest) { + EXPECT_EQ(TraceMeEncode("Hello", {{"context", "World"}}), + "Hello#context=World#"); +} + +TEST(TraceMeEncodeTest, TwoArgsTest) { + EXPECT_EQ(TraceMeEncode("Hello", {{"context", "World"}, {"request_id", 42}}), + "Hello#context=World,request_id=42#"); +} + +TEST(TraceMeEncodeTest, ThreeArgsTest) { + EXPECT_EQ(TraceMeEncode("Hello", {{"context", "World"}, + {"request_id", 42}, + {"addr", absl::Hex(0xdeadbeef)}}), + "Hello#context=World,request_id=42,addr=deadbeef#"); +} + +#if !defined(PLATFORM_WINDOWS) +TEST(TraceMeEncodeTest, TemporaryStringTest) { + EXPECT_EQ(TraceMeEncode("Hello", {{std::string("context"), + absl::StrCat("World:", 2020)}}), + "Hello#context=World:2020#"); +} +#endif + +TEST(TraceMeEncodeTest, NoNameTest) { + EXPECT_EQ(TraceMeEncode({{"context", "World"}, {"request_id", 42}}), + "#context=World,request_id=42#"); +} + +} // namespace +} // namespace profiler +} // namespace tensorflow From 9c236222b3fd650af74a8318d0a0bf3ed1088c6b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Jun 2020 12:23:05 -0700 Subject: [PATCH 132/178] [tf.data] Add comments for the input time computation in the InterleaveMany node. PiperOrigin-RevId: 315535430 Change-Id: I0587175a9288c465676225b573d9ac136fbec361 --- tensorflow/core/framework/model.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorflow/core/framework/model.cc b/tensorflow/core/framework/model.cc index 223dbfef3c2..fd28cfe7f6a 100644 --- a/tensorflow/core/framework/model.cc +++ b/tensorflow/core/framework/model.cc @@ -59,6 +59,11 @@ class InterleaveMany : public Node { (*input_times)[long_name()] = old_input_time; return; } + // Here `old_input_time + SelfProcessingTimeLocked()` is the average input + // time for the interleave node to call one of the `(num_inputs() - 1)` + // input nodes(except the first one) to return an element. Regardless of the + // `block_length` parameter of interleave node, the average input time for + // any of the `(num_inputs() - 1)` input nodes to be called is computed as: double new_input_time = (old_input_time + SelfProcessingTimeLocked()) * static_cast(num_inputs() - 1); (*input_times)[long_name()] = new_input_time; From 1145cc3855b216f872cb0596a6cf90c86edc9b88 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Jun 2020 12:25:55 -0700 Subject: [PATCH 133/178] For Runtime Metadata, HostTracer is not required, it supposed to only capture GPU events, CPU side of data are collected by other means than TraceMe. 
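
A minimal sketch of the resulting behavior (illustrative only, not part of the
patch itself; it is a fragment of the session-setup path and mirrors the hunk
applied to DirectSession::RunInternal below, using the ProfileOptions /
ProfilerSession API shown in that hunk):

    // Hardware (GPU) trace requested: build a profiler session that keeps
    // device events but turns the host tracer off, so no CPU-side TraceMe
    // events are collected for runtime metadata.
    ProfileOptions options = ProfilerSession::DefaultOptions();
    options.set_host_tracer_level(0);              // disable host tracing
    options.set_device_type(ProfileOptions::GPU);  // keep device (GPU) events
    std::unique_ptr<ProfilerSession> profiler_session =
        ProfilerSession::Create(options);

Host-side data for runtime metadata is gathered by other means, so dropping the
host tracer here avoids collecting it twice.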
PiperOrigin-RevId: 315536038
Change-Id: Ie028fbeeaa07696eaf78e0393c52841903acea1f
---
 tensorflow/core/common_runtime/direct_session.cc | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc
index 96938bcbafd..41847c31dfb 100644
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@@ -666,7 +666,10 @@ Status DirectSession::RunInternal(
   std::unique_ptr<ProfilerSession> profiler_session;
   if (run_options.trace_level() >= RunOptions::HARDWARE_TRACE) {
-    profiler_session = ProfilerSession::Create();
+    ProfileOptions options = ProfilerSession::DefaultOptions();
+    options.set_host_tracer_level(0);
+    options.set_device_type(ProfileOptions::GPU);
+    profiler_session = ProfilerSession::Create(options);
   }

   // Register this step with session's cancellation manager, so that

From b4b83222d470afbf0b83d12b0824c0f056235655 Mon Sep 17 00:00:00 2001
From: Amit Patankar
Date: Tue, 9 Jun 2020 13:14:01 -0700
Subject: [PATCH 134/178] Update the Bazel version for Windows.

PiperOrigin-RevId: 315545625
Change-Id: I314053ac6372c9f8160fa3caddd6c8682292e84a
---
 tensorflow/tools/ci_build/release/common_win.bat | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/ci_build/release/common_win.bat b/tensorflow/tools/ci_build/release/common_win.bat
index 14b6a7e13fc..0918f4c43b0 100644
--- a/tensorflow/tools/ci_build/release/common_win.bat
+++ b/tensorflow/tools/ci_build/release/common_win.bat
@@ -73,7 +73,7 @@ SET PATH=%CUDNN_INSTALL_PATH%\bin;%PATH%
 @REM Setup Bazel
 @REM
 :: Download Bazel from github and make sure its found in PATH.
-SET BAZEL_VERSION=2.0.0
+SET BAZEL_VERSION=3.1.0
 md C:\tools\bazel\
 wget -q https://github.com/bazelbuild/bazel/releases/download/%BAZEL_VERSION%/bazel-%BAZEL_VERSION%-windows-x86_64.exe -O C:/tools/bazel/bazel.exe
 SET PATH=C:\tools\bazel;%PATH%

From 6eff291a056d06f8c159485f81228f685b6f719c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Tue, 9 Jun 2020 13:22:07 -0700
Subject: [PATCH 135/178] Use python tracer to control TraceMe in python
 language. This should have better performance than going through pybind11.

PiperOrigin-RevId: 315547199
Change-Id: I64c4d9f5dce6a23fbeed7fcde10c7a8e839494a4
---
 .../profiler/internal/cpu/python_tracer.cc    | 24 ++++++------
 tensorflow/python/profiler/BUILD              |  2 +
 .../python/profiler/internal/python_hooks.cc  | 38 +++++++++++++++----
 .../python/profiler/internal/python_hooks.h   | 11 +++++-
 .../profiler/internal/traceme_wrapper.cc      |  6 ++-
 tensorflow/python/profiler/trace.py           |  2 +-
 6 files changed, 57 insertions(+), 26 deletions(-)

diff --git a/tensorflow/core/profiler/internal/cpu/python_tracer.cc b/tensorflow/core/profiler/internal/cpu/python_tracer.cc
index d684cb8f768..4233c5fdd72 100644
--- a/tensorflow/core/profiler/internal/cpu/python_tracer.cc
+++ b/tensorflow/core/profiler/internal/cpu/python_tracer.cc
@@ -23,7 +23,6 @@ limitations under the License.
 #include "tensorflow/core/profiler/profiler_options.pb.h"
 #include "tensorflow/core/profiler/protobuf/xplane.pb.h"
 #include "tensorflow/core/protobuf/config.pb.h"
-#include "tensorflow/core/util/env_var.h"
 #include "tensorflow/python/profiler/internal/python_hooks.h"

 namespace tensorflow {
 namespace profiler {
 namespace {
 // the events to TraceMeRecorder.
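 // Note on the new control flow: the tracer is configured through
 // PythonHooksOptions rather than the TF_ENABLE_OSS_PYTHON_TRACER environment
 // variable. As CreatePythonTracer below shows, Python function tracing is
 // enabled only when both python_tracer_level and host_tracer_level are
 // non-zero, while the Python-level TraceMe switch follows host_tracer_level
 // alone.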
class PythonTracer : public ProfilerInterface { public: - explicit PythonTracer() = default; + explicit PythonTracer(const PythonHooksOptions& options) + : options_(options) {} ~PythonTracer() override; // Starts recording TraceMes. @@ -51,6 +51,7 @@ class PythonTracer : public ProfilerInterface { private: bool recording_ = false; + const PythonHooksOptions options_; TF_DISALLOW_COPY_AND_ASSIGN(PythonTracer); }; @@ -66,7 +67,7 @@ Status PythonTracer::Start() { } VLOG(1) << __FUNCTION__; recording_ = true; - PythonHooks::GetSingleton()->Start(); + PythonHooks::GetSingleton()->Start(options_); return Status::OK(); } @@ -75,7 +76,7 @@ Status PythonTracer::Stop() { return errors::Internal("TraceMeRecorder not started"); } VLOG(1) << __FUNCTION__; - PythonHooks::GetSingleton()->Stop(); + PythonHooks::GetSingleton()->Stop(options_); recording_ = false; return Status::OK(); } @@ -105,18 +106,15 @@ Status PythonTracer::CollectData(XSpace* space) { // Not in anonymous namespace for testing purposes. std::unique_ptr CreatePythonTracer( const ProfileOptions& options) { - if (options.python_tracer_level() == 0) return nullptr; - // This ProfilerInterface rely on TraceMeRecorder to be active. - if (options.host_tracer_level() == 0) return nullptr; - return absl::make_unique(); + PythonHooksOptions pyhooks_options; + pyhooks_options.enable_trace_python_function = + options.python_tracer_level() && options.host_tracer_level(); + pyhooks_options.enable_python_traceme = options.host_tracer_level() != 0; + return absl::make_unique(pyhooks_options); } auto register_python_tracer_factory = [] { - bool enable; - TF_CHECK_OK(ReadBoolFromEnvVar("TF_ENABLE_OSS_PYTHON_TRACER", true, &enable)); - if (enable) { - RegisterProfilerFactory(&CreatePythonTracer); - } + RegisterProfilerFactory(&CreatePythonTracer); return 0; }(); diff --git a/tensorflow/python/profiler/BUILD b/tensorflow/python/profiler/BUILD index ffc090a4676..7f9e4512c5a 100644 --- a/tensorflow/python/profiler/BUILD +++ b/tensorflow/python/profiler/BUILD @@ -48,6 +48,8 @@ py_library( srcs_version = "PY2AND3", visibility = ["//tensorflow:internal"], deps = [ + "//tensorflow/python:errors", + "//tensorflow/python:platform", "//tensorflow/python:util", "//tensorflow/python/profiler/internal:_pywrap_profiler", ], diff --git a/tensorflow/python/profiler/internal/python_hooks.cc b/tensorflow/python/profiler/internal/python_hooks.cc index 7c25f402f74..f367372a0ed 100644 --- a/tensorflow/python/profiler/internal/python_hooks.cc +++ b/tensorflow/python/profiler/internal/python_hooks.cc @@ -14,6 +14,7 @@ limitations under the License. 
==============================================================================*/ #include "tensorflow/python/profiler/internal/python_hooks.h" +#include "absl/strings/string_view.h" #include "absl/strings/strip.h" #include "tensorflow/core/platform/path.h" @@ -44,16 +45,30 @@ PythonHooks* PythonHooks::GetSingleton() { return singleton; } -void PythonHooks::Start() { - PyGILState_STATE gil_state = PyGILState_Ensure(); - SetProfilerInAllThreads(); - PyGILState_Release(gil_state); +void PythonHooks::Start(const PythonHooksOptions& option) { + if (option.enable_python_traceme || option.enable_trace_python_function) { + PyGILState_STATE gil_state = PyGILState_Ensure(); + if (option.enable_trace_python_function) { + SetProfilerInAllThreads(); + } + if (option.enable_python_traceme) { + EnableTraceMe(true); + } + PyGILState_Release(gil_state); + } } -void PythonHooks::Stop() { - PyGILState_STATE gil_state = PyGILState_Ensure(); - ClearProfilerInAllThreads(); - PyGILState_Release(gil_state); +void PythonHooks::Stop(const PythonHooksOptions& option) { + if (option.enable_python_traceme || option.enable_trace_python_function) { + PyGILState_STATE gil_state = PyGILState_Ensure(); + if (option.enable_trace_python_function) { + ClearProfilerInAllThreads(); + } + if (option.enable_python_traceme) { + EnableTraceMe(false); + } + PyGILState_Release(gil_state); + } } void PythonHooks::Finalize() { tracemes_.clear(); } @@ -180,5 +195,12 @@ void PythonHooks::ClearProfilerInAllThreads() { ThreadingSetProfile(py::none()); } +void PythonHooks::EnableTraceMe(bool enable) { + const char* kModuleName = + "tensorflow.python.profiler.internal._pywrap_traceme"; + auto trace_module = py::module::import(kModuleName); + trace_module.attr("enabled") = enable; +} + } // namespace profiler } // namespace tensorflow diff --git a/tensorflow/python/profiler/internal/python_hooks.h b/tensorflow/python/profiler/internal/python_hooks.h index 8a9ce645ca9..582edf4a93b 100644 --- a/tensorflow/python/profiler/internal/python_hooks.h +++ b/tensorflow/python/profiler/internal/python_hooks.h @@ -30,19 +30,26 @@ namespace profiler { namespace py = ::pybind11; +struct PythonHooksOptions { + bool enable_trace_python_function = false; + bool enable_python_traceme = true; +}; + // Singleton for tracing python function calls. class PythonHooks { public: static PythonHooks* GetSingleton(); - void Start(); - void Stop(); + void Start(const PythonHooksOptions& option); + void Stop(const PythonHooksOptions& option); void Finalize(); void ProfileSlow(const py::object& frame, const string& event, const py::object& arg); void ProfileFast(PyFrameObject* frame, int what, PyObject* arg); private: + void EnableTraceMe(bool enable); + void SetProfilerInAllThreads(); void ClearProfilerInAllThreads(); diff --git a/tensorflow/python/profiler/internal/traceme_wrapper.cc b/tensorflow/python/profiler/internal/traceme_wrapper.cc index 32a1f423918..bf8a9ba495a 100644 --- a/tensorflow/python/profiler/internal/traceme_wrapper.cc +++ b/tensorflow/python/profiler/internal/traceme_wrapper.cc @@ -23,8 +23,10 @@ namespace py = ::pybind11; using ::tensorflow::profiler::TraceMeWrapper; PYBIND11_MODULE(_pywrap_traceme, m) { + // This variable will be modified by PythonHooks::Start/Stop(). such + // arrangement will reduce the number of calls through pybind11. 
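+  // Python callers (see trace.py in this patch) read this plain module
+  // attribute instead of calling the former TraceMe.IsEnabled() binding, so
+  // checking whether tracing is active no longer crosses the pybind11
+  // boundary.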
+ m.attr("enabled") = py::bool_(false); py::class_(m, "TraceMe", py::module_local()) .def(py::init()) - .def("SetMetadata", &TraceMeWrapper::SetMetadata) - .def_static("IsEnabled", &TraceMeWrapper::IsEnabled); + .def("SetMetadata", &TraceMeWrapper::SetMetadata); }; diff --git a/tensorflow/python/profiler/trace.py b/tensorflow/python/profiler/trace.py index ea4eb060488..1fdba2abe13 100644 --- a/tensorflow/python/profiler/trace.py +++ b/tensorflow/python/profiler/trace.py @@ -72,7 +72,7 @@ class Trace(object): The example above uses the keyword argument "step_num" to specify the training step being traced. """ - if _pywrap_traceme.TraceMe.IsEnabled(): + if _pywrap_traceme.enabled: # Creating _pywrap_traceme.TraceMe starts the clock. self._traceme = _pywrap_traceme.TraceMe(name, **kwargs) else: From b8a267a9fe95dea518cb04c726031e96874d26a0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Jun 2020 13:44:17 -0700 Subject: [PATCH 136/178] For Runtime Metadata, HostTracer is not required, it supposed to only capture GPU events, CPU side of data are collected by other means than TraceMe. PiperOrigin-RevId: 315551551 Change-Id: I5f576aa9469af2c2e411d9b57d6fe36dc6ed02aa --- tensorflow/core/common_runtime/direct_session.cc | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index 41847c31dfb..96938bcbafd 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -666,10 +666,7 @@ Status DirectSession::RunInternal( std::unique_ptr profiler_session; if (run_options.trace_level() >= RunOptions::HARDWARE_TRACE) { - ProfileOptions options = ProfilerSession::DefaultOptions(); - options.set_host_tracer_level(0); - options.set_device_type(ProfileOptions::GPU); - profiler_session = ProfilerSession::Create(options); + profiler_session = ProfilerSession::Create(); } // Register this step with session's cancellation manager, so that From 191628f0e5f34f71db15804d2aa83bfb326ad7bf Mon Sep 17 00:00:00 2001 From: Karim Nosir Date: Tue, 9 Jun 2020 13:51:17 -0700 Subject: [PATCH 137/178] - Remove executor_to_control pass. - Remove raise control flow pass. - Cleanup usage in TFLite and other referneces. 
- Remove skip_control_dialect member in PassConfig PiperOrigin-RevId: 315552807 Change-Id: I4994f6a3c26cbe4845b97e7933272a860d3f15c2 --- .../mlir/lite/common/tfl_pass_config.h | 10 +- .../compiler/mlir/lite/tf_tfl_passes.cc | 28 +- .../mlir/lite/tf_to_tfl_flatbuffer.cc | 6 - tensorflow/compiler/mlir/tensorflow/BUILD | 2 - .../mlir/tensorflow/tests/empty-main.mlir | 2 +- .../tests/executor_to_control_dialect.mlir | 188 -------------- .../tensorflow/tests/raise-control-flow.mlir | 57 ----- .../mlir/tensorflow/transforms/optimize.cc | 2 + .../mlir/tensorflow/transforms/passes.h | 10 +- .../transforms/raise_control_flow.cc | 159 ------------ .../translate/executor_to_control_dialect.cc | 242 ------------------ .../compiler/mlir/tfjs/tf_tfjs_passes.cc | 6 - 12 files changed, 14 insertions(+), 698 deletions(-) delete mode 100644 tensorflow/compiler/mlir/tensorflow/tests/executor_to_control_dialect.mlir delete mode 100644 tensorflow/compiler/mlir/tensorflow/tests/raise-control-flow.mlir delete mode 100644 tensorflow/compiler/mlir/tensorflow/transforms/raise_control_flow.cc delete mode 100644 tensorflow/compiler/mlir/tensorflow/translate/executor_to_control_dialect.cc diff --git a/tensorflow/compiler/mlir/lite/common/tfl_pass_config.h b/tensorflow/compiler/mlir/lite/common/tfl_pass_config.h index 2ed63fcc794..83ff9971246 100644 --- a/tensorflow/compiler/mlir/lite/common/tfl_pass_config.h +++ b/tensorflow/compiler/mlir/lite/common/tfl_pass_config.h @@ -32,7 +32,6 @@ struct PassConfig { lower_tensor_list_ops(false), trim_functions_whitelist({}), quant_specs(std::move(specs)), - skip_control_dialect(false), form_clusters(false), unfold_batch_matmul(true), legalize_tf_while(true), @@ -49,13 +48,8 @@ struct PassConfig { llvm::ArrayRef trim_functions_whitelist; // All information about quantization. QuantizationSpecs quant_specs; - // If `skip_control_dialect` is true, TF executor dialect is not converted to - // TF control dialect prior to legalization to TF Lite. - // TODO(b/142911013): Remove flag once control dialect is removed. - bool skip_control_dialect; - // If `form_clusters` is true (and `skip_control_dialect` is true), clusters - // are formed by grouping consecutive ops of the same device, under a - // `tf_device.launch` op. + // If `form_clusters` is true , clusters are formed by grouping consecutive + // ops of the same device, under a `tf_device.launch` op. bool form_clusters; // if `unfold_batch_matmul` is true, the tf.BatchMatMul is unfolded to a set // of tfl.fully_connected ops. diff --git a/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc b/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc index 40420eee697..f23898d9530 100644 --- a/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc +++ b/tensorflow/compiler/mlir/lite/tf_tfl_passes.cc @@ -58,21 +58,10 @@ void AddQuantizationPasses(const mlir::TFL::QuantizationSpecs& quant_specs, void AddTFToTFLConversionPasses(const mlir::TFL::PassConfig& pass_config, mlir::OpPassManager* pass_manager) { - pass_manager->addPass(mlir::tf_executor::CreateSwitchFoldPass()); - if (pass_config.skip_control_dialect) { - // Merge islands. - pass_manager->addPass( - mlir::tf_executor::CreateTFExecutorIslandCoarseningPass()); - // Assuming island coarsening above results in a graph with a single island, - // a canonicalization can be ran to hoist the ops of the single island out. 
- pass_manager->addPass(mlir::createCanonicalizerPass()); - - if (pass_config.form_clusters) - pass_manager->addPass(mlir::TFDevice::CreateClusterFormationPass()); - } else { - pass_manager->addPass(mlir::CreateTFExecutorToControlDialectConversion()); - pass_manager->addPass(mlir::TFControlFlow::CreateRaiseTFControlFlowPass()); - } + mlir::TF::StandardPipelineOptions standard_pipeline_options; + standard_pipeline_options.enable_inliner = false; + standard_pipeline_options.form_clusters = pass_config.form_clusters; + mlir::TF::CreateTFStandardPipeline(*pass_manager, standard_pipeline_options); if (pass_config.shape_inference) { pass_manager->addPass(mlir::TF::CreateTFShapeInferencePass()); @@ -213,13 +202,8 @@ void CreateTFLStandardPipeline(OpPassManager& pm, OpPassManager& func_pm = pm.nest(); // tf_executor dialect passes - Cleaning up the IR. - func_pm.addPass(tf_executor::CreateSwitchFoldPass()); - func_pm.addPass(tf_executor::CreateTFExecutorGraphPruningPass()); - func_pm.addPass(tf_executor::CreateTFExecutorIslandCoarseningPass()); - - // more cleanup of executor dialect and raise to control flow. - pm.addPass(mlir::CreateTFExecutorToControlDialectConversion()); - pm.addPass(mlir::TFControlFlow::CreateRaiseTFControlFlowPass()); + mlir::TF::StandardPipelineOptions standard_pipeline_options; + mlir::TF::CreateTFStandardPipeline(func_pm, standard_pipeline_options); // This is needed for control flow support with TF TensorList. pm.addPass(mlir::TFL::CreateLowerStaticTensorListPass()); diff --git a/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc b/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc index 62f64ab63b4..38b96cf833f 100644 --- a/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc +++ b/tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.cc @@ -38,12 +38,6 @@ limitations under the License. #include "tensorflow/lite/tools/optimize/quantize_weights.h" #include "tensorflow/stream_executor/lib/statusor.h" -namespace mlir { -/// Create a pass to convert from the TFExecutor to the TF control dialect. 
-std::unique_ptr> -CreateTFExecutorToControlDialectConversion(); -} // namespace mlir - namespace tensorflow { using mlir::MLIRContext; diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index 40add34393b..c74c13de0c2 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -431,7 +431,6 @@ cc_library( "transforms/optimize_global_tensors.cc", "transforms/parallel_execute_to_islands.cc", "transforms/promote_resources_to_args.cc", - "transforms/raise_control_flow.cc", "transforms/readonly_references_to_resources.cc", "transforms/replicate_invariant_op_hoisting.cc", "transforms/replicate_to_island.cc", @@ -460,7 +459,6 @@ cc_library( "transforms/tpu_variable_runtime_reformatting.cc", "translate/breakup-islands.cc", "translate/control_to_executor_dialect.cc", - "translate/executor_to_control_dialect.cc", "translate/tf_functional_to_executor.cc", ], hdrs = [ diff --git a/tensorflow/compiler/mlir/tensorflow/tests/empty-main.mlir b/tensorflow/compiler/mlir/tensorflow/tests/empty-main.mlir index 4a4aa277067..b5a9b84bc4a 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/empty-main.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/empty-main.mlir @@ -1,4 +1,4 @@ -// RUN: tf-opt -tf-executor-to-control-conversion %s | FileCheck %s --check-prefix=CONTROL --dump-input=fail +// RUN: tf-opt -tf-executor-graph-pruning %s | FileCheck %s --check-prefix=CONTROL --dump-input=fail // RUN: tf-opt -tf-control-to-executor-conversion %s | FileCheck %s --check-prefix=EXECUTOR --dump-input=fail // CONTROL-LABEL: func @main diff --git a/tensorflow/compiler/mlir/tensorflow/tests/executor_to_control_dialect.mlir b/tensorflow/compiler/mlir/tensorflow/tests/executor_to_control_dialect.mlir deleted file mode 100644 index 5ecef050055..00000000000 --- a/tensorflow/compiler/mlir/tensorflow/tests/executor_to_control_dialect.mlir +++ /dev/null @@ -1,188 +0,0 @@ -// RUN: tf-opt -tf-executor-to-control-conversion %s | FileCheck %s --dump-input=fail -// CHECK-LABEL: func @LoopTest() { -func @LoopTest() { - tf_executor.graph { - %0:2 = tf_executor.island { - %cst = "tf.Const"() {device = "", dtype = "tfdtype$DT_INT32", name = "Const", value = dense<1> : tensor} : () -> tensor - tf_executor.yield %cst : tensor - } - %1:2 = tf_executor.Enter %0#0 frame "while/while_context" : (tensor) -> (tensor<*xi32>, !tf_executor.control) {T = "tfdtype$DT_INT32", device = "", name = "while/Enter"} - %2 = tf_executor.island { - "tf.NoOp"() {device = "", name = "cluster/pivot"} : () -> () - tf_executor.yield - } - %3:3 = tf_executor.NextIteration.Source : tensor<*xi32> {T = "tfdtype$DT_INT32", device = "", id = 0 : i64, name = "while/NextIteration"} - %4:3 = tf_executor.Merge %3#0, %1#0 : tensor<*xi32> {N = 2 : i64, T = "tfdtype$DT_INT32", device = "", name = "while/Merge"} - %5:2 = tf_executor.island(%4#2) { - %cst = "tf.Const"() {device = "", dtype = "tfdtype$DT_INT32", name = "while/Less/y", value = dense<2> : tensor} : () -> tensor - tf_executor.yield %cst : tensor - } - %6:2 = tf_executor.island { - %14 = "tf.Less"(%4#0, %5#0) {T = "tfdtype$DT_INT32", device = "", name = "while/Less"} : (tensor<*xi32>, tensor) -> tensor<*xi1> - tf_executor.yield %14 : tensor<*xi1> - } - %7:2 = tf_executor.LoopCond %6#0 : (tensor<*xi1>) -> (tensor, !tf_executor.control) {device = "", name = "while/LoopCond"} - %8:3 = tf_executor.Switch %4#0, %7#0 : tensor<*xi32> {T = "tfdtype$DT_INT32", _class = ["loc = @while/Merge"], device = "", name = "while/Switch"} - 
%9:2 = tf_executor.Exit %8#0 : tensor<*xi32> {T = "tfdtype$DT_INT32", device = "", name = "while/Exit"} - %10:2 = tf_executor.island { - %14 = "tf.Identity"(%8#1) {T = "tfdtype$DT_INT32", device = "", name = "while/Identity"} : (tensor<*xi32>) -> tensor<*xi32> - tf_executor.yield %14 : tensor<*xi32> - } - %11:2 = tf_executor.island(%10#1) { - %cst = "tf.Const"() {device = "", dtype = "tfdtype$DT_INT32", name = "while/Add/y", value = dense<3> : tensor} : () -> tensor - tf_executor.yield %cst : tensor - } - %12:2 = tf_executor.island { - %14 = "tf.Add"(%10#0, %11#0) {T = "tfdtype$DT_INT32", device = "", name = "while/Add"} : (tensor<*xi32>, tensor) -> tensor<*xi32> - tf_executor.yield %14 : tensor<*xi32> - } - %13 = tf_executor.ControlTrigger %2, %12#1, %9#1 {_tpu_replicate = "cluster", device = "", name = "gradients/while/mul_2_Da30D05wlPU_grad/SymbolicGradient/b_sync"} - tf_executor.NextIteration.Sink [%3#1] %12#0, %13 : tensor<*xi32> {T = "tfdtype$DT_INT32", device = "", id = 0 : i64, name = "while/NextIteration"} - tf_executor.fetch - } - return -} -// CHECK-NEXT: %[[CONST:[0-9]*]]:2 = "_tf.Const"() {device = "", dtype = "tfdtype$DT_INT32", name = "Const", value = dense<1> : tensor} : () -> (tensor, !_tf.control) -// CHECK-NEXT: %[[ENTER:[0-9]*]]:2 = "_tf.Enter"(%[[CONST]]#0) {T = "tfdtype$DT_INT32", device = "", frame_name = "while/while_context", is_constant = false, name = "while/Enter", parallel_iterations = 10 : i64} : (tensor) -> (tensor<*xi32>, !_tf.control) -// CHECK-NEXT: %[[NOOP:[0-9]*]] = "_tf.NoOp"() {device = "", name = "cluster/pivot"} : () -> !_tf.control -// CHECK-NEXT: %[[SOURCE:[0-9]*]]:2 = "_tf.NextIteration.source"() {T = "tfdtype$DT_INT32", device = "", id = 0 : i64, name = "while/NextIteration"} : () -> (tensor<*xi32>, !_tf.control) -// CHECK-NEXT: %[[MERGE:[0-9]*]]:3 = "_tf.Merge"(%[[SOURCE]]#0, %[[ENTER]]#0) {N = 2 : i64, T = "tfdtype$DT_INT32", device = "", name = "while/Merge"} : (tensor<*xi32>, tensor<*xi32>) -> (tensor<*xi32>, tensor, !_tf.control) -// CHECK-NEXT: %[[CONST_LESS:[0-9]*]]:2 = "_tf.Const"(%[[MERGE]]#2) {device = "", dtype = "tfdtype$DT_INT32", name = "while/Less/y", value = dense<2> : tensor} : (!_tf.control) -> (tensor, !_tf.control) -// CHECK-NEXT: %[[LESS:[0-9]*]]:2 = "_tf.Less"(%[[MERGE]]#0, %[[CONST_LESS]]#0) {T = "tfdtype$DT_INT32", device = "", name = "while/Less"} : (tensor<*xi32>, tensor) -> (tensor<*xi1>, !_tf.control) -// CHECK-NEXT: %[[COND:[0-9]*]]:2 = "_tf.LoopCond"(%[[LESS]]#0) {device = "", name = "while/LoopCond"} : (tensor<*xi1>) -> (tensor, !_tf.control) -// CHECK-NEXT: %[[SWITCH:[0-9]*]]:3 = "_tf.Switch"(%[[MERGE]]#0, %[[COND]]#0) {T = "tfdtype$DT_INT32", _class = ["loc = @while/Merge"], device = "", name = "while/Switch"} : (tensor<*xi32>, tensor) -> (tensor<*xi32>, tensor<*xi32>, !_tf.control) -// CHECK-NEXT: %[[EXIT:[0-9]*]]:2 = "_tf.Exit"(%[[SWITCH]]#0) {T = "tfdtype$DT_INT32", device = "", name = "while/Exit"} : (tensor<*xi32>) -> (tensor<*xi32>, !_tf.control) -// CHECK-NEXT: %[[IDENTITY:[0-9]*]]:2 = "_tf.Identity"(%[[SWITCH]]#1) {T = "tfdtype$DT_INT32", device = "", name = "while/Identity"} : (tensor<*xi32>) -> (tensor<*xi32>, !_tf.control) -// CHECK-NEXT: %[[CONST_ADD:[0-9]*]]:2 = "_tf.Const"(%[[IDENTITY]]#1) {device = "", dtype = "tfdtype$DT_INT32", name = "while/Add/y", value = dense<3> : tensor} : (!_tf.control) -> (tensor, !_tf.control) -// CHECK-NEXT: %[[ADD:[0-9]*]]:2 = "_tf.Add"(%[[IDENTITY]]#0, %[[CONST_ADD]]#0) {T = "tfdtype$DT_INT32", device = "", name = "while/Add"} : (tensor<*xi32>, tensor) -> 
(tensor<*xi32>, !_tf.control) -// CHECK-NEXT: %[[CT:[0-9]*]] = "_tf.ControlTrigger"(%[[NOOP]], %[[ADD]]#1, %[[EXIT]]#1) {_tpu_replicate = "cluster", device = "", name = "gradients/while/mul_2_Da30D05wlPU_grad/SymbolicGradient/b_sync"} : (!_tf.control, !_tf.control, !_tf.control) -> !_tf.control -// CHECK-NEXT: %[[SINK:[0-9]*]] = "_tf.NextIteration.sink"(%[[ADD]]#0, %[[CT]]) {T = "tfdtype$DT_INT32", device = "", id = 0 : i64, name = "while/NextIteration"} : (tensor<*xi32>, !_tf.control) -> !_tf.control -// CHECK-NEXT: return - -// ----- - -// CHECK-LABEL: func @multiple_ops_region -func @multiple_ops_region(%arg0 : tensor<*xi32>, %arg1 : tensor) { - tf_executor.graph { - %0:2 = tf_executor.island { - // The 4 operations are independent, but the current conversion will add - // control dependencies conservatively. - %1 = "tf.Add"(%arg0, %arg1) {T = "tfdtype$DT_INT32", device = "", name = "while/Add1"} : (tensor<*xi32>, tensor) -> tensor<*xi32> - %2 = "tf.Add"(%arg0, %arg1) {T = "tfdtype$DT_INT32", device = "", name = "while/Add2"} : (tensor<*xi32>, tensor) -> tensor<*xi32> - %3 = "tf.Add"(%arg0, %arg1) {T = "tfdtype$DT_INT32", device = "", name = "while/Add3"} : (tensor<*xi32>, tensor) -> tensor<*xi32> - %4 = "tf.Add"(%arg0, %arg1) {T = "tfdtype$DT_INT32", device = "", name = "while/Add4"} : (tensor<*xi32>, tensor) -> tensor<*xi32> - tf_executor.yield %4 : tensor<*xi32> - } - tf_executor.fetch - } - return -} -// CHECK-NEXT: %[[ADD1:[0-9]*]]:2 = "_tf.Add"(%arg0, %arg1) {T = "tfdtype$DT_INT32", device = "", name = "while/Add1"} : (tensor<*xi32>, tensor) -> (tensor<*xi32>, !_tf.control) -// CHECK-NEXT: %[[ADD2:[0-9]*]]:2 = "_tf.Add"(%arg0, %arg1, %[[ADD1]]#1) {T = "tfdtype$DT_INT32", device = "", name = "while/Add2"} : (tensor<*xi32>, tensor, !_tf.control) -> (tensor<*xi32>, !_tf.control) -// CHECK-NEXT: %[[ADD3:[0-9]*]]:2 = "_tf.Add"(%arg0, %arg1, %[[ADD2]]#1) {T = "tfdtype$DT_INT32", device = "", name = "while/Add3"} : (tensor<*xi32>, tensor, !_tf.control) -> (tensor<*xi32>, !_tf.control) -// CHECK-NEXT: %[[ADD4:[0-9]*]]:2 = "_tf.Add"(%arg0, %arg1, %[[ADD3]]#1) {T = "tfdtype$DT_INT32", device = "", name = "while/Add4"} : (tensor<*xi32>, tensor, !_tf.control) -> (tensor<*xi32>, !_tf.control) - -// ----- - -// CHECK-LABEL: func @switchN( -func @switchN(%arg0: tensor, %arg1: tensor<*xf32>) -> tensor<*xf32> { - %fetches = tf_executor.graph { - // CHECK: [[S1:%.*]]:6 = "_tf._SwitchN"(%arg1, %arg0) {num_outs = 5 : i64} - %1:6 = tf_executor.SwitchN %arg1, %arg0 of 5 : tensor<*xf32> - // CHECK: "_tf._SwitchN"(%arg1, %arg0, [[S1]]#5) {num_outs = 12 : i64} - %2:13 = tf_executor.SwitchN %arg1, %arg0 of 12 (%1#5) : tensor<*xf32> - tf_executor.fetch %2#0 : tensor<*xf32> - } - return %fetches : tensor<*xf32> -} - -// ----- - -// Test if tf_executor dialect ops with Ref types are mapped correctly to the ops in control dialect. 
-// CHECK-LABEL: func @ref_tf_executor_ops -func @ref_tf_executor_ops(%arg0: tensor<4x!tf.f32ref>, %arg1: tensor<4x!tf.f32ref>, %arg3: tensor, %arg4: tensor ) -> tensor<4x!tf.f32ref> { - %result = tf_executor.graph { - // CHECK: _tf.Enter - %0:2 = tf_executor.Enter %arg0 frame "while/while_context" : (tensor<4x!tf.f32ref>) -> (tensor<4x!tf.f32ref>, !tf_executor.control) - // CHECK: _tf.Exit - %1:2 = tf_executor.Exit %arg0 : tensor<4x!tf.f32ref> - // CHECK: _tf.Switch - %2:3 = tf_executor.Switch %arg0, %arg4 : (tensor<4x!tf.f32ref>, tensor) -> (tensor<4x!tf.f32ref>, tensor<4x!tf.f32ref>, !tf_executor.control) - // CHECK: _tf.Merge - %3:3 = tf_executor.Merge %arg0, %arg1 : (tensor<4x!tf.f32ref>, tensor<4x!tf.f32ref>) -> (tensor<4x!tf.f32ref>, tensor, !tf_executor.control) - // CHECK: _tf.NextIteration.source - %4:3 = tf_executor.NextIteration.Source : tensor<4x!tf.f32ref> - // CHECK: _tf.NextIteration.sink - tf_executor.NextIteration.Sink [%4#1] %4#0 : tensor<4x!tf.f32ref> - tf_executor.fetch %0#0 : tensor<4x!tf.f32ref> - } - return %result : tensor<4x!tf.f32ref> -} - -// ----- - -// Tests if empty island with just one control dependency input and output is -// handled correctly. -// CHECK-LABEL: func @empty_island_control_dep_only -func @empty_island_control_dep_only() -> tensor { - %fetch = tf_executor.graph { - %0:2 = tf_executor.island { - %4 = "tf.Const"() {device = "", dtype = "tfdtype$DT_INT32", name = "Const", value = dense<1> : tensor} : () -> tensor - tf_executor.yield %4 : tensor - } - // CHECK-NEXT: %[[CONST1:[0-9]*]]:2 = "_tf.Const"() - // CHECK-SAME: () -> (tensor, !_tf.control) - %1:2 = tf_executor.island { - %5 = "tf.Const"() {device = "", dtype = "tfdtype$DT_INT32", name = "Const", value = dense<1> : tensor} : () -> tensor - tf_executor.yield %5 : tensor - } - // CHECK-NEXT: %[[CONST2:[0-9]*]]:2 = "_tf.Const"() - // CHECK-SAME: () -> (tensor, !_tf.control) - %2 = tf_executor.island(%0#1) { - tf_executor.yield - } - %3:2 = tf_executor.island(%2, %1#1) { - %6 = "tf.Add"(%0#0, %1#0) : (tensor, tensor) -> tensor - tf_executor.yield %6 : tensor - } - // CHECK-NEXT: %[[ADD:[0-9]*]]:2 = "_tf.Add"(%[[CONST1]]#0, %[[CONST2]]#0, %[[CONST1]]#1, %[[CONST2]]#1) - // CHECK-SAME: (tensor, tensor, !_tf.control, !_tf.control) -> (tensor, !_tf.control) - tf_executor.fetch %3#0 : tensor - } - return %fetch : tensor -} - -// ----- - -// Tests if empty island with multiple control inputs will be replaced with a -// no-op. 
-// CHECK-LABEL: func @empty_island_multi_control_inputs -func @empty_island_multi_control_inputs() -> tensor { - %fetch = tf_executor.graph { - %0:2 = tf_executor.island { - %4 = "tf.Const"() {device = "", dtype = "tfdtype$DT_INT32", name = "Const", value = dense<1> : tensor} : () -> tensor - tf_executor.yield %4 : tensor - } - // CHECK-NEXT: %[[CONST1:[0-9]*]]:2 = "_tf.Const"() - // CHECK-SAME: () -> (tensor, !_tf.control) - %1:2 = tf_executor.island { - %5 = "tf.Const"() {device = "", dtype = "tfdtype$DT_INT32", name = "Const", value = dense<1> : tensor} : () -> tensor - tf_executor.yield %5 : tensor - } - // CHECK-NEXT: %[[CONST2:[0-9]*]]:2 = "_tf.Const"() - // CHECK-SAME: () -> (tensor, !_tf.control) - %2 = tf_executor.island(%0#1, %1#1) { - tf_executor.yield - } - // CHECK-NEXT: %[[NOOP:[0-9]*]] = "_tf.NoOp"(%[[CONST1]]#1, %[[CONST2]]#1) - // CHECK-SAME: (!_tf.control, !_tf.control) -> !_tf.control - %3:2 = tf_executor.island(%2) { - %6 = "tf.Add"(%0#0, %1#0) : (tensor, tensor) -> tensor - tf_executor.yield %6 : tensor - } - // CHECK-NEXT: %[[ADD:[0-9]*]]:2 = "_tf.Add"(%[[CONST1]]#0, %[[CONST2]]#0, %[[NOOP]]) - // CHECK-SAME: (tensor, tensor, !_tf.control) -> (tensor, !_tf.control) - tf_executor.fetch %3#0 : tensor - } - return %fetch : tensor -} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/raise-control-flow.mlir b/tensorflow/compiler/mlir/tensorflow/tests/raise-control-flow.mlir deleted file mode 100644 index a6c7bdd72ed..00000000000 --- a/tensorflow/compiler/mlir/tensorflow/tests/raise-control-flow.mlir +++ /dev/null @@ -1,57 +0,0 @@ -// RUN: tf-opt %s -tf-raise-control-flow -split-input-file | FileCheck %s - -// Test that we remove underscores. - -// CHECK-LABEL: func @testSimpleAddsAndIdentity(%arg0: tensor<*xf32>) -func @testSimpleAddsAndIdentity(tensor<*xf32>) -> tensor<*xf32> { -^bb0(%0: tensor<*xf32>): - - // CHECK: %0 = "tf.Identity"(%arg0) : (tensor<*xf32>) -> tensor<*xf32> - %1 = "_tf.Identity"(%0) : (tensor<*xf32>) -> tensor<*xf32> - - // CHECK: %1 = "tf.Add"(%arg0, %arg0) : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32> - %2 = "_tf.Add"(%0, %0) : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32> - - // CHECK: %2 = "tf.Add"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32> - %3 = "_tf.Add"(%1, %2) : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32> - - // CHECK: return %2 : tensor<*xf32> - return %3 : tensor<*xf32> -} - -// CHECK-LABEL: func @testAddWithControlDependency(%arg0: tensor<*xf32>) -func @testAddWithControlDependency(tensor<*xf32>) -> tensor<*xf32> { -^bb0(%0: tensor<*xf32>): - - // CHECK: %0 = "tf.Identity"(%arg0) : (tensor<*xf32>) -> tensor<*xf32> - %1:2 = "_tf.Identity"(%0) : (tensor<*xf32>) -> (tensor<*xf32>, !_tf.control) - - // CHECK: %1 = "tf.Add"(%arg0, %arg0) : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32> - %2:2 = "_tf.Add"(%0, %0, %1#1) : (tensor<*xf32>, tensor<*xf32>, !_tf.control) -> (tensor<*xf32>, !_tf.control) - - // CHECK: %2 = "tf.Add"(%0, %1) : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32> - %3:2 = "_tf.Add"(%1#0, %2, %1#1, %2#1) : (tensor<*xf32>, tensor<*xf32>, !_tf.control, !_tf.control) -> (tensor<*xf32>, !_tf.control) - - // CHECK: return %2 : tensor<*xf32> - return %3 : tensor<*xf32> -} - -// TODO(clattner): simplify and expand these tests. This is mostly a placeholder. 
-func @LoopTest() { - %0:2 = "_tf.Const"() {device = "", name = "Const", dtype = "tfdtype$DT_INT32", value = dense<1> : tensor} : () -> (tensor, !_tf.control) - %1:2 = "_tf.Enter"(%0#0) {device = "", name = "while/Enter", T = "tfdtype$DT_INT32", frame_name = "while/while_context", is_constant = false, parallel_iterations = 10} : (tensor) -> (tensor<*xi32>, !_tf.control) - - %11:2 = "_tf.NextIteration.source"() {device = "", name = "while/NextIteration", T = "tfdtype$DT_INT32", id = 0} : () -> (tensor<*xi32>, !_tf.control) - - %2:3 = "_tf.Merge"(%11#0, %1#0) {device = "", name = "while/Merge", N = 2, T = "tfdtype$DT_INT32"} : (tensor<*xi32>, tensor<*xi32>) -> (tensor<*xi32>, tensor, !_tf.control) - %3:2 = "_tf.Const"(%2#2) {device = "", name = "while/Less/y", dtype = "tfdtype$DT_INT32", value = dense<2> : tensor} : (!_tf.control) -> (tensor, !_tf.control) - %4:2 = "_tf.Less"(%2#0, %3#0) {device = "", name = "while/Less", T = "tfdtype$DT_INT32"} : (tensor<*xi32>, tensor) -> (tensor<*xi1>, !_tf.control) - %5:2 = "_tf.LoopCond"(%4#0) {device = "", name = "while/LoopCond"} : (tensor<*xi1>) -> (tensor, !_tf.control) - %6:3 = "_tf.Switch"(%2#0, %5#0) {device = "", name = "while/Switch", T = "tfdtype$DT_INT32", _class = ["loc:@while/Merge"]} : (tensor<*xi32>, tensor) -> (tensor<*xi32>, tensor<*xi32>, !_tf.control) - %7:2 = "_tf.Exit"(%6#0) {device = "", name = "while/Exit", T = "tfdtype$DT_INT32"} : (tensor<*xi32>) -> (tensor<*xi32>, !_tf.control) - %8:2 = "_tf.Identity"(%6#1) {device = "", name = "while/Identity", T = "tfdtype$DT_INT32"} : (tensor<*xi32>) -> (tensor<*xi32>, !_tf.control) - %9:2 = "_tf.Const"(%8#1) {device = "", name = "while/Add/y", dtype = "tfdtype$DT_INT32", value = dense<3> : tensor} : (!_tf.control) -> (tensor, !_tf.control) - %10:2 = "_tf.Add"(%8#0, %9#0) {device = "", name = "while/Add", T = "tfdtype$DT_INT32"} : (tensor<*xi32>, tensor) -> (tensor<*xi32>, !_tf.control) - %ctl = "_tf.NextIteration.sink"(%10#0) {device = "", name = "while/NextIteration", T = "tfdtype$DT_INT32", id = 0} : (tensor<*xi32>) -> (!_tf.control) - return -} diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/optimize.cc b/tensorflow/compiler/mlir/tensorflow/transforms/optimize.cc index 849f1487c6e..24e77d31e7c 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/optimize.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/optimize.cc @@ -58,6 +58,8 @@ void CreateTFStandardPipeline(OpPassManager &pm, func_pm.addPass(tf_executor::CreateTFExecutorGraphPruningPass()); func_pm.addPass(tf_executor::CreateTFExecutorIslandCoarseningPass()); func_pm.addPass(CreateMaterializePassthroughOpPass()); + if (options.form_clusters) + func_pm.addPass(TFDevice::CreateClusterFormationPass()); // Hopefully there is a single island left, or there wasn't any to begin with. // We now run the optimizer which operates mostly inside islands. 
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h index 08c95bd8b0e..5ca3b3fc06c 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h +++ b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h @@ -77,6 +77,9 @@ struct StandardPipelineOptions Option enable_inliner{*this, "enable-inliner", llvm::cl::desc("Enable inliner."), llvm::cl::init(false)}; + Option form_clusters{*this, "form-clusters", + llvm::cl::desc("Enable Cluster Formation pass."), + llvm::cl::init(false)}; }; // Propagates the pass manager with the passes involved in transforming or @@ -149,13 +152,6 @@ std::unique_ptr> CreateLegalizeHloToTfPass(); std::unique_ptr> CreateOpFusionPass(); } // namespace TF -namespace TFControlFlow { -// Raises from the "TensorFlow Control Flow" dialect to the standard TensorFlow -// dialect. -std::unique_ptr> CreateRaiseTFControlFlowPass(); - -} // namespace TFControlFlow - namespace tf_executor { class GraphOp; diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/raise_control_flow.cc b/tensorflow/compiler/mlir/tensorflow/transforms/raise_control_flow.cc deleted file mode 100644 index ca234818e10..00000000000 --- a/tensorflow/compiler/mlir/tensorflow/transforms/raise_control_flow.cc +++ /dev/null @@ -1,159 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// This file implements logic for raising from the "TensorFlow control flow" -// dialect of MLIR to the standard TensorFlow dialect. The TensorFlow control -// flow dialect represents control flow with Switch/Merge and a few related -// control flow nodes, along with control dependencies. -// -// This pass rebuilds them code in terms of MLIR branches and blocks, -// eliminating control dependencies, and results in the code being in the -// canonical TensorFlow dialect. - -#include "mlir/IR/Builders.h" // from @llvm-project -#include "mlir/IR/Operation.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project -#include "tensorflow/compiler/mlir/tensorflow/ir/control_flow_ops.h" -#include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" - -namespace mlir { -namespace TFControlFlow { - -namespace { -struct RaiseTFControlFlow - : public PassWrapper { - void runOnFunction() { - // First start by recognizing loops and reconstructing a loop tree. - buildLoopNests(); - - // Next, transform Switch/Merge and other control flow ops into proper - // conditional control flow. - buildConditionals(); - - // Now that we have proper conditional control flow ops, the control edges - // can be dropped, and the underscores removed from operation names. 
- rewriteOps(); - } - - void buildLoopNests(); - void buildConditionals(); - void rewriteOps(); -}; - -//===----------------------------------------------------------------------===// -// Loop nest reconstruction -//===----------------------------------------------------------------------===// - -void RaiseTFControlFlow::buildLoopNests() { - // TODO(clattner) -} - -//===----------------------------------------------------------------------===// -// Conditional Reconstruction -//===----------------------------------------------------------------------===// - -void RaiseTFControlFlow::buildConditionals() { - // TODO. -} - -//===----------------------------------------------------------------------===// -// Final rewrite from TF Control Flow form to canonical TensorFlow form -//===----------------------------------------------------------------------===// - -static bool isUnderscoredTFOp(Operation &op) { - return op.getName().getStringRef().startswith("_tf."); -} - -// Drop control edges, and remove underscores from operation names. -void RaiseTFControlFlow::rewriteOps() { - auto function = getFunction(); - OpBuilder builder(function.getBody()); - - // On the first pass, create replacement operations for every one we are going - // to replace, updating anything that uses the normal results with the newly - // created operation. - for (auto &bb : function) { - for (auto &op : bb) { - // Ignore any operations that we aren't looking for. - if (!isUnderscoredTFOp(op)) continue; - - // We always insert the replacement operation next to the operation it - // is replacing. - builder.setInsertionPoint(&op); - - // Drop the leading _ off the name. - OperationState result(op.getLoc(), - op.getName().getStringRef().drop_front()); - - // Add an operand for each non-control input we find. Control values - // aren't necessary any more since the order within a block encodes the - // same information. - for (auto &operand : op.getOpOperands()) { - if (!operand.get().getType().isa()) - result.operands.push_back(operand.get()); - - // Drop all operands from the old operation, eliminating any - // inter-dependencies after this pass. - operand.drop(); - } - - // Add a result type for each non-control result we find. - bool sawControlResult = false; - for (auto opResult : op.getResults()) { - if (opResult.getType().isa()) { - sawControlResult = true; - } else { - // We assume all control inputs are at the end of the result list. - assert(!sawControlResult && "all control results must be last"); - (void)sawControlResult; - result.types.push_back(opResult.getType()); - } - } - - result.attributes.append(op.getAttrs().begin(), op.getAttrs().end()); - - // Create the replacement operation. - auto *replacement = builder.createOperation(result); - - // We know that all the control results are last, so we can just rewrite - // the first results. - for (unsigned i = 0, e = result.types.size(); i != e; ++i) - op.getResult(i).replaceAllUsesWith(replacement->getResult(i)); - } - } - - // In the second pass, we can safely remove all of the old operations, because - // we know that all inter-dependencies are dropped. - for (auto &bb : function) { - // Advance the iterator so we don't invalidate it when we remove an - // operation later in the loop. 
- for (auto &op : llvm::make_early_inc_range(bb)) - if (isUnderscoredTFOp(op)) op.erase(); - } -} - -} // namespace - -std::unique_ptr> CreateRaiseTFControlFlowPass() { - return std::make_unique(); -} - -static PassRegistration pass( - "tf-raise-control-flow", - "Raise from the TensorFlow Control Flow " - "dialect to the standard TensorFlow dialect"); - -} // namespace TFControlFlow -} // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/translate/executor_to_control_dialect.cc b/tensorflow/compiler/mlir/tensorflow/translate/executor_to_control_dialect.cc deleted file mode 100644 index 481f1fac7b8..00000000000 --- a/tensorflow/compiler/mlir/tensorflow/translate/executor_to_control_dialect.cc +++ /dev/null @@ -1,242 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// This transformation pass transforms from TF executor dialect to MLIR TF -// control dialect. - -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/Sequence.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" // from @llvm-project -#include "mlir/IR/Builders.h" // from @llvm-project -#include "mlir/IR/Operation.h" // from @llvm-project -#include "mlir/IR/Value.h" // from @llvm-project -#include "mlir/Pass/Pass.h" // from @llvm-project -#include "mlir/Pass/PassRegistry.h" // from @llvm-project -#include "mlir/Support/LLVM.h" // from @llvm-project -#include "tensorflow/compiler/mlir/tensorflow/ir/control_flow_ops.h" -#include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" -#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" -#include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" - -#define DEBUG_TYPE "tf-executor-to-ctl" - -namespace mlir { - -namespace { -struct ExecutorToControlDialectConversion - : public PassWrapper { - void runOnFunction() override; -}; -} // end anonymous namespace - -static bool HasSingleGraph(FuncOp function) { - // We expect the function has only one region with one block, - if (function.getBlocks().size() != 1) return false; - auto &block = function.front(); - // and the block contains two ops, - if (std::next(block.begin()) == block.end()) return false; - // one GraphOp, - if (!isa(block.begin())) return false; - // followed by a terminator. 
- if (!std::next(block.begin())->isKnownTerminator()) return false; - return true; -} - -void ExecutorToControlDialectConversion::runOnFunction() { - if (!HasSingleGraph(getFunction())) { - LLVM_DEBUG(llvm::dbgs() - << "Expect a Function with a single block and a single graph op," - " skip tf_executor dialect conversion\n"); - return; - } - Type control_type = TFControlFlow::TFControlType::get(&getContext()); - - Block &body = getFunction().front(); - auto graph = cast(body.front()); - OpBuilder builder = OpBuilder::atBlockEnd(&body); - SmallString<64> new_op_name; - for (auto &op : llvm::make_early_inc_range(llvm::reverse(graph.GetBody()))) { - LLVM_DEBUG(llvm::dbgs() << "Process: " << op.getName() << "\n"); - - if (auto fetch = dyn_cast(op)) { - // Replace all the operands of the fetch op with the uses of the graph - // results, remove the fetch op afterwards. - for (auto ops_and_ret_vals : - llvm::zip(graph.getResults(), fetch.getOperands())) - std::get<0>(ops_and_ret_vals) - .replaceAllUsesWith(std::get<1>(ops_and_ret_vals)); - op.erase(); - continue; - } - - builder.setInsertionPoint(&op); - - if (auto island = dyn_cast(op)) { - Value ctl_sequence = nullptr; - if (island.GetBody().without_terminator().empty() && - island.getNumOperands() > 1) { - // For an empty island with multiple control inputs, we create a no-op - // inside it which will group all the inputs into one control output. - // This helps reducing the number of edges when there are multiple - // islands depending on this one. - builder.setInsertionPointToStart(&island.GetBody()); - builder.create(op.getLoc(), ArrayRef{}, - ArrayRef{}, ArrayRef{}); - builder.setInsertionPoint(&op); - } - for (Operation &wrapped_op : island.GetBody()) { - LLVM_DEBUG(llvm::dbgs() - << " In island: " << wrapped_op.getName() << "\n"); - if (isa(wrapped_op)) { - for (auto ops_and_ret_vals : - llvm::zip(island.getResults(), wrapped_op.getOperands())) - std::get<0>(ops_and_ret_vals) - .replaceAllUsesWith(std::get<1>(ops_and_ret_vals)); - break; - } - // Add a leading _ off the name. - new_op_name = "_"; - new_op_name += wrapped_op.getName().getStringRef(); - OperationState state(wrapped_op.getLoc(), new_op_name); - - // Add an operand for each non-control input we find. Collect control - // values separately to add them to the island operands - state.operands.append(wrapped_op.getOperands().begin(), - wrapped_op.getOperands().end()); - - // Chain operations through a control dependency, except for the first - // operations in the sequence that carry the control dependencies held - // by the island itself. - if (ctl_sequence) { - state.operands.push_back(ctl_sequence); - } else { - for (Value ctl_operand : island.getOperands()) - state.operands.push_back(ctl_operand); - } - - // Add a result type for each result - state.types.append(wrapped_op.getResultTypes().begin(), - wrapped_op.getResultTypes().end()); - state.types.push_back(control_type); - - // Create the replacement operation. - auto *replacement = builder.createOperation(state); - replacement->setAttrs(wrapped_op.getMutableAttrDict()); - - for (auto ops_and_ret_vals : - llvm::zip(wrapped_op.getResults(), replacement->getResults())) - std::get<0>(ops_and_ret_vals) - .replaceAllUsesWith(std::get<1>(ops_and_ret_vals)); - - ctl_sequence = replacement->getResult(replacement->getNumResults() - 1); - } - - if (ctl_sequence) { - // If ctl_sequence is non-null, this means at least one operation has - // been rewritten from ops in island. 
Last op rewritten must logically - // carry // all the island control inputs, we can simply use it to - // replace all uses of island's control output. - island.control().replaceAllUsesWith(ctl_sequence); - } else if (island.getNumOperands() > 0) { - // Getting here means island had an effectively empty body and there is - // just one control input. In this case, island's control output should - // be replaced with the control input. - assert(island.getNumOperands() == 1); - island.control().replaceAllUsesWith(island.getOperand(0)); - } - - op.erase(); - continue; - } - - new_op_name.clear(); - if (isa(op)) { - new_op_name = "_tf.Switch"; - } else if (isa(op)) { - new_op_name = "_tf._SwitchN"; - } else if (isa(op)) { - new_op_name = "_tf.Merge"; - } else if (isa(op)) { - new_op_name = "_tf.NextIteration.source"; - } else if (isa(op)) { - new_op_name = "_tf.NextIteration.sink"; - } else if (isa(op)) { - new_op_name = "_tf.LoopCond"; - } else if (isa(op)) { - new_op_name = "_tf.Enter"; - } else if (isa(op)) { - new_op_name = "_tf.Exit"; - } else if (isa(op)) { - new_op_name = "_tf.ControlTrigger"; - } else { - op.emitOpError() << "unhandled op in tf_executor to _tf conversion"; - return signalPassFailure(); - } - OperationState state(op.getLoc(), new_op_name); - // Drop all TokenType operands since they don't exist in the control - // dialect. - auto non_null_operands = llvm::make_filter_range( - op.getOperands(), - [](Value v) { return !v.getType().isa(); }); - state.operands.append(non_null_operands.begin(), non_null_operands.end()); - for (Type result_type : op.getResultTypes()) { - // Filter out TokenType, they don't exist in the control dialect. - if (result_type.isa()) continue; - if (!result_type.isa()) - state.types.push_back(result_type); - else - state.types.push_back(control_type); - } - // The control dialect has a control result for the sink operation. - if (isa(op)) - state.types.push_back(control_type); - - // Create the replacement operation. - auto *replacement = builder.createOperation(state); - replacement->setAttrs(op.getMutableAttrDict()); - - if (auto next_iteration = - dyn_cast(op)) { - next_iteration.output().replaceAllUsesWith(replacement->getResult(0)); - next_iteration.token().dropAllUses(); - next_iteration.control().replaceAllUsesWith(replacement->getResult(1)); - } else { - for (auto ops_and_ret_vals : - llvm::zip(op.getResults(), replacement->getResults())) - std::get<0>(ops_and_ret_vals) - .replaceAllUsesWith(std::get<1>(ops_and_ret_vals)); - } - op.erase(); - } - - // Now we have rewritten all ops inside GraphOp to TF Control dialect. We need - // to move all operations outside of GraphOp and remove it. - body.getOperations().splice(body.begin(), graph.GetBody().getOperations()); - graph.erase(); -} - -std::unique_ptr> -CreateTFExecutorToControlDialectConversion() { - return std::make_unique(); -} - -} // namespace mlir - -static mlir::PassRegistration pass( - "tf-executor-to-control-conversion", - "Convert from TF executor dialect to TF control dialect"); diff --git a/tensorflow/compiler/mlir/tfjs/tf_tfjs_passes.cc b/tensorflow/compiler/mlir/tfjs/tf_tfjs_passes.cc index a445937570e..d48d90997de 100644 --- a/tensorflow/compiler/mlir/tfjs/tf_tfjs_passes.cc +++ b/tensorflow/compiler/mlir/tfjs/tf_tfjs_passes.cc @@ -23,12 +23,6 @@ limitations under the License. 
#include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h" #include "tensorflow/compiler/mlir/tfjs/transforms/passes.h" -namespace mlir { -/// Create a pass to convert from the TFExecutor to the TF control dialect. -std::unique_ptr> -CreateTFExecutorToControlDialectConversion(); -} // namespace mlir - namespace tensorflow { void AddTFToTFJSConversionPasses(mlir::OpPassManager* pm) { From b68b869e75916e6de37c2ca23a93643faf333011 Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Tue, 9 Jun 2020 13:53:59 -0700 Subject: [PATCH 138/178] Fix invalid keras tensor isinstance check PiperOrigin-RevId: 315553346 Change-Id: I120234e58cb0fb9dce007e7739639519719a9764 --- tensorflow/python/keras/engine/input_layer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/engine/input_layer.py b/tensorflow/python/keras/engine/input_layer.py index b7720754d0c..75cf4960d27 100644 --- a/tensorflow/python/keras/engine/input_layer.py +++ b/tensorflow/python/keras/engine/input_layer.py @@ -164,7 +164,7 @@ class InputLayer(base_layer.Layer): else: raise_eager_tensor_error = False if keras_tensor.keras_tensors_enabled(): - if not isinstance(input_tensor, keras_tensor.keras_tensors_enabled()): + if not isinstance(input_tensor, keras_tensor.KerasTensor): raise_eager_tensor_error = True else: if not tf_utils.is_symbolic_tensor(input_tensor): From 1835465ac5a9c823f7187cb0dd5786da9c360838 Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Tue, 9 Jun 2020 14:02:57 -0700 Subject: [PATCH 139/178] Add error_reporter DCHECK back into SimpleMemoryAllocator. This check was removed due to an internal build problem. PiperOrigin-RevId: 315555154 Change-Id: I0f211aa284b2d327df52941bfbcd998a1daf9656 --- tensorflow/lite/micro/simple_memory_allocator.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/lite/micro/simple_memory_allocator.cc b/tensorflow/lite/micro/simple_memory_allocator.cc index 84ff37b8cbd..7ca662f04d8 100644 --- a/tensorflow/lite/micro/simple_memory_allocator.cc +++ b/tensorflow/lite/micro/simple_memory_allocator.cc @@ -42,6 +42,7 @@ SimpleMemoryAllocator::SimpleMemoryAllocator(ErrorReporter* error_reporter, /* static */ SimpleMemoryAllocator* SimpleMemoryAllocator::Create( ErrorReporter* error_reporter, uint8_t* buffer_head, size_t buffer_size) { + TFLITE_DCHECK(error_reporter != nullptr); TFLITE_DCHECK(buffer_head != nullptr); SimpleMemoryAllocator tmp = SimpleMemoryAllocator(error_reporter, buffer_head, buffer_size); From 0d7dbab9df897bd6991cc6ec9aa08a531a2c4416 Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Tue, 9 Jun 2020 14:19:35 -0700 Subject: [PATCH 140/178] Move the keras related load test to be keras integration test. 
PiperOrigin-RevId: 315558519 Change-Id: Idb5c4cc42185549803069a63d480e839514de090 --- .../python/keras/integration_test/BUILD | 10 ++ .../keras/integration_test/load_test.py | 167 ++++++++++++++++++ tensorflow/python/saved_model/load_test.py | 124 ------------- 3 files changed, 177 insertions(+), 124 deletions(-) create mode 100644 tensorflow/python/keras/integration_test/load_test.py diff --git a/tensorflow/python/keras/integration_test/BUILD b/tensorflow/python/keras/integration_test/BUILD index faf8894f813..eac3c6b03ad 100644 --- a/tensorflow/python/keras/integration_test/BUILD +++ b/tensorflow/python/keras/integration_test/BUILD @@ -42,6 +42,16 @@ tf_py_test( ], ) +cuda_py_test( + name = "load_test", + srcs = ["load_test.py"], + python_version = "PY3", + deps = [ + "//tensorflow:tensorflow_py", + "//tensorflow/python:extra_py_tests_deps", + ], +) + tf_py_test( name = "legacy_rnn_test", # Remove this target in when TF 1 is deprecated. srcs = ["legacy_rnn_test.py"], diff --git a/tensorflow/python/keras/integration_test/load_test.py b/tensorflow/python/keras/integration_test/load_test.py new file mode 100644 index 00000000000..665a4541997 --- /dev/null +++ b/tensorflow/python/keras/integration_test/load_test.py @@ -0,0 +1,167 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tempfile + +from absl.testing import parameterized + +import tensorflow as tf + + +def cycle(obj, cycles, signatures=None): + to_save = obj + # TODO(vbardiovsky): It would be nice if exported protos reached a fixed + # point w.r.t. saving/restoring, ideally after 2nd saving. + for _ in range(cycles): + path = tempfile.mkdtemp(prefix=tf.compat.v1.test.get_temp_dir()) + # If available, we'll run the save and restore preferring the GPU. This + # just makes sure we aren't throwing errors and have enough + # device("CPU") blocks to satisfy the placer. 
+ device = "/device:GPU:0" if tf.test.is_gpu_available() else "/device:CPU:0" + with tf.device(device): + tf.saved_model.save(to_save, path, signatures) + loaded = tf.saved_model.load(path) + to_save = loaded + return loaded + + +@parameterized.named_parameters( + dict(testcase_name="ReloadOnce", cycles=1), + dict(testcase_name="ReloadTwice", cycles=2), + dict(testcase_name="ReloadThrice", cycles=3)) +class LoadTest(tf.test.TestCase, parameterized.TestCase): + + def test_optimizer(self, cycles): + + class _HasOptimizer(tf.Module): + + def __init__(self): + super(_HasOptimizer, self).__init__() + self.layer = tf.keras.layers.Dense(1) + self.optimizer = tf.keras.optimizers.Adam(0.01) + + @tf.function + def __call__(self, x): + return self.layer(x) + + @tf.function + def train(self, x, y): + with tf.GradientTape() as tape: + predicted = self(x) + loss = tf.math.reduce_sum(tf.math.abs(y - predicted)) + train_vars = self.layer.trainable_variables + grads = tape.gradient(loss, train_vars) + self.optimizer.apply_gradients(zip(grads, train_vars)) + + root = _HasOptimizer() + train_input = dict(x=tf.constant([[1.]]), + y=tf.constant([[2.]])) + root.train(**train_input) + imported = cycle(root, cycles) + self.assertAllClose(root.optimizer.learning_rate.numpy(), + imported.optimizer.learning_rate.numpy()) + self.assertAllClose(root(tf.constant([[-0.5]])), + imported(tf.constant([[-0.5]]))) + root.train(**train_input) + imported.train(**train_input) + self.assertAllClose(root(tf.constant([[-0.5]])), + imported(tf.constant([[-0.5]]))) + + def test_model_with_custom_function_attached(self, cycles): + root = tf.train.Checkpoint( + model=tf.keras.Sequential([tf.keras.layers.Dense(2)])) + + @tf.function + def _use_sequential(x): + return root.model.call(x) + + root.model.traced_call = _use_sequential + + original = root.model.traced_call(tf.zeros([1, 1])).numpy() + root = cycle(root, cycles) + self.assertAllEqual( + original, + root.model.traced_call(tf.zeros([1, 1])).numpy()) + + +@parameterized.named_parameters( + dict(testcase_name="ReloadOnce", cycles=1), + dict(testcase_name="ReloadTwice", cycles=2), + dict(testcase_name="ReloadThrice", cycles=3)) +class KerasLoadTest(tf.test.TestCase, parameterized.TestCase): + + def test_dense_features_layer(self, cycles): + columns = [ + tf.feature_column.numeric_column("x"), + tf.feature_column.numeric_column("y") + ] + layer = tf.keras.layers.DenseFeatures(columns) + model = tf.keras.Sequential([layer]) + model_input = {"x": tf.constant([[1.]]), + "y": tf.constant([[2.]])} + self.assertAllClose([[1., 2.]], model.predict(model_input, steps=1)) + loaded = cycle(model, cycles) + output, = loaded._default_save_signature(model_input).values() + self.assertAllClose([[1., 2.]], output) + signature_output, = loaded.signatures["serving_default"]( + **model_input).values() + self.assertAllClose([[1., 2.]], signature_output) + + def test_dense_features_layer_fit(self, cycles): + columns = [tf.feature_column.numeric_column("x")] + model = tf.keras.Sequential( + [tf.keras.layers.DenseFeatures(columns), + tf.keras.layers.Dense(1)]) + model_input = {"x": tf.constant([[1.]])} + model.compile(optimizer="adam", loss="mse", run_eagerly=True) + model.fit(model_input, tf.constant([[3.]])) + loaded = cycle(model, cycles) + loaded._default_save_signature(model_input) + loaded.signatures["serving_default"](**model_input) + + def test_multi_output_layer(self, cycles): + + inp = tf.keras.Input(name="inp", shape=(None,), dtype=tf.float32) + + class _MultiOutput(tf.keras.layers.Layer): + + 
def call(self, x): + return x + 1., x + 2. + + out = _MultiOutput(name="out")(inp) # pylint: disable=not-callable + model = tf.keras.Model(inp, out) + loaded = cycle(model, cycles) + self.assertAllClose( + dict(out=2., out_1=3.), + loaded.signatures["serving_default"](tf.constant(1.))) + + def test_functional_model_with_conv(self, cycles): + x = tf.keras.Input(name="x", shape=(None, None, 3), dtype=tf.float32) + conved = tf.keras.layers.Conv2D( + filters=3, kernel_size=3, dilation_rate=2)(x) + model = tf.keras.Model([x], conved) + model_input = tf.ones((1, 10, 10, 3)) + initial_output = model.predict([model_input]) + model = cycle(model, cycles) + self.assertAllClose( + [initial_output], + list(model.signatures["serving_default"](model_input).values())) + + +if __name__ == "__main__": + tf.test.main() diff --git a/tensorflow/python/saved_model/load_test.py b/tensorflow/python/saved_model/load_test.py index 2144682e21b..7bd2e87c739 100644 --- a/tensorflow/python/saved_model/load_test.py +++ b/tensorflow/python/saved_model/load_test.py @@ -34,7 +34,6 @@ from tensorflow.python.eager import context from tensorflow.python.eager import def_function from tensorflow.python.eager import test from tensorflow.python.eager import wrap_function -from tensorflow.python.feature_column import feature_column_lib from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors @@ -44,14 +43,6 @@ from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_spec from tensorflow.python.framework import test_util from tensorflow.python.framework import versions -from tensorflow.python.keras import keras_parameterized -from tensorflow.python.keras.engine import base_layer -from tensorflow.python.keras.engine import input_layer -from tensorflow.python.keras.engine import sequential -from tensorflow.python.keras.engine import training as training_lib -from tensorflow.python.keras.layers import convolutional -from tensorflow.python.keras.layers import core -from tensorflow.python.keras.optimizer_v2 import adam from tensorflow.python.lib.io import file_io from tensorflow.python.module import module from tensorflow.python.ops import array_ops @@ -515,42 +506,6 @@ class LoadTest(test.TestCase, parameterized.TestCase): self.assertEqual(5, result[1].numpy()) self.assertEqual(0.5, result[2]["x"].numpy()) - def test_optimizer(self, cycles): - - class _HasOptimizer(module.Module): - - def __init__(self): - super(_HasOptimizer, self).__init__() - self.layer = core.Dense(1) - self.optimizer = adam.Adam(0.01) - - @def_function.function - def __call__(self, x): - return self.layer(x) - - @def_function.function - def train(self, x, y): - with backprop.GradientTape() as tape: - predicted = self(x) - loss = math_ops.reduce_sum(math_ops.abs(y - predicted)) - train_vars = self.layer.trainable_variables - grads = tape.gradient(loss, train_vars) - self.optimizer.apply_gradients(zip(grads, train_vars)) - - root = _HasOptimizer() - train_input = dict(x=constant_op.constant([[1.]]), - y=constant_op.constant([[2.]])) - root.train(**train_input) - imported = cycle(root, cycles) - self.assertAllClose(root.optimizer.learning_rate.numpy(), - imported.optimizer.learning_rate.numpy()) - self.assertAllClose(root(constant_op.constant([[-0.5]])), - imported(constant_op.constant([[-0.5]]))) - root.train(**train_input) - imported.train(**train_input) - self.assertAllClose(root(constant_op.constant([[-0.5]])), - 
imported(constant_op.constant([[-0.5]]))) - def test_positional_arguments(self, cycles): def func(x, training=False, abc=7.1, defg=7.7): del abc @@ -1710,21 +1665,6 @@ class LoadTest(test.TestCase, parameterized.TestCase): self.assertEqual(({"output_0": 1., "output_1": 0.}), self.evaluate(root.signatures["serving_default"]())) - def test_model_with_custom_function_attached(self, cycles): - root = util.Checkpoint(model=sequential.Sequential([core.Dense(2)])) - - @def_function.function - def _use_sequential(x): - return root.model.call(x) - - root.model.traced_call = _use_sequential - - original = root.model.traced_call(array_ops.zeros([1, 1])).numpy() - root = cycle(root, cycles) - self.assertAllEqual( - original, - root.model.traced_call(array_ops.zeros([1, 1])).numpy()) - def test_version_info(self, cycles): root = util.Checkpoint() root = cycle(root, cycles) @@ -1849,70 +1789,6 @@ class LoadTest(test.TestCase, parameterized.TestCase): self.assertAllEqual(imported2.f(rt, 3), [[4, 5], [6]]) -@keras_parameterized.run_all_keras_modes(always_skip_v1=True) -@parameterized.named_parameters( - dict(testcase_name="ReloadOnce", cycles=1), - dict(testcase_name="ReloadTwice", cycles=2), - dict(testcase_name="ReloadThrice", cycles=3)) -class KerasLoadTest(test.TestCase, parameterized.TestCase): - - def test_dense_features_layer(self, cycles): - columns = [ - feature_column_lib.numeric_column("x"), - feature_column_lib.numeric_column("y") - ] - layer = feature_column_lib.DenseFeatures(columns) - model = sequential.Sequential([layer]) - model_input = {"x": constant_op.constant([[1.]]), - "y": constant_op.constant([[2.]])} - self.assertAllClose([[1., 2.]], model.predict(model_input, steps=1)) - loaded = cycle(model, cycles) - output, = loaded._default_save_signature(model_input).values() - self.assertAllClose([[1., 2.]], output) - signature_output, = loaded.signatures["serving_default"]( - **model_input).values() - self.assertAllClose([[1., 2.]], signature_output) - - def test_dense_features_layer_fit(self, cycles): - columns = [feature_column_lib.numeric_column("x")] - model = sequential.Sequential( - [feature_column_lib.DenseFeatures(columns), - core.Dense(1)]) - model_input = {"x": constant_op.constant([[1.]])} - model.compile(optimizer="adam", loss="mse", run_eagerly=True) - model.fit(model_input, constant_op.constant([[3.]])) - loaded = cycle(model, cycles) - loaded._default_save_signature(model_input) - loaded.signatures["serving_default"](**model_input) - - def test_multi_output_layer(self, cycles): - - inp = input_layer.Input(name="inp", shape=(None,), dtype=dtypes.float32) - - class _MultiOutput(base_layer.Layer): - - def call(self, x): - return x + 1., x + 2. 
- - out = _MultiOutput(name="out")(inp) - model = training_lib.Model(inp, out) - loaded = cycle(model, cycles) - self.assertAllClose( - dict(out=2., out_1=3.), - loaded.signatures["serving_default"](constant_op.constant(1.))) - - def test_functional_model_with_conv(self, cycles): - x = input_layer.Input(name="x", shape=(None, None, 3), dtype=dtypes.float32) - conved = convolutional.Conv2D(filters=3, kernel_size=3, dilation_rate=2)(x) - model = training_lib.Model([x], conved) - model_input = array_ops.ones((1, 10, 10, 3)) - initial_output = model.predict([model_input]) - model = cycle(model, cycles) - self.assertAllClose( - [initial_output], - list(model.signatures["serving_default"](model_input).values())) - - class SingleCycleTests(test.TestCase, parameterized.TestCase): def test_load_with_tags(self): From befab153340906aa3cecf4fa54ebddd79cd6a7fa Mon Sep 17 00:00:00 2001 From: Henry Tan Date: Tue, 9 Jun 2020 14:31:50 -0700 Subject: [PATCH 141/178] TPU Library internal change. PiperOrigin-RevId: 315561144 Change-Id: I1100e9b1fe9fb21a3a63fd149a38e159cb02b064 --- tensorflow/core/tpu/kernels/BUILD | 20 +++++-- .../tpu/kernels/tpu_compilation_cache_entry.h | 12 ++-- .../kernels/tpu_compilation_cache_external.cc | 12 ++-- .../kernels/tpu_compilation_cache_external.h | 21 +++---- .../{tpu_program.cc => tpu_program_group.cc} | 47 +++++++++++---- .../{tpu_program.h => tpu_program_group.h} | 30 +++++----- .../tpu/kernels/tpu_program_group_interface.h | 59 +++++++++++++++++++ 7 files changed, 147 insertions(+), 54 deletions(-) rename tensorflow/core/tpu/kernels/{tpu_program.cc => tpu_program_group.cc} (83%) rename tensorflow/core/tpu/kernels/{tpu_program.h => tpu_program_group.h} (87%) create mode 100644 tensorflow/core/tpu/kernels/tpu_program_group_interface.h diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index 0e5a91c961c..eb464fa7461 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -124,7 +124,7 @@ cc_library( ], deps = [ ":tpu_executable_info_proto_cc", - ":tpu_program", + ":tpu_program_group", "//tensorflow/compiler/xla/service:hlo_proto_cc", "//tensorflow/core/lib/core:refcount", ], @@ -167,14 +167,24 @@ cc_library( ) cc_library( - name = "tpu_program", - srcs = ["tpu_program.cc"], - hdrs = ["tpu_program.h"], + name = "tpu_program_group_interface", + hdrs = ["tpu_program_group_interface.h"], + deps = [ + "//tensorflow/compiler/tf2xla:host_compute_metadata_proto_cc", + "//tensorflow/compiler/xla/service:hlo_proto_cc", + ], +) + +cc_library( + name = "tpu_program_group", + srcs = ["tpu_program_group.cc"], + hdrs = ["tpu_program_group.h"], deps = [ ":tpu_compile_c_api_hdrs", ":tpu_compile_op_support", ":tpu_compile_proto_cc", ":tpu_executable_info_proto_cc", + ":tpu_program_group_interface", "//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/xla:xla_proto_cc", "//tensorflow/compiler/xla/client:compile_only_client", @@ -205,7 +215,7 @@ cc_library( ":tpu_compile_c_api_hdrs", ":tpu_compile_op_support", ":tpu_mesh_state_interface", - ":tpu_program", + ":tpu_program_group", ":tpu_util", ":trace_util_hdrs", "//tensorflow/compiler/xla/service", diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h index d16b2d521f6..5fc60236bf9 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h @@ -12,13 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 
express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef EXPERIMENTAL_BRAIN_TPU_1VM_MINIEXECUTOR_TPU_COMPILATION_CACHE_ENTRY_H_ -#define EXPERIMENTAL_BRAIN_TPU_1VM_MINIEXECUTOR_TPU_COMPILATION_CACHE_ENTRY_H_ +#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_ENTRY_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_ENTRY_H_ #include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/core/lib/core/refcount.h" #include "tensorflow/core/tpu/kernels/tpu_executable_info.pb.h" -#include "tensorflow/core/tpu/kernels/tpu_program.h" +#include "tensorflow/core/tpu/kernels/tpu_program_group.h" namespace tensorflow { namespace tpu { @@ -26,7 +26,7 @@ namespace tpu { class CompilationCacheEntry { public: explicit CompilationCacheEntry( - std::unique_ptr tpu_program) + std::unique_ptr tpu_program) : tpu_program_(std::move(tpu_program)) {} // Constructor for an empty entry. @@ -53,7 +53,7 @@ class CompilationCacheEntry { } private: - std::unique_ptr tpu_program_; + std::unique_ptr tpu_program_; }; // Base class for a reference to a cached proto. A unique_ptr to a @@ -81,4 +81,4 @@ class CompilationRefHolder : public ResourceBase { } // namespace tpu } // namespace tensorflow -#endif // EXPERIMENTAL_BRAIN_TPU_1VM_MINIEXECUTOR_TPU_COMPILATION_CACHE_ENTRY_H_ +#endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_ENTRY_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc b/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc index 8dbf60803cc..2f550b20774 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc @@ -25,7 +25,6 @@ limitations under the License. #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_metrics.h" #include "tensorflow/core/tpu/kernels/tpu_compile_c_api.h" #include "tensorflow/core/tpu/kernels/tpu_compile_op_support.h" -#include "tensorflow/core/tpu/kernels/tpu_program.h" #include "tensorflow/core/tpu/kernels/tpu_util.h" #include "tensorflow/core/tpu/kernels/trace_util.h" @@ -42,7 +41,7 @@ int64 get_uid() { } void PopulateEntry(const std::string& key, CompilationEntry* entry, - std::unique_ptr tpu_program) { + std::unique_ptr tpu_program) { // Make the unique keys for each cached proto. for (int i = 0; i < tpu_program->program_count(); ++i) { entry->proto_key.push_back(ProtoKeyForComputation(key, i)); @@ -202,7 +201,7 @@ void TpuCompilationCacheInterface::InsertEntry( CompilationEntry* TpuCompilationCacheInterface::InitializeEntry( const string& key, - const std::function& initialize_program, + const std::function& initialize_program, const TpuCompilationCacheKey& subgraph_key) { CompilationEntry* main_entry = new CompilationEntry(); @@ -221,7 +220,7 @@ CompilationEntry* TpuCompilationCacheInterface::InitializeEntry( // can proceed during the (potentially lengthy) initialization. 
Status initialization_status; - auto tpu_program = absl::make_unique(); + auto tpu_program = absl::make_unique(); { mu_.Unlock(); { @@ -637,7 +636,7 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsentHelper( std::vector* proto_key, std::vector* may_modify_variables, std::vector* removed_entries, std::vector>* hlo_metadata, - const std::function& compile_function) { + const std::function& compile_function) { profiler::TraceMe subgraph_lookup_traceme( "TPU compilation cache subgraph lookup", /*level=*/2); @@ -776,7 +775,8 @@ tensorflow::Status TpuCompilationCacheInterface::CompileIfKeyAbsent( TpuCompilationRefHolder* per_step_ref_holder, int64* uid, std::vector* proto_key, std::vector* may_modify_variables, std::vector>* hlo_metadata, - const std::function& compile_function) { + const std::function& + compile_function) { std::vector removed_entries; auto status = CompileIfKeyAbsentHelper( cache_key, session_metadata, per_step_ref_holder, uid, proto_key, diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h index b6cdbe9fa0b..c3460c7e7bf 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef EXPERIMENTAL_BRAIN_TPU_1VM_MINIEXECUTOR_TPU_COMPILATION_CACHE_INTERFACE_H_ -#define EXPERIMENTAL_BRAIN_TPU_1VM_MINIEXECUTOR_TPU_COMPILATION_CACHE_INTERFACE_H_ +#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_EXTERNAL_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_EXTERNAL_H_ #include #include @@ -34,7 +34,7 @@ limitations under the License. #include "tensorflow/core/tpu/kernels/tpu_compile_c_api.h" #include "tensorflow/core/tpu/kernels/tpu_compile_op_support.h" #include "tensorflow/core/tpu/kernels/tpu_mesh_state_interface.h" -#include "tensorflow/core/tpu/kernels/tpu_program.h" +#include "tensorflow/core/tpu/kernels/tpu_program_group.h" namespace tensorflow { namespace tpu { @@ -122,7 +122,7 @@ class TpuCompilationCacheInterface : public ResourceBase { string cache_entry_debug_string; // Compiled Tpu program. 
- std::unique_ptr tpu_program; + std::unique_ptr tpu_program; }; explicit TpuCompilationCacheInterface(int64_t max_cache_size); @@ -137,7 +137,8 @@ class TpuCompilationCacheInterface : public ResourceBase { TpuCompilationRefHolder* per_step_ref_holder, int64* uid, std::vector* proto_key, std::vector* may_modify_variables, std::vector>* hlo_metadata, - const std::function& compile_function); + const std::function& + compile_function); static TpuCompilationCacheKey CreateCompilationCacheKey( absl::string_view function_name, uint64 function_library_fingerprint, @@ -266,7 +267,7 @@ class TpuCompilationCacheInterface : public ResourceBase { std::vector* proto_key, std::vector* may_modify_variables, std::vector* removed_entries, std::vector>* hlo_metadata, - const std::function& compile_function); + const std::function& compile_function); // This is called by the cache when entry is marked for eviction; by // a RefHolder (via DiscardEntryRefs) when a step completes; and by @@ -330,9 +331,9 @@ class TpuCompilationCacheInterface : public ResourceBase { // // **InitializeEntry releases mu_ during the call to initialize_programs.** CompilationEntry* InitializeEntry( - const string& key, - const std::function& initialize_program, - const TpuCompilationCacheKey& subgraph_key) + const string& key, + const std::function& initialize_program, + const TpuCompilationCacheKey& subgraph_key) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); // Unloads the program associated with the entry from all local devices @@ -391,4 +392,4 @@ class TpuCompilationCacheInterface : public ResourceBase { } // namespace tpu } // namespace tensorflow -#endif // EXPERIMENTAL_BRAIN_TPU_1VM_MINIEXECUTOR_TPU_COMPILATION_CACHE_INTERFACE_H_ +#endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILATION_CACHE_EXTERNAL_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_program.cc b/tensorflow/core/tpu/kernels/tpu_program_group.cc similarity index 83% rename from tensorflow/core/tpu/kernels/tpu_program.cc rename to tensorflow/core/tpu/kernels/tpu_program_group.cc index 7d89ad15ae9..43452b912ec 100644 --- a/tensorflow/core/tpu/kernels/tpu_program.cc +++ b/tensorflow/core/tpu/kernels/tpu_program_group.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/tpu/kernels/tpu_program.h" +#include "tensorflow/core/tpu/kernels/tpu_program_group.h" #include "tensorflow/compiler/xla/service/hlo_module_group.h" #include "tensorflow/compiler/xla/xla.pb.h" @@ -99,7 +99,7 @@ StatusOr> CompileAheadOfTime( } // namespace -int64_t TpuProgram::program_size() const { +int64_t TpuProgramGroup::program_size() const { int64_t total_size = 0; for (XLA_TpuProgram* tpu_program : tpu_programs_) { total_size += TpuProgram_GetProgramSize(tpu_program); @@ -107,7 +107,7 @@ int64_t TpuProgram::program_size() const { return total_size; } -bool TpuProgram::LogProgramMemorySummary() { +bool TpuProgramGroup::LogProgramMemorySummary() { bool success = true; for (const XLA_TpuProgram* tpu_program : tpu_programs_) { success &= TpuProgram_LogProgramMemorySummary(tpu_program); @@ -115,25 +115,25 @@ bool TpuProgram::LogProgramMemorySummary() { return success; } -void TpuProgram::UnloadAndDestroyPrograms() { +void TpuProgramGroup::UnloadAndDestroyPrograms() { for (XLA_TpuProgram* tpu_program : tpu_programs_) { StatusHelper status; TpuProgram_UnloadAndDestroy(tpu_program, status.c_status); auto s = status.status(); if (!s.ok()) { - LOG(ERROR) << "TpuProgram::UnloadPrograms(): " << s.ToString(); + LOG(ERROR) << "TpuProgramGroup::UnloadPrograms(): " << s.ToString(); } } tpu_programs_.clear(); } -/*static*/ Status TpuProgram::Build( +/*static*/ Status TpuProgramGroup::Build( const TPUCompileMetadataProto& metadata, const tensorflow::XlaCompiler::CompilationResult& compilation_result, const std::vector& arg_core_mapping, const std::vector>& per_core_arg_shapes, const absl::optional& xla_device_assignment, - TpuProgram* tpu_program) { + TpuProgramGroup* tpu_program_group) { std::vector> per_core_output_shapes( metadata.num_cores_per_replica()); TF_RETURN_IF_ERROR(ComputeOutputShapesForEachCore( @@ -149,7 +149,7 @@ void TpuProgram::UnloadAndDestroyPrograms() { TF_RET_CHECK(per_core_output_shapes.size() == per_core_arg_shapes.size()); TF_RET_CHECK(per_core_output_shapes.size() == per_core_variable_indices.size()); - tpu_program->set_may_modify_variables(may_modify_variables); + tpu_program_group->set_may_modify_variables(may_modify_variables); // With shardable input/output pairs, XLA could generate separate // sharding/unsharding programs along with the main program. The @@ -164,7 +164,7 @@ void TpuProgram::UnloadAndDestroyPrograms() { // SPMD could return 1 result for all partitions. TF_RET_CHECK(xla_tpu_programs.size() == 1 || xla_tpu_programs.size() == metadata.num_cores_per_replica()); - tpu_program->set_tpu_programs(xla_tpu_programs); + tpu_program_group->set_tpu_programs(xla_tpu_programs); // TODO(jiawenhao): Handle the case of xla_tpu_programs.size() > 1. 
TpuSerializedProto serialized_executable_info; @@ -173,7 +173,7 @@ void TpuProgram::UnloadAndDestroyPrograms() { TPUExecutableInfoProto executable_info = se_tpu::DeserializeProto( serialized_executable_info); - tpu_program->set_executable_info(executable_info); + tpu_program_group->set_executable_info(executable_info); StreamExecutor_Tpu_FreeSerializedProto(&serialized_executable_info); TPUHostTransferInfoProto host_transfer_info; @@ -185,17 +185,40 @@ void TpuProgram::UnloadAndDestroyPrograms() { serialized_host_transfer_info); StreamExecutor_Tpu_FreeSerializedProto(&serialized_host_transfer_info); } - tpu_program->set_host_transfer_info(host_transfer_info); + tpu_program_group->set_host_transfer_info(host_transfer_info); TpuSerializedProto serialized_hlo_metadata; TpuProgram_GetHloMetadata(xla_tpu_programs[0], &serialized_hlo_metadata); xla::HloProto hlo_metadata = se_tpu::DeserializeProto(serialized_hlo_metadata); - tpu_program->set_hlo_metadata(hlo_metadata); + tpu_program_group->set_hlo_metadata(hlo_metadata); StreamExecutor_Tpu_FreeSerializedProto(&serialized_hlo_metadata); return Status::OK(); } +xla::HloProto TpuProgramGroup::hlo_metadata(int core_index) const { + CHECK_GE(core_index, 0); + CHECK_LT(core_index, program_count()); + TpuSerializedProto serialized_hlo_proto; + auto cleanup = gtl::MakeCleanup([serialized_hlo_proto]() { + StreamExecutor_Tpu_FreeSerializedProto(&serialized_hlo_proto); + }); + TpuProgram_GetHloMetadata(tpu_programs_[core_index], &serialized_hlo_proto); + return stream_executor::tpu::DeserializeProto( + serialized_hlo_proto); +} + +std::vector> +TpuProgramGroup::hlo_metadatas() const { + const size_t metadata_count = program_count(); + std::vector> hlo_metadatas; + hlo_metadatas.resize(metadata_count); + for (size_t i = 0; i < metadata_count; ++i) { + hlo_metadatas[i] = std::make_shared(hlo_metadata(i)); + } + return hlo_metadatas; +} + } // namespace tpu } // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_program.h b/tensorflow/core/tpu/kernels/tpu_program_group.h similarity index 87% rename from tensorflow/core/tpu/kernels/tpu_program.h rename to tensorflow/core/tpu/kernels/tpu_program_group.h index aee55bd2f48..1769f64f074 100644 --- a/tensorflow/core/tpu/kernels/tpu_program.h +++ b/tensorflow/core/tpu/kernels/tpu_program_group.h @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef EXPERIMENTAL_BRAIN_TPU_1VM_MINIEXECUTOR_TPU_PROGRAM_H_ -#define EXPERIMENTAL_BRAIN_TPU_1VM_MINIEXECUTOR_TPU_PROGRAM_H_ +#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_PROGRAM_GROUP_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_TPU_PROGRAM_GROUP_H_ #include @@ -25,6 +25,7 @@ limitations under the License. #include "tensorflow/core/tpu/kernels/tpu_compile_c_api.h" #include "tensorflow/core/tpu/kernels/tpu_compile_op_support.h" #include "tensorflow/core/tpu/kernels/tpu_executable_info.pb.h" +#include "tensorflow/core/tpu/kernels/tpu_program_group_interface.h" #include "tensorflow/stream_executor/tpu/tpu_platform_interface.h" namespace tensorflow { @@ -78,32 +79,27 @@ class TpuAotCompilationOptions : public xla::AotCompilationOptions { shardable_value_update_pairs_; }; -// An executable capable of being fed to a TPU device. 
-class TpuProgram { +class TpuProgramGroup : public TpuProgramGroupInterface { public: using Status = ::stream_executor::port::Status; - virtual ~TpuProgram() = default; - static Status Build( const TPUCompileMetadataProto& metadata, const tensorflow::XlaCompiler::CompilationResult& compilation_result, const std::vector& arg_core_mapping, const std::vector>& per_core_arg_shapes, const absl::optional& xla_device_assignment, - TpuProgram* tpu_program); + TpuProgramGroup* tpu_program); - size_t program_count() const { - return tpu_programs_.size(); - } + size_t program_count() const override { return tpu_programs_.size(); } - int64_t program_size() const; + int64_t program_size() const override; - bool LogProgramMemorySummary(); + bool LogProgramMemorySummary() override; - void UnloadAndDestroyPrograms(); + void UnloadAndDestroyPrograms() override; - const std::vector& may_modify_variables() const { + const std::vector& may_modify_variables() const override { return may_modify_variables_; } void set_may_modify_variables(const std::vector& may_modify_variables) { @@ -145,6 +141,10 @@ class TpuProgram { hlo_metadata_ = hlo_metadata; } + xla::HloProto hlo_metadata(int core_index) const; + std::vector> hlo_metadatas() + const override; + private: std::vector may_modify_variables_; tf2xla::HostComputeMetadata host_compute_metadata_; @@ -158,4 +158,4 @@ class TpuProgram { } // namespace tpu } // namespace tensorflow -#endif // EXPERIMENTAL_BRAIN_TPU_1VM_MINIEXECUTOR_TPU_PROGRAM_H_ +#endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_PROGRAM_GROUP_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_program_group_interface.h b/tensorflow/core/tpu/kernels/tpu_program_group_interface.h new file mode 100644 index 00000000000..a4f74fb750d --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_program_group_interface.h @@ -0,0 +1,59 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_PROGRAM_GROUP_INTERFACE_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_TPU_PROGRAM_GROUP_INTERFACE_H_ + +#include + +#include +#include + +#include "tensorflow/compiler/tf2xla/host_compute_metadata.pb.h" +#include "tensorflow/compiler/xla/service/hlo.pb.h" + +namespace tensorflow { +namespace tpu { + +// An interface to holds all the programs and metadatas generated by the +// compiler, including those for the sharding/unsharding programs. +class TpuProgramGroupInterface { + public: + virtual ~TpuProgramGroupInterface() = default; + + // Computes program count. + virtual size_t program_count() const = 0; + + // Computes total program size. + virtual int64_t program_size() const = 0; + + // Unloads and destroys safely Tpu programs. + virtual void UnloadAndDestroyPrograms() = 0; + + // Logs program memory summary. + virtual bool LogProgramMemorySummary() = 0; + + // Hlo metadatas. 
+  virtual std::vector<std::shared_ptr<xla::HloProto>> hlo_metadatas()
+      const = 0;
+
+  // Boolean array to indicate if the modification of variables are
+  // allowed.
+  virtual const std::vector<bool>& may_modify_variables() const = 0;
+};
+
+}  // namespace tpu
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_TPU_KERNELS_TPU_PROGRAM_GROUP_INTERFACE_H_

From 06784eff59af6b6773c7c1cd69d7ac6497fbe4ae Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" 
Date: Tue, 9 Jun 2020 14:34:09 -0700
Subject: [PATCH 142/178] For Runtime Metadata, HostTracer is not required: it
 is supposed to capture only GPU events, and CPU-side data is collected by
 means other than TraceMe.

PiperOrigin-RevId: 315561690
Change-Id: I7d927d487d5a989905e19f3db350960df111200c
---
 tensorflow/core/common_runtime/direct_session.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc
index 96938bcbafd..8093795b498 100644
--- a/tensorflow/core/common_runtime/direct_session.cc
+++ b/tensorflow/core/common_runtime/direct_session.cc
@@ -666,7 +666,9 @@ Status DirectSession::RunInternal(

   std::unique_ptr<ProfilerSession> profiler_session;
   if (run_options.trace_level() >= RunOptions::HARDWARE_TRACE) {
-    profiler_session = ProfilerSession::Create();
+    ProfileOptions options = ProfilerSession::DefaultOptions();
+    options.set_host_tracer_level(0);
+    profiler_session = ProfilerSession::Create(options);
   }

   // Register this step with session's cancellation manager, so that

From b22e5f55cd247749e4ec0251b5cf60163d1e7c54 Mon Sep 17 00:00:00 2001
From: Nick Kreeger 
Date: Tue, 9 Jun 2020 14:45:02 -0700
Subject: [PATCH 143/178] Refactor custom op resolver classes out of the
 MicroInterpreter test.

This change is needed because more tests (e.g. MicroAllocator tests) will
need to use these custom/test/mock op resolvers to verify allocations.
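
For example, a MicroAllocator-focused test can now obtain the shared mocks
directly from test_helpers.h instead of redefining them. A minimal sketch of
the statements such a test body might use (only the model/resolver setup is
shown; the allocator wiring under test is omitted and would be test-specific):

    #include "tensorflow/lite/micro/test_helpers.h"

    // Model and op resolver that previously lived only in
    // micro_interpreter_test.cc.
    const tflite::Model* model = tflite::testing::GetSimpleMockModel();
    tflite::testing::MockOpResolver mock_resolver;

    // Registrations for the test ops are looked up through the shared
    // resolver rather than being redefined per test file.
    const TfLiteRegistration* custom_reg =
        mock_resolver.FindOp("mock_custom");
    const TfLiteRegistration* stateful_reg =
        mock_resolver.FindOp("simple_stateful_op");
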
PiperOrigin-RevId: 315563985 Change-Id: Iac1df21a2664abfaaf023e8ad6d6ebd3a59d338a --- tensorflow/lite/micro/BUILD | 33 +++- tensorflow/lite/micro/kernels/BUILD | 5 + .../lite/micro/micro_interpreter_test.cc | 164 +----------------- tensorflow/lite/micro/test_helpers.cc | 137 +++++++++++++++ tensorflow/lite/micro/test_helpers.h | 42 +++++ tensorflow/lite/micro/testing/BUILD | 1 + 6 files changed, 219 insertions(+), 163 deletions(-) diff --git a/tensorflow/lite/micro/BUILD b/tensorflow/lite/micro/BUILD index dbfa6c7aaf1..41f3fde803b 100644 --- a/tensorflow/lite/micro/BUILD +++ b/tensorflow/lite/micro/BUILD @@ -30,7 +30,6 @@ cc_library( "micro_interpreter.cc", "micro_optional_debug_tools.cc", "simple_memory_allocator.cc", - "test_helpers.cc", ], hdrs = [ "memory_helpers.h", @@ -38,13 +37,11 @@ cc_library( "micro_interpreter.h", "micro_optional_debug_tools.h", "simple_memory_allocator.h", - "test_helpers.h", ], build_for_embedded = True, copts = micro_copts(), deps = [ ":micro_compatibility", - ":micro_utils", ":op_resolvers", "//tensorflow/lite:type_to_tflitetype", "//tensorflow/lite/c:common", @@ -58,6 +55,29 @@ cc_library( ], ) +cc_library( + name = "test_helpers", + srcs = [ + "test_helpers.cc", + ], + hdrs = [ + "test_helpers.h", + ], + build_for_embedded = True, + copts = micro_copts(), + deps = [ + ":micro_utils", + ":op_resolvers", + "//tensorflow/lite/c:common", + "//tensorflow/lite/core/api", + "//tensorflow/lite/kernels:kernel_util", + "//tensorflow/lite/kernels/internal:compatibility", + "//tensorflow/lite/kernels/internal:tensor", + "//tensorflow/lite/schema:schema_fbs", + "@flatbuffers//:runtime_cc", + ], +) + cc_library( name = "op_resolvers", srcs = [ @@ -228,8 +248,8 @@ tflite_micro_cc_test( ":micro_framework", ":micro_utils", ":op_resolvers", + ":test_helpers", "//tensorflow/lite/core/api", - "//tensorflow/lite/kernels:kernel_util", "//tensorflow/lite/micro/testing:micro_test", ], ) @@ -241,6 +261,7 @@ tflite_micro_cc_test( ], deps = [ ":micro_framework", + ":test_helpers", "//tensorflow/lite/micro/testing:micro_test", ], ) @@ -253,6 +274,7 @@ tflite_micro_cc_test( deps = [ ":micro_framework", ":recording_allocators", + ":test_helpers", "//tensorflow/lite/micro/testing:micro_test", ], ) @@ -264,6 +286,7 @@ tflite_micro_cc_test( ], deps = [ ":micro_framework", + ":test_helpers", "//tensorflow/lite/micro/testing:micro_test", ], ) @@ -277,6 +300,7 @@ tflite_micro_cc_test( ":micro_framework", ":op_resolvers", ":recording_allocators", + ":test_helpers", "//tensorflow/lite/micro/testing:micro_test", "//tensorflow/lite/micro/testing:test_conv_model", ], @@ -289,6 +313,7 @@ tflite_micro_cc_test( ], deps = [ ":micro_framework", + ":test_helpers", "//tensorflow/lite/micro/testing:micro_test", ], ) diff --git a/tensorflow/lite/micro/kernels/BUILD b/tensorflow/lite/micro/kernels/BUILD index 5a429caba4e..c7fa19b8cea 100644 --- a/tensorflow/lite/micro/kernels/BUILD +++ b/tensorflow/lite/micro/kernels/BUILD @@ -237,6 +237,7 @@ tflite_micro_cc_test( "//tensorflow/lite/micro:micro_framework", "//tensorflow/lite/micro:micro_utils", "//tensorflow/lite/micro:op_resolvers", + "//tensorflow/lite/micro:test_helpers", "//tensorflow/lite/micro/testing:micro_test", ], ) @@ -516,6 +517,7 @@ tflite_micro_cc_test( "//tensorflow/lite/c:common", "//tensorflow/lite/micro:micro_framework", "//tensorflow/lite/micro:op_resolvers", + "//tensorflow/lite/micro:test_helpers", "//tensorflow/lite/micro/testing:micro_test", ], ) @@ -529,6 +531,7 @@ tflite_micro_cc_test( "//tensorflow/lite/c:common", 
"//tensorflow/lite/micro:micro_framework", "//tensorflow/lite/micro:op_resolvers", + "//tensorflow/lite/micro:test_helpers", "//tensorflow/lite/micro/testing:micro_test", ], ) @@ -550,6 +553,7 @@ tflite_micro_cc_test( "//tensorflow/lite/micro:micro_framework", "//tensorflow/lite/micro:micro_utils", "//tensorflow/lite/micro:op_resolvers", + "//tensorflow/lite/micro:test_helpers", "//tensorflow/lite/micro/testing:micro_test", ], ) @@ -587,6 +591,7 @@ tflite_micro_cc_test( "//tensorflow/lite/c:common", "//tensorflow/lite/micro:micro_framework", "//tensorflow/lite/micro:op_resolvers", + "//tensorflow/lite/micro:test_helpers", "//tensorflow/lite/micro/testing:micro_test", ], ) diff --git a/tensorflow/lite/micro/micro_interpreter_test.cc b/tensorflow/lite/micro/micro_interpreter_test.cc index ce08e44435f..c6d034819c3 100644 --- a/tensorflow/lite/micro/micro_interpreter_test.cc +++ b/tensorflow/lite/micro/micro_interpreter_test.cc @@ -18,172 +18,18 @@ limitations under the License. #include #include "tensorflow/lite/core/api/flatbuffer_conversions.h" -#include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/micro/micro_mutable_op_resolver.h" #include "tensorflow/lite/micro/micro_optional_debug_tools.h" #include "tensorflow/lite/micro/micro_utils.h" #include "tensorflow/lite/micro/test_helpers.h" #include "tensorflow/lite/micro/testing/micro_test.h" -namespace tflite { -namespace { - -// A simple operator that returns the median of the input with the number of -// times the kernel was invoked. The implementation below is deliberately -// complicated, just to demonstrate how kernel memory planning works. -class SimpleStatefulOp { - static constexpr int kBufferNotAllocated = 0; - // Inputs: - static constexpr int kInputTensor = 0; - // Outputs: - static constexpr int kMedianTensor = 0; - static constexpr int kInvokeCount = 1; - struct OpData { - int invoke_count = 0; - int sorting_buffer = kBufferNotAllocated; - }; - - public: - static const TfLiteRegistration* getRegistration() { - static TfLiteRegistration r = {Init, /* free= */ nullptr, Prepare, Invoke}; - return &r; - } - - static void* Init(TfLiteContext* context, const char* buffer, size_t length) { - TF_LITE_MICRO_EXPECT_EQ(nullptr, context->RequestScratchBufferInArena); - TF_LITE_MICRO_EXPECT_EQ(nullptr, context->AllocateBufferForEval); - TF_LITE_MICRO_EXPECT_EQ(nullptr, context->GetScratchBuffer); - - void* raw; - TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, context->AllocatePersistentBuffer( - context, sizeof(OpData), &raw)); - OpData* data = reinterpret_cast(raw); - *data = {}; - return raw; - } - - static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - OpData* data = reinterpret_cast(node->user_data); - - // Make sure that the input is in uint8 with at least 1 data entry. - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - if (input->type != kTfLiteUInt8) return kTfLiteError; - if (NumElements(input->dims) == 0) return kTfLiteError; - - // Allocate a temporary buffer with the same size of input for sorting. 
- TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena( - context, sizeof(uint8_t) * NumElements(input->dims), - &data->sorting_buffer)); - return kTfLiteOk; - } - - static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node) { - OpData* data = reinterpret_cast(node->user_data); - data->invoke_count += 1; - - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - const uint8_t* input_data = GetTensorData(input); - int size = NumElements(input->dims); - - uint8_t* sorting_buffer = reinterpret_cast( - context->GetScratchBuffer(context, data->sorting_buffer)); - // Copy inputs data to the sorting buffer. We don't want to mutate the input - // tensor as it might be used by a another node. - for (int i = 0; i < size; i++) { - sorting_buffer[i] = input_data[i]; - } - - // In place insertion sort on `sorting_buffer`. - for (int i = 1; i < size; i++) { - for (int j = i; j > 0 && sorting_buffer[j] < sorting_buffer[j - 1]; j--) { - std::swap(sorting_buffer[j], sorting_buffer[j - 1]); - } - } - - TfLiteTensor* median = GetOutput(context, node, kMedianTensor); - uint8_t* median_data = GetTensorData(median); - TfLiteTensor* invoke_count = GetOutput(context, node, kInvokeCount); - int32_t* invoke_count_data = GetTensorData(invoke_count); - - median_data[0] = sorting_buffer[size / 2]; - invoke_count_data[0] = data->invoke_count; - return kTfLiteOk; - } -}; - -bool freed = false; - -class MockCustom { - public: - static const TfLiteRegistration* getRegistration() { - static TfLiteRegistration r = {Init, Free, Prepare, Invoke}; - return &r; - } - - static void* Init(TfLiteContext* context, const char* buffer, size_t length) { - // We don't support delegate in TFL micro. This is a weak check to test if - // context struct being zero-initialized. - TF_LITE_MICRO_EXPECT_EQ(nullptr, - context->ReplaceNodeSubsetsWithDelegateKernels); - // Do nothing. - return nullptr; - } - - static void Free(TfLiteContext* context, void* buffer) { freed = true; } - - static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - return kTfLiteOk; - } - - static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node) { - const TfLiteTensor* input = GetInput(context, node, 0); - const int32_t* input_data = input->data.i32; - const TfLiteTensor* weight = GetInput(context, node, 1); - const uint8_t* weight_data = weight->data.uint8; - TfLiteTensor* output = GetOutput(context, node, 0); - int32_t* output_data = output->data.i32; - output_data[0] = - 0; // Catch output tensor sharing memory with an input tensor - output_data[0] = input_data[0] + weight_data[0]; - return kTfLiteOk; - } -}; - -class MockOpResolver : public MicroOpResolver { - public: - const TfLiteRegistration* FindOp(BuiltinOperator op) const override { - return nullptr; - } - const TfLiteRegistration* FindOp(const char* op) const override { - if (strcmp(op, "mock_custom") == 0) { - return MockCustom::getRegistration(); - } else if (strcmp(op, "simple_stateful_op") == 0) { - return SimpleStatefulOp::getRegistration(); - } else { - return nullptr; - } - } - - MicroOpResolver::BuiltinParseFunction GetOpDataParser( - tflite::BuiltinOperator) const override { - // TODO(b/149408647): Figure out an alternative so that we do not have any - // references to ParseOpData in the micro code and the signature for - // MicroOpResolver::BuiltinParseFunction can be changed to be different from - // ParseOpData. 
- return ParseOpData; - } -}; - -} // namespace -} // namespace tflite - TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(TestInterpreter) { - tflite::freed = false; const tflite::Model* model = tflite::testing::GetSimpleMockModel(); TF_LITE_MICRO_EXPECT_NE(nullptr, model); - tflite::MockOpResolver mock_resolver; + tflite::testing::MockOpResolver mock_resolver; constexpr size_t allocator_buffer_size = 928 /* optimal arena size at the time of writting. */ + 16 /* alignment */ + 100 /* some headroom */; @@ -232,13 +78,13 @@ TF_LITE_MICRO_TEST(TestInterpreter) { tflite::PrintInterpreterState(&interpreter); } - TF_LITE_MICRO_EXPECT_EQ(tflite::freed, true); + TF_LITE_MICRO_EXPECT_EQ(tflite::testing::MockCustom::freed_, true); } TF_LITE_MICRO_TEST(TestKernelMemoryPlanning) { const tflite::Model* model = tflite::testing::GetSimpleStatefulModel(); TF_LITE_MICRO_EXPECT_NE(nullptr, model); - tflite::MockOpResolver mock_resolver; + tflite::testing::MockOpResolver mock_resolver; constexpr size_t allocator_buffer_size = 1024; uint8_t allocator_buffer[allocator_buffer_size]; tflite::MicroInterpreter interpreter(model, mock_resolver, allocator_buffer, @@ -278,7 +124,7 @@ TF_LITE_MICRO_TEST(TestVariableTensorReset) { const tflite::Model* model = tflite::testing::GetComplexMockModel(); TF_LITE_MICRO_EXPECT_NE(nullptr, model); - tflite::MockOpResolver mock_resolver; + tflite::testing::MockOpResolver mock_resolver; constexpr size_t allocator_buffer_size = 2096 /* optimal arena size at the time of writting. */ + 16 /* alignment */ + 100 /* some headroom */; @@ -356,7 +202,7 @@ TF_LITE_MICRO_TEST(TestIncompleteInitialization) { const tflite::Model* model = tflite::testing::GetComplexMockModel(); TF_LITE_MICRO_EXPECT_NE(nullptr, model); - tflite::MockOpResolver mock_resolver; + tflite::testing::MockOpResolver mock_resolver; constexpr size_t allocator_buffer_size = 2048; uint8_t allocator_buffer[allocator_buffer_size]; tflite::MicroInterpreter interpreter(model, mock_resolver, allocator_buffer, diff --git a/tensorflow/lite/micro/test_helpers.cc b/tensorflow/lite/micro/test_helpers.cc index c2607cd32c6..7d1b4d895d9 100644 --- a/tensorflow/lite/micro/test_helpers.cc +++ b/tensorflow/lite/micro/test_helpers.cc @@ -25,6 +25,8 @@ limitations under the License. #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/core/api/error_reporter.h" #include "tensorflow/lite/kernels/internal/compatibility.h" +#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" +#include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/micro/micro_utils.h" #include "tensorflow/lite/schema/schema_generated.h" @@ -477,6 +479,141 @@ const Model* BuildComplexMockModel() { } // namespace +const TfLiteRegistration* SimpleStatefulOp::getRegistration() { + static TfLiteRegistration r; + r.init = Init; + r.prepare = Prepare; + r.invoke = Invoke; + return &r; +} + +void* SimpleStatefulOp::Init(TfLiteContext* context, const char* buffer, + size_t length) { + TFLITE_DCHECK(context->AllocateBufferForEval == nullptr); + TFLITE_DCHECK(context->GetScratchBuffer == nullptr); + TFLITE_DCHECK(context->RequestScratchBufferInArena == nullptr); + + void* raw; + TFLITE_DCHECK(context->AllocatePersistentBuffer(context, sizeof(OpData), + &raw) == kTfLiteOk); + OpData* data = reinterpret_cast(raw); + *data = {}; + return raw; +} + +TfLiteStatus SimpleStatefulOp::Prepare(TfLiteContext* context, + TfLiteNode* node) { + OpData* data = reinterpret_cast(node->user_data); + + // Make sure that the input is in uint8 with at least 1 data entry. 
+ const TfLiteTensor* input = tflite::GetInput(context, node, kInputTensor); + if (input->type != kTfLiteUInt8) return kTfLiteError; + if (NumElements(input->dims) == 0) return kTfLiteError; + + // Allocate a temporary buffer with the same size of input for sorting. + TF_LITE_ENSURE_STATUS(context->RequestScratchBufferInArena( + context, sizeof(uint8_t) * NumElements(input->dims), + &data->sorting_buffer)); + return kTfLiteOk; +} + +TfLiteStatus SimpleStatefulOp::Invoke(TfLiteContext* context, + TfLiteNode* node) { + OpData* data = reinterpret_cast(node->user_data); + data->invoke_count += 1; + + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + const uint8_t* input_data = GetTensorData(input); + int size = NumElements(input->dims); + + uint8_t* sorting_buffer = reinterpret_cast( + context->GetScratchBuffer(context, data->sorting_buffer)); + // Copy inputs data to the sorting buffer. We don't want to mutate the input + // tensor as it might be used by a another node. + for (int i = 0; i < size; i++) { + sorting_buffer[i] = input_data[i]; + } + + // In place insertion sort on `sorting_buffer`. + for (int i = 1; i < size; i++) { + for (int j = i; j > 0 && sorting_buffer[j] < sorting_buffer[j - 1]; j--) { + std::swap(sorting_buffer[j], sorting_buffer[j - 1]); + } + } + + TfLiteTensor* median = GetOutput(context, node, kMedianTensor); + uint8_t* median_data = GetTensorData(median); + TfLiteTensor* invoke_count = GetOutput(context, node, kInvokeCount); + int32_t* invoke_count_data = GetTensorData(invoke_count); + + median_data[0] = sorting_buffer[size / 2]; + invoke_count_data[0] = data->invoke_count; + return kTfLiteOk; +} + +const TfLiteRegistration* MockCustom::getRegistration() { + static TfLiteRegistration r; + r.init = Init; + r.prepare = Prepare; + r.invoke = Invoke; + r.free = Free; + return &r; +} + +void* MockCustom::Init(TfLiteContext* context, const char* buffer, + size_t length) { + // We don't support delegate in TFL micro. This is a weak check to test if + // context struct being zero-initialized. + TFLITE_DCHECK(context->ReplaceNodeSubsetsWithDelegateKernels == nullptr); + freed_ = false; + // Do nothing. 
+ return nullptr; +} + +void MockCustom::Free(TfLiteContext* context, void* buffer) { freed_ = true; } + +TfLiteStatus MockCustom::Prepare(TfLiteContext* context, TfLiteNode* node) { + return kTfLiteOk; +} + +TfLiteStatus MockCustom::Invoke(TfLiteContext* context, TfLiteNode* node) { + const TfLiteTensor* input = tflite::GetInput(context, node, 0); + const int32_t* input_data = input->data.i32; + const TfLiteTensor* weight = tflite::GetInput(context, node, 1); + const uint8_t* weight_data = weight->data.uint8; + TfLiteTensor* output = GetOutput(context, node, 0); + int32_t* output_data = output->data.i32; + output_data[0] = + 0; // Catch output tensor sharing memory with an input tensor + output_data[0] = input_data[0] + weight_data[0]; + return kTfLiteOk; +} + +bool MockCustom::freed_ = false; + +const TfLiteRegistration* MockOpResolver::FindOp(BuiltinOperator op) const { + return nullptr; +} + +const TfLiteRegistration* MockOpResolver::FindOp(const char* op) const { + if (strcmp(op, "mock_custom") == 0) { + return MockCustom::getRegistration(); + } else if (strcmp(op, "simple_stateful_op") == 0) { + return SimpleStatefulOp::getRegistration(); + } else { + return nullptr; + } +} + +MicroOpResolver::BuiltinParseFunction MockOpResolver::GetOpDataParser( + tflite::BuiltinOperator) const { + // TODO(b/149408647): Figure out an alternative so that we do not have any + // references to ParseOpData in the micro code and the signature for + // MicroOpResolver::BuiltinParseFunction can be changed to be different from + // ParseOpData. + return ParseOpData; +} + const Model* GetSimpleMockModel() { static Model* model = nullptr; if (!model) { diff --git a/tensorflow/lite/micro/test_helpers.h b/tensorflow/lite/micro/test_helpers.h index 2d1d2895db0..4353f69fdbd 100644 --- a/tensorflow/lite/micro/test_helpers.h +++ b/tensorflow/lite/micro/test_helpers.h @@ -23,12 +23,54 @@ limitations under the License. #include "flatbuffers/flatbuffers.h" // from @flatbuffers #include "tensorflow/lite/c/common.h" #include "tensorflow/lite/kernels/internal/compatibility.h" +#include "tensorflow/lite/micro/micro_mutable_op_resolver.h" #include "tensorflow/lite/micro/micro_utils.h" #include "tensorflow/lite/schema/schema_generated.h" namespace tflite { namespace testing { +// A simple operator that returns the median of the input with the number of +// times the kernel was invoked. The implementation below is deliberately +// complicated, just to demonstrate how kernel memory planning works. 
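+// The op expects a single uint8 input tensor and writes two outputs: the
+// median of the input values and the number of times Invoke() has been
+// called.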
+class SimpleStatefulOp { + static constexpr int kBufferNotAllocated = 0; + // Inputs: + static constexpr int kInputTensor = 0; + // Outputs: + static constexpr int kMedianTensor = 0; + static constexpr int kInvokeCount = 1; + struct OpData { + int invoke_count = 0; + int sorting_buffer = kBufferNotAllocated; + }; + + public: + static const TfLiteRegistration* getRegistration(); + static void* Init(TfLiteContext* context, const char* buffer, size_t length); + static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node); + static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node); +}; + +class MockCustom { + public: + static const TfLiteRegistration* getRegistration(); + static void* Init(TfLiteContext* context, const char* buffer, size_t length); + static void Free(TfLiteContext* context, void* buffer); + static TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node); + static TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node); + + static bool freed_; +}; + +class MockOpResolver : public MicroOpResolver { + public: + const TfLiteRegistration* FindOp(BuiltinOperator op) const override; + const TfLiteRegistration* FindOp(const char* op) const override; + MicroOpResolver::BuiltinParseFunction GetOpDataParser( + tflite::BuiltinOperator) const override; +}; + // Returns a simple example flatbuffer TensorFlow Lite model. Contains 1 input, // 1 layer of weights, 1 output Tensor, and 1 operator. const Model* GetSimpleMockModel(); diff --git a/tensorflow/lite/micro/testing/BUILD b/tensorflow/lite/micro/testing/BUILD index 3d97d2fec1e..0d617895d19 100644 --- a/tensorflow/lite/micro/testing/BUILD +++ b/tensorflow/lite/micro/testing/BUILD @@ -25,6 +25,7 @@ cc_library( "//tensorflow/lite/micro:micro_error_reporter", "//tensorflow/lite/micro:micro_framework", "//tensorflow/lite/micro:micro_utils", + "//tensorflow/lite/micro:test_helpers", ], ) From fc49261d22257f0bdf2e09826e5d0c1ca6444390 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Tue, 9 Jun 2020 14:45:09 -0700 Subject: [PATCH 144/178] Added functions for arguments merging. Function for inserting linkable code before Write() call. PiperOrigin-RevId: 315564001 Change-Id: Ica2912e5569cb35b42191577d4c41d78c5328f96 --- tensorflow/lite/delegates/gpu/cl/BUILD | 2 + tensorflow/lite/delegates/gpu/cl/arguments.cc | 179 ++++++++++++++++++ tensorflow/lite/delegates/gpu/cl/arguments.h | 8 +- .../lite/delegates/gpu/cl/arguments_test.cc | 65 ++++++- .../lite/delegates/gpu/cl/tensor_type.cc | 9 + .../lite/delegates/gpu/cl/tensor_type.h | 5 +- 6 files changed, 262 insertions(+), 6 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/BUILD b/tensorflow/lite/delegates/gpu/cl/BUILD index 7ace4ebf6a9..6730da1fdc6 100644 --- a/tensorflow/lite/delegates/gpu/cl/BUILD +++ b/tensorflow/lite/delegates/gpu/cl/BUILD @@ -45,6 +45,7 @@ cc_library( deps = [ ":gpu_object", ":opencl_wrapper", + ":tensor_type", ":util", "//tensorflow/lite/delegates/gpu/common:access_type", "//tensorflow/lite/delegates/gpu/common:status", @@ -65,6 +66,7 @@ cc_test( deps = [ ":arguments", ":gpu_object", + ":tensor_type", "//tensorflow/lite/delegates/gpu/common:data_type", "@com_google_absl//absl/strings", "@com_google_googletest//:gtest_main", diff --git a/tensorflow/lite/delegates/gpu/cl/arguments.cc b/tensorflow/lite/delegates/gpu/cl/arguments.cc index ef4683a9bee..38039a9de44 100644 --- a/tensorflow/lite/delegates/gpu/cl/arguments.cc +++ b/tensorflow/lite/delegates/gpu/cl/arguments.cc @@ -19,6 +19,7 @@ limitations under the License. 
#include "absl/strings/str_cat.h" #include "absl/strings/str_replace.h" #include "absl/strings/str_split.h" +#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h" #include "tensorflow/lite/delegates/gpu/common/status.h" namespace tflite { @@ -110,6 +111,20 @@ void ReplaceAllWords(const std::string& old_word, const std::string& new_word, } } +std::string RenameArg(const std::vector& object_names, + const std::string& postfix, const std::string& arg_name) { + for (const auto& object_name : object_names) { + if (absl::StartsWith(arg_name, object_name) && + arg_name.size() > object_name.size() && + arg_name[object_name.size()] == '_') { + return object_name + postfix + + arg_name.substr(object_name.size(), + arg_name.size() - object_name.size()); + } + } + return arg_name + postfix; +} + void AppendArgument(const std::string& arg, std::string* args) { if (!args->empty()) { absl::StrAppend(args, ",\n "); @@ -361,6 +376,160 @@ absl::Status Arguments::SetGPUResources( return absl::OkStatus(); } +void Arguments::RenameArgs(const std::string& postfix, + std::string* code) const { + size_t next_position = code->find(kArgsPrefix); + while (next_position != std::string::npos) { + size_t arg_pos = next_position + strlen(kArgsPrefix); + std::string arg_name = GetNextWord(*code, arg_pos); + code->replace(arg_pos, arg_name.size(), arg_name + postfix); + next_position = code->find(kArgsPrefix, arg_pos + arg_name.size()); + } +} + +absl::Status Arguments::Merge(Arguments&& args, const std::string& postfix) { + std::vector object_names; + object_names.reserve(args.object_refs_.size() + args.objects_.size()); + for (auto& v : args.object_refs_) { + object_names.push_back(v.first); + const std::string name = v.first + postfix; + if (object_refs_.find(name) != object_refs_.end()) { + return absl::InvalidArgumentError( + absl::StrCat("Object reference name collision. Name - ", name)); + } + object_refs_[name] = {v.second.access_type, std::move(v.second.descriptor)}; + } + for (auto& v : args.objects_) { + object_names.push_back(v.first); + const std::string name = v.first + postfix; + if (objects_.find(name) != objects_.end()) { + return absl::InvalidArgumentError( + absl::StrCat("Object name collision. 
Name - ", name)); + } + objects_[name] = {v.second.access_type, std::move(v.second.obj_ptr)}; + } + for (const auto& v : args.int_values_) { + AddInt(RenameArg(object_names, postfix, v.first), v.second.value); + } + for (const auto& v : args.float_values_) { + AddFloat(RenameArg(object_names, postfix, v.first), v.second.value); + } + for (const auto& v : args.half_values_) { + AddHalf(RenameArg(object_names, postfix, v.first), v.second.value); + } + for (const auto& v : args.buffers_) { + AddBuffer(RenameArg(object_names, postfix, v.first), v.second); + } + for (const auto& v : args.images2d_) { + AddImage2D(RenameArg(object_names, postfix, v.first), v.second); + } + for (const auto& v : args.image2d_arrays_) { + AddImage2DArray(RenameArg(object_names, postfix, v.first), v.second); + } + for (const auto& v : args.images3d_) { + AddImage3D(RenameArg(object_names, postfix, v.first), v.second); + } + for (const auto& v : args.image_buffers_) { + AddImageBuffer(RenameArg(object_names, postfix, v.first), v.second); + } + return absl::OkStatus(); +} + +absl::Status Arguments::InsertLinkableCode(const std::string& link_object_name, + const std::string& linkable_code, + std::string* code) { + const GPUObjectDescriptor* desc_ptr; + AccessType access_type; + if (auto it = object_refs_.find(link_object_name); it != object_refs_.end()) { + desc_ptr = it->second.descriptor.get(); + access_type = it->second.access_type; + } else if (auto it = objects_.find(link_object_name); it != objects_.end()) { + desc_ptr = it->second.obj_ptr->GetGPUDescriptor(); + access_type = it->second.access_type; + } else { + return absl::NotFoundError( + absl::StrCat("No object with name - ", link_object_name)); + } + if (access_type != AccessType::WRITE && + access_type != AccessType::READ_WRITE) { + return absl::FailedPreconditionError(absl::StrCat( + "Object with name - ", link_object_name, " should have Write access.")); + } + + const auto* tensor_desc = dynamic_cast(desc_ptr); + if (!tensor_desc) { + return absl::FailedPreconditionError( + absl::StrCat("Object with name - ", link_object_name, + " is not spatial tensor. Currently linking supported only " + "for spatial tensors.")); + } + + std::string token = kArgsPrefix + link_object_name + ".Write"; + size_t next_position = code->find(token); + while (next_position != std::string::npos) { + size_t arg_pos = next_position; + next_position += token.size(); + char next = (*code)[next_position]; + if (next != '(') { + return absl::NotFoundError( + absl::StrCat("Expected ( after ", token, " call")); + } + std::vector args; + size_t close_bracket_pos; + RETURN_IF_ERROR(ParseArgsInsideBrackets(*code, next_position, + &close_bracket_pos, &args)); + std::string value_name, x_coord, y_coord, s_coord; + if (tensor_desc->HasAxis(Axis::BATCH)) { + if (args.size() == 5) { + value_name = args[0]; + x_coord = "(" + args[1] + " * args." + link_object_name + + ".Batch() + " + args[3] + ")"; + y_coord = args[2]; + s_coord = args[3]; + } else if (args.size() == 4) { + if (tensor_desc->IsBatchedWidth()) { + value_name = args[0]; + x_coord = args[1]; + y_coord = args[2]; + s_coord = args[3]; + } else { + std::string batch_name = tensor_desc->GetBatchIDFromState(); + if (batch_name.empty()) { + return absl::FailedPreconditionError( + "Object has Batch axis, but can not find batch_id."); + } + value_name = args[0]; + x_coord = "(" + args[1] + " * args." 
+ link_object_name + + ".Batch() + " + batch_name + ")"; + y_coord = args[2]; + s_coord = args[3]; + } + } else { + return absl::FailedPreconditionError( + "Unsupported Write(...) method for linking."); + } + } else { + if (args.size() == 4) { + value_name = args[0]; + x_coord = args[1]; + y_coord = args[2]; + s_coord = args[3]; + } else { + return absl::FailedPreconditionError( + "Unsupported Write(...) method for linking."); + } + } + std::string patch = linkable_code; + ReplaceAllWords("in_out_value", value_name, &patch); + ReplaceAllWords("X_COORD", x_coord, &patch); + ReplaceAllWords("Y_COORD", y_coord, &patch); + ReplaceAllWords("S_COORD", s_coord, &patch); + code->insert(arg_pos, patch); + next_position = code->find(token, arg_pos + patch.size() + token.size()); + } + return absl::OkStatus(); +} + absl::Status Arguments::TransformToCLCode(std::string* code) { RETURN_IF_ERROR(AddObjectArgs()); RETURN_IF_ERROR(ResolveSelectorsPass(code)); @@ -480,6 +649,16 @@ absl::Status Arguments::Bind(cl_kernel kernel, int offset) { } offset++; } + for (int i = 0; i < shared_half4s_data_.size() / 4; ++i) { + const int error_code = clSetKernelArg(kernel, offset, sizeof(int16_t) * 4, + &shared_half4s_data_[i * 4]); + if (error_code != CL_SUCCESS) { + return absl::UnknownError(absl::StrCat( + "Failed to set kernel arguments - ", CLErrorCodeToString(error_code), + "(at index - ", offset, ")")); + } + offset++; + } return absl::OkStatus(); } diff --git a/tensorflow/lite/delegates/gpu/cl/arguments.h b/tensorflow/lite/delegates/gpu/cl/arguments.h index 453ffcb56b2..3d59c45ce9a 100644 --- a/tensorflow/lite/delegates/gpu/cl/arguments.h +++ b/tensorflow/lite/delegates/gpu/cl/arguments.h @@ -63,8 +63,14 @@ class Arguments { std::string GetListOfArgs(); - absl::Status Bind(cl_kernel kernel, int offset); + absl::Status Bind(cl_kernel kernel, int offset = 0); + void RenameArgs(const std::string& postfix, std::string* code) const; + absl::Status Merge(Arguments&& args, const std::string& postfix); + + absl::Status InsertLinkableCode(const std::string& link_object_name, + const std::string& linkable_code, + std::string* code); absl::Status TransformToCLCode(std::string* code); // Move only diff --git a/tensorflow/lite/delegates/gpu/cl/arguments_test.cc b/tensorflow/lite/delegates/gpu/cl/arguments_test.cc index 1a4c9fc9c00..24160b0a81f 100644 --- a/tensorflow/lite/delegates/gpu/cl/arguments_test.cc +++ b/tensorflow/lite/delegates/gpu/cl/arguments_test.cc @@ -19,6 +19,7 @@ limitations under the License. 
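// A minimal sketch of how the new calls are meant to compose, as the tests
// below exercise them (assuming a main kernel's `args` and generated `code`,
// an elementwise epilogue kept in `linkable_args` with body `linkable_code`,
// and a WRITE tensor object "dst_tensor" already added to `args`; these names
// are illustrative only):
//
//   linkable_args.RenameArgs("_link0", &linkable_code);
//   RETURN_IF_ERROR(args.Merge(std::move(linkable_args), "_link0"));
//   RETURN_IF_ERROR(
//       args.InsertLinkableCode("dst_tensor", linkable_code, &code));
//   RETURN_IF_ERROR(args.TransformToCLCode(&code));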
#include #include #include "tensorflow/lite/delegates/gpu/cl/gpu_object.h" +#include "tensorflow/lite/delegates/gpu/cl/tensor_type.h" namespace tflite { namespace gpu { @@ -27,6 +28,7 @@ namespace { struct TestDescriptor : public GPUObjectDescriptor { absl::Status PerformSelector(const std::string& selector, const std::vector& args, + const std::vector& template_args, std::string* result) const override { if (selector == "Length") { *result = "length"; @@ -45,7 +47,7 @@ struct TestDescriptor : public GPUObjectDescriptor { } } - GPUResources GetGPUResources() const override { + GPUResources GetGPUResources(AccessType access_type) const override { GPUResources resources; resources.ints.push_back("length"); GPUBufferDescriptor desc; @@ -60,7 +62,8 @@ struct TestDescriptor : public GPUObjectDescriptor { TEST(ArgumentsTest, TestSelectorResolve) { TestDescriptor descriptor; Arguments args; - args.AddObjectRef("object", absl::make_unique(descriptor)); + args.AddObjectRef("object", AccessType::WRITE, + absl::make_unique(descriptor)); std::string sample_code = R"( if (a < 3) { value = args.object.Read(id); @@ -82,7 +85,8 @@ TEST(ArgumentsTest, TestSelectorResolve) { TEST(ArgumentsTest, TestNoSelector) { TestDescriptor descriptor; Arguments args; - args.AddObjectRef("object", absl::make_unique(descriptor)); + args.AddObjectRef("object", AccessType::WRITE, + absl::make_unique(descriptor)); std::string sample_code = R"( if (a < 3) { value = args.object.Write(id); @@ -91,6 +95,61 @@ TEST(ArgumentsTest, TestNoSelector) { EXPECT_FALSE(args.TransformToCLCode(&sample_code).ok()); } +TEST(ArgumentsTest, TestInsertLinkable) { + TensorDescriptor desc{DataType::FLOAT32, TensorStorageType::BUFFER, + Layout::HWC}; + Arguments args; + args.AddObjectRef("spatial_tensor", AccessType::WRITE, + absl::make_unique(desc)); + std::string linkable_code = "in_out_value += X_COORD * Y_COORD - S_COORD;\n"; + std::string sample_code = R"( + if (a < 3) { + args.spatial_tensor.Write(value, xc, yc, zc); + } +)"; + EXPECT_TRUE( + args.InsertLinkableCode("spatial_tensor", linkable_code, &sample_code) + .ok()); + const std::string expected_result = R"( + if (a < 3) { + value += xc * yc - zc; +args.spatial_tensor.Write(value, xc, yc, zc); + } +)"; + EXPECT_EQ(sample_code, expected_result); +} + +TEST(ArgumentsTest, TestMergeArgs) { + TensorDescriptor desc{DataType::FLOAT32, TensorStorageType::BUFFER, + Layout::HWC}; + Arguments args; + args.AddObjectRef("spatial_tensor", AccessType::WRITE, + absl::make_unique(desc)); + + Arguments linkable_args; + linkable_args.AddFloat("alpha", 0.5f); + std::string linkable_code = "in_out_value += args.alpha;\n"; + std::string sample_code = R"( + if (a < 3) { + args.spatial_tensor.Write(value, xc, yc, zc); + } +)"; + + linkable_args.RenameArgs("_link0", &linkable_code); + EXPECT_EQ(linkable_code, "in_out_value += args.alpha_link0;\n"); + EXPECT_TRUE(args.Merge(std::move(linkable_args), "_link0").ok()); + EXPECT_TRUE( + args.InsertLinkableCode("spatial_tensor", linkable_code, &sample_code) + .ok()); + const std::string expected_result = R"( + if (a < 3) { + value += args.alpha_link0; +args.spatial_tensor.Write(value, xc, yc, zc); + } +)"; + EXPECT_EQ(sample_code, expected_result); +} + } // namespace cl } // namespace gpu } // namespace tflite diff --git a/tensorflow/lite/delegates/gpu/cl/tensor_type.cc b/tensorflow/lite/delegates/gpu/cl/tensor_type.cc index c685cba8f45..b0d3ad63b1b 100644 --- a/tensorflow/lite/delegates/gpu/cl/tensor_type.cc +++ b/tensorflow/lite/delegates/gpu/cl/tensor_type.cc @@ 
-477,6 +477,15 @@ bool TensorDescriptor::HasAxis(Axis axis) const { return false; } +std::string TensorDescriptor::GetBatchIDFromState() const { + auto it = state_vars_.find("batch_id"); + if (it == state_vars_.end()) { + return ""; + } else { + return it->second; + } +} + bool TensorDescriptor::ParseCoordsFromArgs(const std::vector& args, int offset, std::string* xc, std::string* yc, std::string* zc, diff --git a/tensorflow/lite/delegates/gpu/cl/tensor_type.h b/tensorflow/lite/delegates/gpu/cl/tensor_type.h index 30e9e4fa3fb..3331b08f1ee 100644 --- a/tensorflow/lite/delegates/gpu/cl/tensor_type.h +++ b/tensorflow/lite/delegates/gpu/cl/tensor_type.h @@ -72,6 +72,9 @@ struct TensorDescriptor : public GPUObjectDescriptor { bool HasAxis(Axis axis) const; + std::string GetBatchIDFromState() const; + bool IsBatchedWidth() const; + DataType data_type = DataType::UNKNOWN; TensorStorageType storage_type = TensorStorageType::UNKNOWN; // This field describes logical layout, actual(physical) GPU layout can be @@ -128,8 +131,6 @@ struct TensorDescriptor : public GPUObjectDescriptor { bool ParseCoordsFromArgs(const std::vector& args, int offset, std::string* xc, std::string* yc, std::string* zc, std::string* sc, std::string* bc) const; - - bool IsBatchedWidth() const; }; std::string ToString(TensorStorageType type); From 25c696f76b9abb90c13613bc78180d14a952f6a0 Mon Sep 17 00:00:00 2001 From: Mingming Liu Date: Tue, 9 Jun 2020 14:54:35 -0700 Subject: [PATCH 145/178] In Batch op, move Tensors into output, as opposed to copy. PiperOrigin-RevId: 315566064 Change-Id: I8d878df8062522b0c21d3d6ae3006472701fd9ac --- tensorflow/core/kernels/batch_kernels.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/batch_kernels.cc b/tensorflow/core/kernels/batch_kernels.cc index fd4bdacff93..b67e6deab76 100644 --- a/tensorflow/core/kernels/batch_kernels.cc +++ b/tensorflow/core/kernels/batch_kernels.cc @@ -526,7 +526,7 @@ class BatchResource : public ResourceBase { for (int j = 0; j < batch->num_tasks(); ++j) { BatchTask& task = *(batch->mutable_task(j)); - task.context->set_output(i, split_tensor.at(j)); + task.context->set_output(i, std::move(split_tensor.at(j))); } // (Ignore a possible final split_tensors entry containing the // padding.) } From 22780f3485d60adfec511d54923f95da8b1529fc Mon Sep 17 00:00:00 2001 From: Chuan He Date: Tue, 9 Jun 2020 15:11:43 -0700 Subject: [PATCH 146/178] Fix type incompatible for unranked function output from input if not entry function. PiperOrigin-RevId: 315569664 Change-Id: I7461c7481207c3e98bf39793ccbcc44e73929fdf --- .../compiler/mlir/lite/flatbuffer_import.cc | 12 ++++++++-- .../unranked_function_output.mlir | 22 +++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/unranked_function_output.mlir diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_import.cc b/tensorflow/compiler/mlir/lite/flatbuffer_import.cc index 185f31dd093..94f93e972f5 100644 --- a/tensorflow/compiler/mlir/lite/flatbuffer_import.cc +++ b/tensorflow/compiler/mlir/lite/flatbuffer_import.cc @@ -803,9 +803,17 @@ StatusOr ConvertSubgraph( } for (auto output : func_outputs) { - bool is_constant = !is_op_output[output]; + const bool is_func_input = input_index_set.contains(output); + bool is_constant = !is_op_output[output] && !is_func_input; + // There are 2 cases tensor is scalar when it doesn't have a shape in + // flatbuffer: + // 1. 
`is_constant` = true, means this tensor is created from a constant op. + // 2. `is_func_input` = true and `is_entry_point` = true, which means this + // tensor is function input and function input type is a scalar tensor. + const bool shapeless_is_scalar = + is_constant || (is_func_input && is_entry_point); auto type_or_err = GetTensorType(*subgraph.tensors.at(output), builder, - /*shapeless_are_scalars=*/is_constant, + shapeless_is_scalar, /*is_constant=*/is_constant); if (!type_or_err.ok()) { emitError(func_loc, "error reading return types") diff --git a/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/unranked_function_output.mlir b/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/unranked_function_output.mlir new file mode 100644 index 00000000000..a31e8d4b79f --- /dev/null +++ b/tensorflow/compiler/mlir/lite/tests/flatbuffer2mlir/unranked_function_output.mlir @@ -0,0 +1,22 @@ +// RUN: flatbuffer_translate -mlir-to-tflite-flatbuffer %s -o - | flatbuffer_translate --tflite-flatbuffer-to-mlir - -o - | FileCheck --dump-input-on-failure %s + +// This test is to test for unranked function output from input, the output type should be compatible with input type. + +// CHECK: func @main(%arg0: tensor<1xf32>) -> tensor<*xf32> +// CHECK: %0 = "tf.While"(%arg0) {body = @body, cond = @cond, is_stateless = false} : (tensor<1xf32>) -> tensor<*xf32> +// CHECK: return %0 : tensor<*xf32> +// CHECK: func @cond(%arg0: tensor<*xf32>) -> tensor<*xf32> +// CHECK: func @body(%arg0: tensor<*xf32>) -> tensor<*xf32> + +func @main(%arg0: tensor<1xf32>) -> tensor<*xf32> { + %0 = "tf.While"(%arg0) {cond = @cond, body = @body, is_stateless = false} : (tensor<1xf32>) -> tensor<*xf32> + return %0 : tensor<*xf32> +} + +func @cond(%arg1: tensor<*xf32>) -> tensor<*xf32> { + return %arg1: tensor<*xf32> +} + +func @body(%arg1: tensor<*xf32>) -> tensor<*xf32> { + return %arg1: tensor<*xf32> +} From 5a7be26e1d2020eb3bfb61ed5a5a1abdbcf3eb16 Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Tue, 9 Jun 2020 15:22:47 -0700 Subject: [PATCH 147/178] [XLA/CPU] [NFC] Further simplify used datastructures in assigning buffers to Executable by using MaybeOwningDeviceyMemory PiperOrigin-RevId: 315571784 Change-Id: Ib01c8a9abf835e1a63fb0a1b9637a923f047d0bf --- .../xla/service/cpu/cpu_executable.cc | 131 +++++++++--------- .../compiler/xla/service/cpu/cpu_executable.h | 17 ++- 2 files changed, 70 insertions(+), 78 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc index bba1a3ad610..1abcf17dad3 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc @@ -75,70 +75,66 @@ CpuExecutable::CpuExecutable( << reinterpret_cast(compute_function_); } -StatusOr, - std::vector>> -CpuExecutable::CreateBufferTable(se::DeviceMemoryAllocator* memory_allocator, - int device_ordinal, - absl::Span arguments) { - std::vector unowning_buffers( - assignment_->Allocations().size()); - std::vector owning_buffers( +static StatusOr MemoryForAllocation( + const BufferAllocation& allocation, + absl::Span arguments, + se::DeviceMemoryAllocator* memory_allocator, int device_ordinal) { + VLOG(3) << allocation.ToString(); + if (allocation.is_entry_computation_parameter()) { + se::DeviceMemoryBase out = arguments[allocation.parameter_number()] + .Buffer(allocation.param_shape_index()) + .AsDeviceMemoryBase(); + CHECK_EQ(allocation.size(), out.size()) + << "Size mismatch on param " << 
allocation.parameter_number() + << " at shape index " << allocation.param_shape_index().ToString(); + VLOG(3) << "allocation is a parameter"; + return MaybeOwningDeviceMemory{out}; + } else if (allocation.is_constant()) { + VLOG(3) << "allocation is a constant"; + return MaybeOwningDeviceMemory{se::DeviceMemoryBase{}}; + } else if (allocation.is_thread_local()) { + VLOG(3) << "buffer is thread-local"; + return MaybeOwningDeviceMemory{se::DeviceMemoryBase{}}; + } + + int64 buffer_size = allocation.size(); + TF_ASSIGN_OR_RETURN(se::OwningDeviceMemory out, + memory_allocator->Allocate(device_ordinal, buffer_size)); + VLOG(3) << "buffer allocated " << buffer_size << " bytes [" << out->opaque() + << "]"; + + // Since the output buffer and all the temporary buffers were written into + // by the JITed code, msan has no way of knowing their memory was + // initialized. Mark them initialized so that msan doesn't flag loads from + // these buffers. + TF_ANNOTATE_MEMORY_IS_INITIALIZED(out->opaque(), buffer_size); + return MaybeOwningDeviceMemory{std::move(out)}; +} + +StatusOr> CpuExecutable::CreateBufferTable( + se::DeviceMemoryAllocator* memory_allocator, int device_ordinal, + absl::Span arguments) { + std::vector buffers( assignment_->Allocations().size()); VLOG(3) << "Allocating " << assignment_->Allocations().size() << " allocations for module " << module().name(); for (BufferAllocation::Index i = 0; i < assignment_->Allocations().size(); ++i) { - auto& allocation = assignment_->GetAllocation(i); - - VLOG(3) << allocation.ToString(); - - if (allocation.is_entry_computation_parameter()) { - unowning_buffers[i] = arguments[allocation.parameter_number()] - .Buffer(allocation.param_shape_index()) - .AsDeviceMemoryBase(); - CHECK_EQ(allocation.size(), unowning_buffers[i].size()) - << "Size mismatch on param " << allocation.parameter_number() - << " at shape index " << allocation.param_shape_index().ToString(); - VLOG(3) << "allocation #" << i << " is a parameter"; - continue; - } - - if (allocation.is_constant()) { - VLOG(3) << "allocation #" << i << " is a constant"; - continue; - } - - if (allocation.is_thread_local()) { - VLOG(3) << "buffer #" << i << " is thread-local"; - continue; - } - - int64 buffer_size = allocation.size(); - CHECK(owning_buffers[i].is_null()); - TF_ASSIGN_OR_RETURN(owning_buffers[i], memory_allocator->Allocate( - device_ordinal, buffer_size)); - unowning_buffers[i] = *owning_buffers[i]; - - VLOG(3) << "buffer #" << i << " allocated " << buffer_size << " bytes [" - << owning_buffers[i]->opaque() << "]"; - - // Since the output buffer and all the temporary buffers were written into - // by the JITed code, msan has no way of knowing their memory was - // initialized. Mark them initialized so that msan doesn't flag loads from - // these buffers. 
- TF_ANNOTATE_MEMORY_IS_INITIALIZED(owning_buffers[i]->opaque(), buffer_size); + const BufferAllocation& allocation = assignment_->GetAllocation(i); + TF_ASSIGN_OR_RETURN( + buffers[i], MemoryForAllocation(allocation, arguments, memory_allocator, + device_ordinal)); } TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice result_slice, assignment_->GetUniqueTopLevelOutputSlice()); VLOG(3) << "result index: " << result_slice.index(); - return std::make_tuple(std::move(unowning_buffers), - std::move(owning_buffers)); + return std::move(buffers); } Status CpuExecutable::ExecuteComputeFunction( const ExecutableRunOptions* run_options, - absl::Span buffers, + absl::Span buffers, HloExecutionProfile* hlo_execution_profile) { // The calling convention for JITed functions is: // @@ -166,7 +162,8 @@ Status CpuExecutable::ExecuteComputeFunction( // Call the computation function following the calling convention. std::vector buffer_pointers; for (auto& buffer : buffers) { - buffer_pointers.push_back(const_cast(buffer.opaque())); + buffer_pointers.push_back( + const_cast(buffer.AsDeviceMemoryBase().opaque())); } TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice result_slice, assignment_->GetUniqueTopLevelOutputSlice()); @@ -210,7 +207,7 @@ Status CpuExecutable::ExecuteComputeFunction( StatusOr CpuExecutable::CreateResultShapedBuffer( const ServiceExecutableRunOptions* run_options, - absl::Span owning_buffers) { + absl::Span buffers) { se::Stream* stream = run_options->stream(); ExecutionOutput result(/*on_host_shape=*/result_shape(), /*on_device_shape=*/result_shape(), @@ -225,7 +222,7 @@ StatusOr CpuExecutable::CreateResultShapedBuffer( for (auto& p : result.MutableResult()->buffers()) { const ShapeIndex& index = p.first; se::DeviceMemoryBase& device_memory = p.second; - const auto& sources = this->GetRootValueSet().element(index); + const HloValueSet& sources = this->GetRootValueSet().element(index); // The points to set is unambiguous so the set should be a // singleton. CHECK_EQ(1, sources.values().size()); @@ -239,7 +236,7 @@ StatusOr CpuExecutable::CreateResultShapedBuffer( const BufferAllocation::Slice slice, this->assignment_->GetUniqueSlice(src, value_source->index())); const BufferAllocation::Index buffer_index = slice.index(); - se::OwningDeviceMemory& buffer = owning_buffers[buffer_index]; + MaybeOwningDeviceMemory& buffer = buffers[buffer_index]; if (!slice.allocation()->is_entry_computation_parameter()) { // If the buffer coming out of the result is from a parameter, the // owning buffer will be null, and that means the caller aliased some @@ -251,7 +248,10 @@ StatusOr CpuExecutable::CreateResultShapedBuffer( // ownership, and hence a buffer coming from there cannot be part of // the new ScopedShapedBuffer we create for the result (which assumes // ownership). 
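      // Release() on a MaybeOwningDeviceMemory yields an engaged optional only
      // when the slot actually owns its allocation, which is why the
      // non-parameter branch below can CHECK the result before handing the
      // memory over to the output ShapedBuffer.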
- device_memory = buffer.Release(); + absl::optional owned_buffer = buffer.Release(); + CHECK(owned_buffer); + device_memory = owned_buffer->Release(); + buffer = device_memory; } else { auto output_alias = input_output_alias.GetAliasedOutput( slice.allocation()->parameter_number(), @@ -296,21 +296,15 @@ StatusOr CpuExecutable::ExecuteAsyncOnStream( run_options->stream()->implementation()); se::Stream* stream = run_options->stream(); se::DeviceMemoryAllocator* memory_allocator = run_options->allocator(); - std::vector owning_buffers; - std::vector unowning_buffers; TF_ASSIGN_OR_RETURN( - std::tie(unowning_buffers, owning_buffers), + std::vector buffers, CreateBufferTable(memory_allocator, stream->parent()->device_ordinal(), arguments)); TF_ASSIGN_OR_RETURN( ExecutionOutput result, - CreateResultShapedBuffer(run_options, absl::MakeSpan(owning_buffers))); + CreateResultShapedBuffer(run_options, absl::MakeSpan(buffers))); - // At this point, `unowning_buffers` contains unowning pointers to all of our - // buffers, and `buffers` contains owning pointers to the non-live-out - // buffers. Enqueue a task which keeps alive the non-live-out buffers. - // // Logically we want this lambda to capture `buffers` by move, ultimately our // functor needs to be wrapped in an std::function, and that requires its // functor to be copyable. Thus we perpetrate the hack of capturing buffers @@ -323,21 +317,20 @@ StatusOr CpuExecutable::ExecuteAsyncOnStream( struct AsyncRunTask { CpuExecutable* executable; ServiceExecutableRunOptions run_options; - std::vector unowning_buffers; - std::shared_ptr> buffers; + std::shared_ptr> task_buffers; HloExecutionProfile* hlo_execution_profile; void operator()() { // Failing a CHECK here is not great, but I don't see an obvious way to // return a failed Status asynchronously. TF_CHECK_OK(executable->ExecuteComputeFunction( - &run_options.run_options(), unowning_buffers, hlo_execution_profile)); + &run_options.run_options(), *task_buffers, hlo_execution_profile)); } }; host_stream->EnqueueTask( - AsyncRunTask{this, *run_options, std::move(unowning_buffers), - std::make_shared>( - std::move(owning_buffers)), + AsyncRunTask{this, *run_options, + std::make_shared>( + std::move(buffers)), hlo_execution_profile}); // TODO(cheshire): Duplication with other executables. diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.h b/tensorflow/compiler/xla/service/cpu/cpu_executable.h index 7e42ee717f3..97fc6b3a701 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_executable.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.h @@ -101,17 +101,16 @@ class CpuExecutable : public Executable { // // - buffers_to_free: buffers whose ownership was donated by the caller that // are to be freed by the caller. - StatusOr, - std::vector>> - CreateBufferTable(se::DeviceMemoryAllocator* memory_allocator, - int device_ordinal, - absl::Span arguments); + StatusOr> CreateBufferTable( + se::DeviceMemoryAllocator* memory_allocator, int device_ordinal, + absl::Span arguments); // Calls the generated function performing the computation with the given // arguments using the supplied buffers. 
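  // Only the raw base addresses of `buffers` are forwarded to the jitted
  // function; ownership of any owned allocations stays with the caller's
  // buffer table.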
- Status ExecuteComputeFunction(const ExecutableRunOptions* run_options, - absl::Span buffers, - HloExecutionProfile* hlo_execution_profile); + Status ExecuteComputeFunction( + const ExecutableRunOptions* run_options, + absl::Span buffers, + HloExecutionProfile* hlo_execution_profile); // Creates an Execution output holding ScopedShapedBuffer for holding the // result of the computation, moving buffers out of allocated_buffers and into @@ -119,7 +118,7 @@ class CpuExecutable : public Executable { // assignment. StatusOr CreateResultShapedBuffer( const ServiceExecutableRunOptions* run_options, - absl::Span owning_buffers); + absl::Span buffers); // Returns the instruction value set of the root instruction of the entry // computation. Uses dataflow analysis from buffer assignment. From 41a78aee2a0c7a74f98183c93d78ae91e8a57577 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Tue, 9 Jun 2020 15:42:49 -0700 Subject: [PATCH 148/178] Enable tests for tf_numpy's array fn PiperOrigin-RevId: 315575565 Change-Id: I3ce06a0dec2af19eb20cc89bc0503891c1a6964c --- tensorflow/python/ops/numpy_ops/np_array_ops.py | 10 +++++++++- tensorflow/python/ops/numpy_ops/np_arrays.py | 14 ++++++++++++-- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/ops/numpy_ops/np_array_ops.py b/tensorflow/python/ops/numpy_ops/np_array_ops.py index 944aed1c5a1..6f29da59c7c 100644 --- a/tensorflow/python/ops/numpy_ops/np_array_ops.py +++ b/tensorflow/python/ops/numpy_ops/np_array_ops.py @@ -295,7 +295,15 @@ def array(val, dtype=None, copy=True, ndmin=0): # pylint: disable=redefined-out # Handles lists of ndarrays result_t = nest.map_structure(maybe_data, result_t) - result_t = np_arrays.convert_to_tensor(result_t) + # EagerTensor conversion complains about "mixed types" when converting + # tensors with no dtype information. This is because it infers types based + # on one selected item in the list. So e.g. when converting [2., 2j] + # to a tensor, it will select float32 as the inferred type and not be able + # to convert the list to a float 32 tensor. + # Since we have some information about the final dtype we care about, we + # supply that information so that convert_to_tensor will do best-effort + # conversion to that dtype first. + result_t = np_arrays.convert_to_tensor(result_t, dtype_hint=dtype) result_t = math_ops.cast(result_t, dtype=dtype) elif dtype: result_t = math_ops.cast(result_t, dtype) diff --git a/tensorflow/python/ops/numpy_ops/np_arrays.py b/tensorflow/python/ops/numpy_ops/np_arrays.py index bd58e486f50..37d6cda3b9a 100644 --- a/tensorflow/python/ops/numpy_ops/np_arrays.py +++ b/tensorflow/python/ops/numpy_ops/np_arrays.py @@ -27,7 +27,17 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops.numpy_ops import np_dtypes -def convert_to_tensor(value, dtype=None): +def convert_to_tensor(value, dtype=None, dtype_hint=None): + """Wrapper over `tf.convert_to_tensor`. + + Args: + value: value to convert + dtype: (optional) the type we would like it to be converted to. + dtype_hint: (optional) soft preference for the type we would like it to + be converted to. `tf.convert_to_tensor` will attempt to convert value + to this type first, but will not fail if conversion is not possible + falling back to inferring the type instead. + """ # A safer version of `tf.convert_to_tensor` to work around b/149876037. # TODO(wangpeng): Remove this function once the bug is fixed. 
if (dtype is None and isinstance(value, six.integer_types) and @@ -35,7 +45,7 @@ def convert_to_tensor(value, dtype=None): dtype = dtypes.uint64 elif (dtype is None and isinstance(value, float)): dtype = np_dtypes.default_float_type() - return ops.convert_to_tensor(value, dtype=dtype) + return ops.convert_to_tensor(value, dtype=dtype, dtype_hint=dtype_hint) class ndarray(object): # pylint: disable=invalid-name From 05070430586c9efc621a386e83f7a0de7c5fac3f Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Tue, 9 Jun 2020 15:44:01 -0700 Subject: [PATCH 149/178] Parallel device: move broadcasting a bit earlier to simplify type signatures I think we'll eventually need to move implicit broadcasting out of execute entirely for gradients to work, and moving it a little bit out helps simplify things for now. PiperOrigin-RevId: 315575773 Change-Id: Ib7e42e5f68d7261a431a4d0de01ca471090cd967 --- .../eager/parallel_device/parallel_device.cc | 28 ++++++++++++++++++- .../parallel_device/parallel_device_lib.cc | 26 ++++------------- .../parallel_device/parallel_device_lib.h | 12 +++----- 3 files changed, 36 insertions(+), 30 deletions(-) diff --git a/tensorflow/c/eager/parallel_device/parallel_device.cc b/tensorflow/c/eager/parallel_device/parallel_device.cc index eec893e704d..5740fc4631e 100644 --- a/tensorflow/c/eager/parallel_device/parallel_device.cc +++ b/tensorflow/c/eager/parallel_device/parallel_device.cc @@ -40,6 +40,9 @@ using OpPtr = std::unique_ptr; using MaybeParallelTensorOwned = absl::variant, TensorHandlePtr>; +using MaybeParallelTensorUnowned = + absl::variant; + // A ParallelDevice on its own is not registered with a TFE_Context, and so has // no device name (e.g. for `tf.device`). `NamedParallelDevice` associates a // name with it, which lets us pack its `ParallelTensor`s into TFE_TensorHandles @@ -141,9 +144,32 @@ absl::optional> ExecuteWithSpecialOps( result.emplace(std::move(result_content)); return result; } + std::vector parallel_inputs; + std::vector> implicitly_broadcast_tensors; + parallel_inputs.reserve(inputs.size()); + implicitly_broadcast_tensors.reserve(inputs.size()); // not tight + for (const auto& input : inputs) { + if (absl::holds_alternative(input)) { + // Non-parallel tensors are implicitly broadcast, i.e. set as the input + // to each parallel operation. + // + // TODO(allenl): There may be smarter ways to do this copy in some + // cases, i.e. with a collective broadcast. We'll need to be careful + // about things that are taken as inputs on the host or on their + // existing device (for multi-device functions). 
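+      // For example, when the parallel device wraps two component devices, a
+      // plain scalar handle passed alongside ParallelTensor inputs is copied
+      // onto both components here, so each per-device operation receives a
+      // per-device input.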
+ std::unique_ptr parallel_tensor( + parallel_device.CopyToParallelDevice( + context, absl::get(input), status)); + if (TF_GetCode(status) != TF_OK) return result; + parallel_inputs.push_back(parallel_tensor.get()); + implicitly_broadcast_tensors.emplace_back(std::move(parallel_tensor)); + } else { + parallel_inputs.push_back(absl::get(input)); + } + } absl::optional>> maybe_parallel_results( - parallel_device.Execute(context, std::move(inputs), operation_name, + parallel_device.Execute(context, parallel_inputs, operation_name, attributes, expected_max_outputs, status)); if (!maybe_parallel_results.has_value()) return result; std::vector> parallel_results( diff --git a/tensorflow/c/eager/parallel_device/parallel_device_lib.cc b/tensorflow/c/eager/parallel_device/parallel_device_lib.cc index f56b8d8ac88..225e9a3a12d 100644 --- a/tensorflow/c/eager/parallel_device/parallel_device_lib.cc +++ b/tensorflow/c/eager/parallel_device/parallel_device_lib.cc @@ -100,7 +100,7 @@ std::unique_ptr ParallelDevice::DeviceIDs( absl::optional>> ParallelDevice::Execute(TFE_Context* context, - std::vector inputs, + const std::vector& inputs, const char* operation_name, const TFE_OpAttrs* attributes, int expected_max_outputs, TF_Status* status) const { @@ -129,26 +129,10 @@ ParallelDevice::Execute(TFE_Context* context, status); TFE_OpAddAttrs(op.get(), attributes); for (int input_index = 0; input_index < inputs.size(); ++input_index) { - if (absl::holds_alternative(inputs[input_index])) { - // Non-parallel tensors are implicitly broadcast, i.e. set as the input - // to each parallel operation. - // - // TODO(allenl): There may be smarter ways to do this copy in some - // cases, i.e. with a collective broadcast. We'll need to be careful - // about things that are taken as inputs on the host or on their - // existing device (for multi-device functions). - TFE_OpAddInput(op.get(), - absl::get(inputs[input_index]), - status); - if (TF_GetCode(status) != TF_OK) return result; - } else { - // Parallel tensors are divided between operations by device. - TFE_OpAddInput(op.get(), - absl::get(inputs[input_index]) - ->tensor(device_index), - status); - if (TF_GetCode(status) != TF_OK) return result; - } + // Parallel tensors are divided between operations by device. + TFE_OpAddInput(op.get(), inputs[input_index]->tensor(device_index), + status); + if (TF_GetCode(status) != TF_OK) return result; } std::vector op_outputs(expected_max_outputs); int real_num_outputs = expected_max_outputs; diff --git a/tensorflow/c/eager/parallel_device/parallel_device_lib.h b/tensorflow/c/eager/parallel_device/parallel_device_lib.h index 377377bc9c1..8f3f613e535 100644 --- a/tensorflow/c/eager/parallel_device/parallel_device_lib.h +++ b/tensorflow/c/eager/parallel_device/parallel_device_lib.h @@ -52,9 +52,6 @@ using ExecutorPtr = std::unique_ptr; class ParallelTensor; -using MaybeParallelTensorUnowned = - absl::variant; - // Forwards operations to `devices`, maintaining ParallelTensor with components // placed on each underlying device. class ParallelDevice { @@ -79,10 +76,9 @@ class ParallelDevice { // Takes a description of a single operation being executed on the // ParallelDevice, and in turn runs one operation per component device with - // its corresponding inputs from the input ParallelTensors (or - // implicitly-mirrored tensors on other devices). Wraps the resulting - // per-device and per-output TFE_TensorHandles into one ParallelTensor per - // output of the original operation. 
+ // its corresponding inputs from the input ParallelTensors. Wraps the + // resulting per-device and per-output TFE_TensorHandles into one + // ParallelTensor per output of the original operation. // // Attributes are forwarded to executed operations unmodified. // @@ -90,7 +86,7 @@ class ParallelDevice { // TF_OK. Bad statuses are forwarded from underlying `TFE_Execute` calls, or // if sanity checks on dtypes/metadata fail. absl::optional>> Execute( - TFE_Context* context, std::vector inputs, + TFE_Context* context, const std::vector& inputs, const char* operation_name, const TFE_OpAttrs* attributes, int expected_max_outputs, TF_Status* status) const; From 492bb4d4a3fd5aafae620d2b4db1535b2d4f4a7b Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Tue, 9 Jun 2020 15:58:18 -0700 Subject: [PATCH 150/178] Make windows tf-nightly pip package the default with gpu support, similar to linux packages. PiperOrigin-RevId: 315578369 Change-Id: If3bb7d5be339f12368c83e9f880b1c09592bfad5 --- .../tools/ci_build/release/windows/upload_nightly_pip/upload.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/release/windows/upload_nightly_pip/upload.sh b/tensorflow/tools/ci_build/release/windows/upload_nightly_pip/upload.sh index 68486647d83..609c316cca7 100644 --- a/tensorflow/tools/ci_build/release/windows/upload_nightly_pip/upload.sh +++ b/tensorflow/tools/ci_build/release/windows/upload_nightly_pip/upload.sh @@ -21,7 +21,7 @@ source tensorflow/tools/ci_build/release/common.sh sudo pip install --upgrade twine # Copy and rename to tf_nightly -for f in $(ls "${TF_FILE_DIR}"/tf_nightly_cpu*dev*cp3*-cp3*-win_amd64.whl); do +for f in $(ls "${TF_FILE_DIR}"/tf_nightly_gpu*dev*cp3*-cp3*-win_amd64.whl); do copy_to_new_project_name "${f}" tf_nightly done From 5f069e0eaecce83ea5b5d27c6bf7b08d322342c3 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Tue, 9 Jun 2020 16:07:18 -0700 Subject: [PATCH 151/178] Update the version string for libtensorflow nightly releases. 
PiperOrigin-RevId: 315580020 Change-Id: Ic6b2a9e11cb527851900568a8ae43b433a2049ac --- .../tools/ci_build/release/macos/cpu_libtensorflow/build.sh | 3 +++ .../ci_build/release/ubuntu_16/libtensorflow/cpu/build.sh | 5 +++++ .../ci_build/release/ubuntu_16/libtensorflow/gpu/build.sh | 5 +++++ 3 files changed, 13 insertions(+) diff --git a/tensorflow/tools/ci_build/release/macos/cpu_libtensorflow/build.sh b/tensorflow/tools/ci_build/release/macos/cpu_libtensorflow/build.sh index 348778b5f15..e926333a30f 100644 --- a/tensorflow/tools/ci_build/release/macos/cpu_libtensorflow/build.sh +++ b/tensorflow/tools/ci_build/release/macos/cpu_libtensorflow/build.sh @@ -24,4 +24,7 @@ install_bazelisk export DEVELOPER_DIR=/Applications/Xcode_10.3.app/Contents/Developer sudo xcode-select -s "${DEVELOPER_DIR}" +# Update the version string to nightly +./tensorflow/tools/ci_build/update_version.py --nightly + tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/libtensorflow/cpu/build.sh b/tensorflow/tools/ci_build/release/ubuntu_16/libtensorflow/cpu/build.sh index abb85c18711..f51c7b59582 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/libtensorflow/cpu/build.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/libtensorflow/cpu/build.sh @@ -26,4 +26,9 @@ which bazel # Install realpath sudo apt-get install realpath +# Update the version string to nightly +if [ -n "${IS_NIGHTLY_BUILD}" ]; then + ./tensorflow/tools/ci_build/update_version.py --nightly +fi + ./tensorflow/tools/ci_build/linux/libtensorflow.sh diff --git a/tensorflow/tools/ci_build/release/ubuntu_16/libtensorflow/gpu/build.sh b/tensorflow/tools/ci_build/release/ubuntu_16/libtensorflow/gpu/build.sh index c399ed2680f..795e3fd2bdd 100644 --- a/tensorflow/tools/ci_build/release/ubuntu_16/libtensorflow/gpu/build.sh +++ b/tensorflow/tools/ci_build/release/ubuntu_16/libtensorflow/gpu/build.sh @@ -27,4 +27,9 @@ sudo apt-get install realpath export TF_NEED_CUDA=1 +# Update the version string to nightly +if [ -n "${IS_NIGHTLY_BUILD}" ]; then + ./tensorflow/tools/ci_build/update_version.py --nightly +fi + ./tensorflow/tools/ci_build/linux/libtensorflow.sh From 3dcf574c5bc24e1897b99ae61df69906653f9afc Mon Sep 17 00:00:00 2001 From: Thomas O'Malley Date: Tue, 9 Jun 2020 16:14:26 -0700 Subject: [PATCH 152/178] Move Keras eager microbenchmarks to keras/benchmark. 
Adds overhead benchmarking for __call__ for Layer and Layer subclasses in layers/core PiperOrigin-RevId: 315581358 Change-Id: Icb76f6d9e3d1829386c22a454c91105c20b28280 --- tensorflow/python/eager/benchmarks_test.py | 42 ------ tensorflow/python/keras/benchmark/BUILD | 12 ++ .../benchmark/eager_microbenchmarks_test.py | 126 ++++++++++++++++++ 3 files changed, 138 insertions(+), 42 deletions(-) mode change 100644 => 100755 tensorflow/python/keras/benchmark/BUILD create mode 100644 tensorflow/python/keras/benchmark/eager_microbenchmarks_test.py diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index 315a4cfd056..eb22ddb3c70 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -52,8 +52,6 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_spec from tensorflow.python.framework import test_util -from tensorflow.python.keras.engine import base_layer -from tensorflow.python.keras.layers import core as core_layers from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import functional_ops @@ -1420,46 +1418,6 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase): self._run(fn, 10000) - # TODO(b/157587712): Move to keras when benchmarks are setup. - def benchmark_tf_keras_layer_call_overhead(self): - - class OnlyOverheadLayer(base_layer.Layer): - - def call(self, x): - return x - - layer = OnlyOverheadLayer() - x = ops.convert_to_tensor([[1.]]) - - def fn(): - layer(x) - - self._run(fn, 10000) - - # TODO(b/157587712): Move to keras when benchmarks are setup. - def benchmark_tf_keras_dense_overhead(self): - - layer = core_layers.Dense(1) - x = ops.convert_to_tensor([[1.]]) - layer(x) # Warmup call to `build` layer. - - def fn(): - layer(x) - - self._run(fn, 10000) - - # TODO(b/157587712): Move to keras when benchmarks are setup. - def benchmark_tf_keras_flatten_overhead(self): - - layer = core_layers.Flatten() - x = ops.convert_to_tensor([[[1.]]]) - layer(x) # Warmup call to `build` layer. - - def fn(): - layer(x) - - self._run(fn, 10000) - if __name__ == "__main__": test.main() diff --git a/tensorflow/python/keras/benchmark/BUILD b/tensorflow/python/keras/benchmark/BUILD old mode 100644 new mode 100755 index f9dd2d073c5..9fc709ad4de --- a/tensorflow/python/keras/benchmark/BUILD +++ b/tensorflow/python/keras/benchmark/BUILD @@ -1,6 +1,8 @@ # Description: # Implementation of Keras benchmarks. +load("//tensorflow:tensorflow.bzl", "cuda_py_test") + package( default_visibility = ["//visibility:public"], licenses = ["notice"], # Apache 2.0 @@ -31,3 +33,13 @@ py_test( "//third_party/py/numpy", ], ) + +cuda_py_test( + name = "eager_microbenchmarks_test", + size = "medium", + srcs = ["eager_microbenchmarks_test.py"], + python_version = "PY3", + deps = [ + "//tensorflow/python/keras", + ], +) diff --git a/tensorflow/python/keras/benchmark/eager_microbenchmarks_test.py b/tensorflow/python/keras/benchmark/eager_microbenchmarks_test.py new file mode 100644 index 00000000000..0894546b3b4 --- /dev/null +++ b/tensorflow/python/keras/benchmark/eager_microbenchmarks_test.py @@ -0,0 +1,126 @@ +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Microbenchmarks for Keras components in eager mode.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import time + +from tensorflow.python.eager import context +from tensorflow.python.framework import ops +from tensorflow.python.keras.engine import base_layer +from tensorflow.python.keras.layers import core as core_layers +from tensorflow.python.platform import test +from tensorflow.python.util import tf_inspect + + +def _run_benchmark(func, num_iters, execution_mode=None): + ctx = context.context() + with context.execution_mode(execution_mode): + # call func to warm up + func() + if execution_mode == context.ASYNC: + ctx.executor.wait() + start = time.time() + for _ in range(num_iters): + func() + if execution_mode == context.ASYNC: + ctx.executor.wait() + end = time.time() + + return end - start + + +class MicroBenchmarksBase(test.Benchmark): + """Run and report benchmark results.""" + + def run_report(self, run_benchmark, func, num_iters, execution_mode=None): + """Run and report benchmark results.""" + total_time = run_benchmark(func, num_iters, execution_mode) + mean_us = total_time * 1e6 / num_iters + extras = { + "examples_per_sec": float("{0:.3f}".format(num_iters / total_time)), + "us_per_example": float("{0:.3f}".format(total_time * 1e6 / num_iters)) + } + benchmark_name = self._get_benchmark_name() + self.report_benchmark( + iters=num_iters, wall_time=mean_us, extras=extras, name=benchmark_name) + + def _get_benchmark_name(self): + """Mostly copied from benchmark.py _get_name().""" + stack = tf_inspect.stack() + name = None + for frame in stack[::-1]: + f_locals = frame[0].f_locals + f_self = f_locals.get("self", None) + if isinstance(f_self, test.Benchmark): + name = frame[3] # Get the method name + # This is a hack to get around the fact that some methods might have a + # disable_tfrt decorator around them. In that case a function called + # 'decorated' wraps the real called function underneath and so we + # peek one deeper into the stack to get the real name. 
+ if name == "decorated": + continue + else: + break + if name is None: + raise ValueError("Unable to determine calling Benchmark function.") + if context.is_tfrt_enabled(): + name = name + "_tfrt" + return name + + def _run(self, func, num_iters, execution_mode=None): + self.run_report(_run_benchmark, func, num_iters, execution_mode) + + def benchmark_tf_keras_layer_call_overhead(self): + + class OnlyOverheadLayer(base_layer.Layer): + + def call(self, x): + return x + + layer = OnlyOverheadLayer() + x = ops.convert_to_tensor([[1.]]) + + def fn(): + layer(x) + + self._run(fn, 10000) + + def benchmark_tf_keras_dense_overhead(self): + + layer = core_layers.Dense(1) + x = ops.convert_to_tensor([[1.]]) + + def fn(): + layer(x) + + self._run(fn, 10000) + + def benchmark_tf_keras_flatten_overhead(self): + + layer = core_layers.Flatten() + x = ops.convert_to_tensor([[[1.]]]) + + def fn(): + layer(x) + + self._run(fn, 10000) + + +if __name__ == "__main__": + ops.enable_eager_execution() + test.main() From c4b376a48bd9b7da85bafa6e19c3cba84b9440d0 Mon Sep 17 00:00:00 2001 From: Henry Tan Date: Tue, 9 Jun 2020 16:27:02 -0700 Subject: [PATCH 153/178] TPU library internal change. PiperOrigin-RevId: 315583646 Change-Id: Ibb0f0b375c114f34db9b435fbb0bfc01f1f3d6e0 --- tensorflow/core/tpu/kernels/BUILD | 12 ++ .../core/tpu/kernels/compiled_subgraph.h | 169 ++++++++++++++++++ .../kernels/tpu_compilation_cache_entry.cc | 51 ++++++ .../tpu/kernels/tpu_compilation_cache_entry.h | 42 ++--- .../kernels/tpu_compilation_cache_external.cc | 132 +++++++------- .../kernels/tpu_compilation_cache_external.h | 137 ++++---------- .../kernels/tpu_compilation_cache_lookup.cc | 2 +- .../kernels/tpu_compilation_cache_lookup.h | 6 +- 8 files changed, 348 insertions(+), 203 deletions(-) create mode 100644 tensorflow/core/tpu/kernels/compiled_subgraph.h create mode 100644 tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.cc diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index eb464fa7461..d9d843b518f 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -119,6 +119,7 @@ cc_library( cc_library( name = "tpu_compilation_cache_entry", + srcs = ["tpu_compilation_cache_entry.cc"], hdrs = [ "tpu_compilation_cache_entry.h", ], @@ -166,6 +167,16 @@ cc_library( ], ) +cc_library( + name = "compiled_subgraph", + hdrs = ["compiled_subgraph.h"], + deps = [ + ":tpu_program_group_interface", + "//tensorflow/core:lib", + "//tensorflow/core/platform:refcount", + ], +) + cc_library( name = "tpu_program_group_interface", hdrs = ["tpu_program_group_interface.h"], @@ -207,6 +218,7 @@ cc_library( "tpu_compilation_cache_external.h", ], deps = [ + ":compiled_subgraph", ":tpu_compilation_cache_entry", ":tpu_compilation_cache_key", ":tpu_compilation_cache_metrics", # buildcleaner: keep diff --git a/tensorflow/core/tpu/kernels/compiled_subgraph.h b/tensorflow/core/tpu/kernels/compiled_subgraph.h new file mode 100644 index 00000000000..1066e4839dd --- /dev/null +++ b/tensorflow/core/tpu/kernels/compiled_subgraph.h @@ -0,0 +1,169 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_TPU_KERNELS_COMPILED_SUBGRAPH_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_COMPILED_SUBGRAPH_H_ + +#include +#include + +#include "tensorflow/core/platform/refcount.h" +#include "tensorflow/core/platform/status.h" +#include "tensorflow/core/tpu/kernels/tpu_program_group_interface.h" + +namespace tensorflow { +namespace tpu { + +// Cache for compiled TPU program. +// +// Each key identifies a unique subgraph, and the value is the vector of +// protos that are emitted by compiling the subgraph. +// +// When a subgraph is considered for compilation, the client calls +// +// auto subgraph_key = ; +// auto compile_function = ; +// auto per_step_ref_holder = ; +// int64 uid; +// std::vector proto_key; +// CompileIfKeyAbsent(subgraph_key, per_step_ref_holder, &uid, &proto_key, +// compile_function); +// +// where subgraph_key is the key computed for the subgraph. On success, +// proto_key contains a vector of keys, where proto_key[i] can be used to look +// up the ith proto compiled from the subgraph, and uid contains an identifier +// that can be used in place of key for clients that require cheap +// serializable handles. If the compiled protos were not present in the cache, +// compile_function would be called to generate them. per_step_ref_holder +// extends the lifetime of cached results: it is guaranteed that the protos +// indicated in proto_key will be available for lookup for at least as long as +// per_step_ref_holder is not deleted. +// +// If the caller passes nullptr instead of a per_step_ref_holder then the +// caller is responsible for calling Release(subgraph_key) once for every call +// to CompileIfKeyAbsent(subgraph_key, ...) to discard the reference to the +// compilation results, after the caller is sure it will not look up the +// compiled executables again. +// +// Subsequently the client can call +// +// std::unique_ptr entry; +// Lookup(proto_key, &entry); +// auto proto = entry->get(); +// +// or +// +// std::unique_ptr entry; +// Lookup(uid, proto_index, &entry); +// auto proto = entry->get(); +// +// to access a cached proto. +// TODO(misard) Switch the existing TPU ops to use uid+proto_index instead of +// string keys for proto lookups. +// +// +// Usage details within the system: +// +// This cache lives in the resource manager of the TPU_SYSTEM device where the +// compiler runs, typically worker 0 of the system. The cache is discarded and +// a new one created whenever the system is reinitialized. +// +// A compiled subgraph is placed into the cache using a key that is a +// combination of the function name, guaranteed_constants, the shapes of the +// dynamic inputs to the subgraph, and the function library in use at the time +// of execution. +// +// Whenever a compile Op is run, it looks to see if there is already an entry +// in the cache corresponding to that Op and the current dynamic shapes, and +// creates one if not. The entry is marked as most recently used in the cache +// by the compile Op. The entry is reference counted. 
The cache owns one entry +// , and each step that has executed a compile Op referring to the entry owns +// a reference until that step completes. +// +// If the cache exceeds a configured storage limit, entries are marked for +// eviction in order of least recently used. An entry is not evicted until all +// references to it are discarded, so an entry that is marked for eviction can +// still be looked up by the execute Ops in a running step. If another Compile +// Op looks up an entry that is marked for eviction, the entry will be +// unmarked and set to most recently used. +// +struct CompiledSubgraph : public core::RefCounted { + // TODO(henrytan): once `TpuCompilationCache` and + // `TpuCompilationCacheExternal` inherits from `TpuCompilationCacheInterface` + // update void* with `TpuCompilationCacheInterface` + void* parent = nullptr; // Not owned. + + bool initialized = false; + + // The Status returned by the compilation function when the entry is + // initialized. This status will be returned to any client that requests the + // entry. + Status initialization_status; + + // Counter to keep track of LRU entries for the eviction policy. + int64_t last_use = -1; + + // The unique key describing this entry. + std::string subgraph_key; + + // The uid describing this entry. + int64_t uid; + + // Compilation cache proto key to identify the cache entry. + std::vector proto_key; + + // The number of 'external' client-held references to the entry. + int external_references = 0; + + // The sum of the SpaceUsed of each of the elements of programs; an estimate + // of how much RAM the entry consumes, used to determine when entries must + // be marked for eviction. + int64_t total_size = 0; + + // Debug info in case we miss. + std::string cache_entry_debug_string; + + // Entries representing the associated sharding and unsharding programs, + // which share the same life time of the owning main entry, so we always use + // the main entry's ref count. + std::unique_ptr sharding_entry; + std::unique_ptr unsharding_entry; + + // Only used for the nested sharding/unsharding entries to point to the + // owning main entry. + CompiledSubgraph* main_entry = nullptr; + + // Compiled Tpu program. + std::unique_ptr tpu_program_group; + + // Computes total program size. + size_t ComputeTotalSize() const { + CHECK_EQ(total_size, 0); + int64 size = tpu_program_group->program_size(); + + if (sharding_entry != nullptr) { + size += sharding_entry->total_size; + } + if (unsharding_entry != nullptr) { + size += unsharding_entry->total_size; + } + return size; + } +}; + +} // namespace tpu +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_TPU_KERNELS_COMPILED_SUBGRAPH_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.cc b/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.cc new file mode 100644 index 00000000000..c6c1490b22d --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.cc @@ -0,0 +1,51 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h" + +namespace tensorflow { +namespace tpu { + +TpuCompilationCacheEntry::TpuCompilationCacheEntry( + const TpuProgramGroupInterface* tpu_program_group, int core_index) + : tpu_program_group_(down_cast(tpu_program_group)), + core_index_(core_index) {} + +// Constructor for an empty entry. +TpuCompilationCacheEntry::TpuCompilationCacheEntry() + : tpu_program_group_(nullptr) {} + +const TPUExecutableInfoProto* TpuCompilationCacheEntry::get_executable_info() + const { + return &(tpu_program_group_->executable_info()); +} + +const TPUHostTransferInfoProto* +TpuCompilationCacheEntry::get_host_transfer_info() const { + return &(tpu_program_group_->host_transfer_info()); +} + +const xla::HloProto* TpuCompilationCacheEntry::get_hlo_metadata() const { + return tpu_program_group_->hlo_metadatas()[core_index_].get(); +} + +// TODO(henrytan,jiawenhao): When should we expect more than one +// XLA_TpuProgram* per TpuProgram? Remove the program_count CHECK below then. +const XLA_TpuProgram* TpuCompilationCacheEntry::get_tpu_program() const { + CHECK_EQ(tpu_program_group_->program_count(), 1); + return tpu_program_group_->tpu_programs()[core_index_]; +} + +} // namespace tpu +} // namespace tensorflow diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h index 5fc60236bf9..a561fc51778 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h @@ -23,37 +23,23 @@ limitations under the License. namespace tensorflow { namespace tpu { -class CompilationCacheEntry { +// A version of `CompilationCacheEntry` that exposes Tpu binary program +// `XLA_TpuProgram`. +class TpuCompilationCacheEntry { public: - explicit CompilationCacheEntry( - std::unique_ptr tpu_program) - : tpu_program_(std::move(tpu_program)) {} - + explicit TpuCompilationCacheEntry( + const TpuProgramGroupInterface* tpu_program_group, int core_index); // Constructor for an empty entry. - CompilationCacheEntry() - : tpu_program_(nullptr) {} - - const TPUExecutableInfoProto* get_executable_info() const { - return &tpu_program_->executable_info(); - } - - const TPUHostTransferInfoProto* get_host_transfer_info() const { - return &tpu_program_->host_transfer_info(); - } - - const xla::HloProto* get_hlo_metadata() const { - return &tpu_program_->hlo_metadata(); - } - - // TODO(henrytan,jiawenhao): When should we expect more than one - // XLA_TpuProgram* per TpuProgram? Remove the program_count CHECK below then. - const XLA_TpuProgram* get_tpu_program() const { - CHECK_EQ(tpu_program_->program_count(), 1); - return tpu_program_->tpu_programs()[0]; - } + TpuCompilationCacheEntry(); + const TPUExecutableInfoProto* get_executable_info() const; + const TPUHostTransferInfoProto* get_host_transfer_info() const; + const xla::HloProto* get_hlo_metadata() const; + // TODO(henrytan): maybe nicer to return C++ wrapper of `XLA_TpuProgram` + const XLA_TpuProgram* get_tpu_program() const; private: - std::unique_ptr tpu_program_; + const TpuProgramGroup* tpu_program_group_; + int core_index_; }; // Base class for a reference to a cached proto. 
A unique_ptr to a @@ -66,7 +52,7 @@ class CompilationCacheEntryRef { // Returns a CompilationCacheEntry that should not be used beyond the lifetime // of the CompilationCacheEntryRef. - virtual CompilationCacheEntry get() = 0; + virtual TpuCompilationCacheEntry get() = 0; }; // Base class that holds references to compiled protos so that the protos are diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc b/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc index 2f550b20774..614dfbdf577 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.cc @@ -33,21 +33,20 @@ namespace tpu { namespace { -using CompilationEntry = TpuCompilationCacheInterface::CompilationEntry; - int64 get_uid() { uint64 unsigned_rand = random::New64() & INT64_MAX; return static_cast(unsigned_rand); } -void PopulateEntry(const std::string& key, CompilationEntry* entry, - std::unique_ptr tpu_program) { +void PopulateEntry(const std::string& key, CompiledSubgraph* entry, + TpuProgramGroup& tpu_program_group) { // Make the unique keys for each cached proto. - for (int i = 0; i < tpu_program->program_count(); ++i) { + for (int i = 0; i < tpu_program_group.program_count(); ++i) { entry->proto_key.push_back(ProtoKeyForComputation(key, i)); } - entry->tpu_program = std::move(tpu_program); + entry->tpu_program_group = + absl::make_unique(std::move(tpu_program_group)); entry->initialized = true; } @@ -124,8 +123,7 @@ std::string CreateConfigPrefix(const TPUCompileMetadataProto& metadata) { } // namespace -TpuCompilationCacheInterface::TpuCompilationCacheInterface( - int64_t max_cache_size) +TpuCompilationCacheExternal::TpuCompilationCacheExternal(int64_t max_cache_size) : max_cache_size_(max_cache_size) { if (max_cache_size < 0) { LOG(FATAL) << "`max_cache_size` value must be greater than equal to 0"; @@ -133,8 +131,8 @@ TpuCompilationCacheInterface::TpuCompilationCacheInterface( VLOG(1) << "Created compilation cache size " << max_cache_size_ << " bytes."; } -TpuCompilationCacheInterface::~TpuCompilationCacheInterface() { - VLOG(1) << "TpuCompilationCacheInterface::~TpuCompilationCacheInterface()"; +TpuCompilationCacheExternal::~TpuCompilationCacheExternal() { + VLOG(1) << "TpuCompilationCacheExternal::~TpuCompilationCacheExternal()"; // A buggy client may be holding onto a reference, or a client might have // crashed while holding onto a reference. In either case, discard all // outstanding client references to avoid leaking storage. 
@@ -156,7 +154,7 @@ TpuCompilationCacheInterface::~TpuCompilationCacheInterface() { CHECK_EQ(marked_for_eviction_size_, 0); } -std::string TpuCompilationCacheInterface::FindCacheKey( +std::string TpuCompilationCacheExternal::FindCacheKey( const TpuCompilationCacheKey& subgraph_key) const { if (!subgraph_key.has_guaranteed_const) { return subgraph_key.prefix; @@ -176,9 +174,9 @@ std::string TpuCompilationCacheInterface::FindCacheKey( return ""; } -void TpuCompilationCacheInterface::InsertEntry( +void TpuCompilationCacheExternal::InsertEntry( const std::string& cache_key, const TpuCompilationCacheKey& subgraph_key, - CompilationEntry* entry) { + CompiledSubgraph* entry) { entry->parent = this; entry->subgraph_key = cache_key; entry->uid = get_uid(); @@ -199,11 +197,11 @@ void TpuCompilationCacheInterface::InsertEntry( cache_key)); } -CompilationEntry* TpuCompilationCacheInterface::InitializeEntry( +CompiledSubgraph* TpuCompilationCacheExternal::InitializeEntry( const string& key, const std::function& initialize_program, const TpuCompilationCacheKey& subgraph_key) { - CompilationEntry* main_entry = new CompilationEntry(); + CompiledSubgraph* main_entry = new CompiledSubgraph(); // Add the entry to the cache, with size zero since there are no compiled // programs in it. Once the subgraph has been compiled, @@ -220,14 +218,14 @@ CompilationEntry* TpuCompilationCacheInterface::InitializeEntry( // can proceed during the (potentially lengthy) initialization. Status initialization_status; - auto tpu_program = absl::make_unique(); + TpuProgramGroup tpu_program_group; { mu_.Unlock(); { profiler::TraceMe compile_programs_traceme( "TPU compilation cache compile", /*level=*/2); - initialization_status = initialize_program(tpu_program.get()); + initialization_status = initialize_program(&tpu_program_group); } mu_.Lock(); } @@ -236,20 +234,20 @@ CompilationEntry* TpuCompilationCacheInterface::InitializeEntry( // Add the entry to the uid index. auto uid_inserted = entries_by_uid_.insert( - std::pair(main_entry->uid, main_entry)); + std::pair(main_entry->uid, main_entry)); CHECK(uid_inserted.second); if (initialization_status.ok()) { // Compute the entries total size once all members are initialized. - main_entry->total_size = tpu_program->program_size(); + main_entry->total_size = tpu_program_group.program_size(); } // TODO(henrytan): handle sharding/unsharding. 
- PopulateEntry(key, main_entry, std::move(tpu_program)); + PopulateEntry(key, main_entry, tpu_program_group); for (int64 i = 0; i < main_entry->proto_key.size(); ++i) { auto entry_inserted = entries_by_proto_key_.insert( - std::pair>( + std::pair>( main_entry->proto_key[i], std::make_pair(main_entry, i))); CHECK(entry_inserted.second); } @@ -261,7 +259,7 @@ CompilationEntry* TpuCompilationCacheInterface::InitializeEntry( } /*static*/ TpuCompilationCacheKey -TpuCompilationCacheInterface::CreateCompilationCacheKey( +TpuCompilationCacheExternal::CreateCompilationCacheKey( absl::string_view function_name, uint64 function_library_fingerprint, absl::string_view mlir_module, const tensorflow::OpInputList& guaranteed_constants, @@ -323,15 +321,15 @@ TpuCompilationCacheInterface::CreateCompilationCacheKey( return key; } -TpuCompilationRefHolder* TpuCompilationCacheInterface::MakePerStepRefHolder() { +TpuCompilationRefHolder* TpuCompilationCacheExternal::MakePerStepRefHolder() { return new RefHolder(this); } -Status TpuCompilationCacheInterface::MarkEntryForEviction(int64 subgraph_uid) { +Status TpuCompilationCacheExternal::MarkEntryForEviction(int64 subgraph_uid) { profiler::TraceMe key_release_traceme( "TPU compilation cache possibly evict uid", /*level=*/2); - CompilationEntry* deleted_entry = nullptr; + CompiledSubgraph* deleted_entry = nullptr; { absl::MutexLock lock(&mu_); auto iter = entries_by_uid_.find(subgraph_uid); @@ -341,7 +339,7 @@ Status TpuCompilationCacheInterface::MarkEntryForEviction(int64 subgraph_uid) { } // Mark entry for eviction. - CompilationEntry* subgraph_to_evict = iter->second; + CompiledSubgraph* subgraph_to_evict = iter->second; // If there are external references, should not use this API. if (subgraph_to_evict->external_references != 0) { return errors::Internal("Subgraph ", subgraph_to_evict->subgraph_key, @@ -372,11 +370,11 @@ Status TpuCompilationCacheInterface::MarkEntryForEviction(int64 subgraph_uid) { return Status::OK(); } -Status TpuCompilationCacheInterface::Release(int64 subgraph_uid) { +Status TpuCompilationCacheExternal::Release(int64 subgraph_uid) { profiler::TraceMe key_release_traceme("TPU compilation cache release uid", /*level=*/2); - CompilationEntry* deleted_entry = nullptr; + CompiledSubgraph* deleted_entry = nullptr; { absl::MutexLock lock(&mu_); auto iter = entries_by_uid_.find(subgraph_uid); @@ -401,15 +399,15 @@ Status TpuCompilationCacheInterface::Release(int64 subgraph_uid) { return Status::OK(); } -void TpuCompilationCacheInterface::UnloadAndDestroy(CompilationEntry* entry) { +void TpuCompilationCacheExternal::UnloadAndDestroy(CompiledSubgraph* entry) { if (!entry) return; CHECK(entry->RefCountIsOne()); - entry->tpu_program->UnloadAndDestroyPrograms(); + entry->tpu_program_group->UnloadAndDestroyPrograms(); entry->Unref(); } -size_t TpuCompilationCacheInterface::RemoveEntry(const string& key) { +size_t TpuCompilationCacheExternal::RemoveEntry(const string& key) { auto erased = cache_store_.erase(key); TpuCompilationCacheMetrics::SetCacheEntryCount(cache_store_.size()); auto parsed_key_or_status = ParseCompilationCacheKey(key); @@ -426,8 +424,8 @@ size_t TpuCompilationCacheInterface::RemoveEntry(const string& key) { return erased; } -ABSL_MUST_USE_RESULT CompilationEntry* -TpuCompilationCacheInterface::DiscardEntryRef(CompilationEntry* entry) { +ABSL_MUST_USE_RESULT CompiledSubgraph* +TpuCompilationCacheExternal::DiscardEntryRef(CompiledSubgraph* entry) { if (entry->RefCountIsOne()) { // The last reference to this entry is going away, so 
really delete it from // the cache in such a way that it can't be restored by being looked up @@ -459,9 +457,9 @@ TpuCompilationCacheInterface::DiscardEntryRef(CompilationEntry* entry) { return nullptr; } -void TpuCompilationCacheInterface::DiscardEntryRefs( - gtl::ArraySlice entries) { - std::vector removed_entries; +void TpuCompilationCacheExternal::DiscardEntryRefs( + gtl::ArraySlice entries) { + std::vector removed_entries; { absl::MutexLock lock(&mu_); @@ -480,9 +478,9 @@ void TpuCompilationCacheInterface::DiscardEntryRefs( } } -ABSL_MUST_USE_RESULT CompilationEntry* -TpuCompilationCacheInterface::MarkOldestEntryForEviction() { - CompilationEntry* entry_to_mark = entries_by_last_use_.begin()->second; +ABSL_MUST_USE_RESULT CompiledSubgraph* +TpuCompilationCacheExternal::MarkOldestEntryForEviction() { + CompiledSubgraph* entry_to_mark = entries_by_last_use_.begin()->second; VLOG(1) << "Marking " << entry_to_mark->subgraph_key << " for eviction"; entries_by_last_use_.erase(entry_to_mark->last_use); cache_size_ -= entry_to_mark->total_size; @@ -495,8 +493,8 @@ TpuCompilationCacheInterface::MarkOldestEntryForEviction() { return DiscardEntryRef(entry_to_mark); } -void TpuCompilationCacheInterface::LookupEntryMarkedForEviction( - CompilationEntry* entry, std::vector* removed_entries) { +void TpuCompilationCacheExternal::LookupEntryMarkedForEviction( + CompiledSubgraph* entry, std::vector* removed_entries) { // The entry was previously marked for eviction (or is newly created) so // unmark it. Add a reference (owned by the cache), update the cache size, and // mark something old for eviction if necessary. @@ -516,14 +514,14 @@ void TpuCompilationCacheInterface::LookupEntryMarkedForEviction( } } -Status TpuCompilationCacheInterface::ToSubEntryRef( +Status TpuCompilationCacheExternal::ToSubEntryRef( CompilationCacheEntryRef* entry, CompilationCacheFetchTarget fetch_target) const { - return static_cast(entry)->ToSubEntryRef(fetch_target); + return static_cast(entry)->ToSubEntryRef(fetch_target); } -TpuCompilationCacheInterface::EntryRefImpl::EntryRefImpl( - TpuCompilationCacheInterface* parent, CompilationEntry* entry, int index) +TpuCompilationCacheExternal::TpuEntryRefImpl::TpuEntryRefImpl( + TpuCompilationCacheExternal* parent, CompiledSubgraph* entry, int index) : parent_(parent), entry_(entry), index_(index) { if (entry_ == nullptr) { return; @@ -537,7 +535,7 @@ TpuCompilationCacheInterface::EntryRefImpl::EntryRefImpl( } } -TpuCompilationCacheInterface::EntryRefImpl::~EntryRefImpl() { +TpuCompilationCacheExternal::TpuEntryRefImpl::~TpuEntryRefImpl() { if (entry_ == nullptr) { return; } @@ -548,18 +546,18 @@ TpuCompilationCacheInterface::EntryRefImpl::~EntryRefImpl() { } } -CompilationCacheEntry TpuCompilationCacheInterface::EntryRefImpl::get() { +TpuCompilationCacheEntry TpuCompilationCacheExternal::TpuEntryRefImpl::get() { if (entry_ == nullptr) { // Create an empty entry if the entry is nullptr. This corresponds to // non-existing sharding/unsharding entries. 
- return CompilationCacheEntry(); + return TpuCompilationCacheEntry(); } - return CompilationCacheEntry(std::move(entry_->tpu_program)); + return TpuCompilationCacheEntry(entry_->tpu_program_group.get(), index_); } -Status TpuCompilationCacheInterface::EntryRefImpl::ToSubEntryRef( +Status TpuCompilationCacheExternal::TpuEntryRefImpl::ToSubEntryRef( CompilationCacheFetchTarget fetch_target) { - CompilationEntry* target = nullptr; + CompiledSubgraph* target = nullptr; switch (fetch_target) { case CompilationCacheFetchTarget::MAIN: target = entry_; @@ -585,7 +583,7 @@ Status TpuCompilationCacheInterface::EntryRefImpl::ToSubEntryRef( return Status::OK(); } -Status TpuCompilationCacheInterface::Lookup( +Status TpuCompilationCacheExternal::Lookup( int64 uid, int proto_index, std::unique_ptr* entry) { entry->reset(); @@ -599,18 +597,18 @@ Status TpuCompilationCacheInterface::Lookup( if (iter == entries_by_uid_.end()) { return errors::NotFound("No subgraph found for uid ", uid); } - CompilationEntry* cache_entry = iter->second; + CompiledSubgraph* cache_entry = iter->second; if (proto_index < 0 || - proto_index >= cache_entry->tpu_program->program_size()) { + proto_index >= cache_entry->tpu_program_group->program_size()) { return errors::NotFound("No proto found for core index ", proto_index, " in subgraph with uid ", uid); } *entry = std::unique_ptr( - new EntryRefImpl(this, cache_entry, proto_index)); + new TpuEntryRefImpl(this, cache_entry, proto_index)); return Status::OK(); } -Status TpuCompilationCacheInterface::Lookup( +Status TpuCompilationCacheExternal::Lookup( const string& proto_key, std::unique_ptr* entry) { entry->reset(); @@ -622,19 +620,19 @@ Status TpuCompilationCacheInterface::Lookup( if (iter == entries_by_proto_key_.end()) { return errors::NotFound("No proto found for key ", proto_key); } - CompilationEntry* cache_entry = iter->second.first; + CompiledSubgraph* cache_entry = iter->second.first; int proto_index = iter->second.second; *entry = std::unique_ptr( - new EntryRefImpl(this, cache_entry, proto_index)); + new TpuEntryRefImpl(this, cache_entry, proto_index)); return Status::OK(); } -Status TpuCompilationCacheInterface::CompileIfKeyAbsentHelper( +Status TpuCompilationCacheExternal::CompileIfKeyAbsentHelper( const TpuCompilationCacheKey& subgraph_key, const SessionMetadata* session_metadata, TpuCompilationRefHolder* per_step_ref_holder, int64* uid, std::vector* proto_key, std::vector* may_modify_variables, - std::vector* removed_entries, + std::vector* removed_entries, std::vector>* hlo_metadata, const std::function& compile_function) { profiler::TraceMe subgraph_lookup_traceme( @@ -651,7 +649,7 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsentHelper( const std::string session_name = SessionNameFromMetadata(session_metadata); - CompilationEntry* entry = nullptr; + CompiledSubgraph* entry = nullptr; if (is_new_key) { cache_key = ConstructCompilationCacheKey(subgraph_key); TpuCompilationCacheMetrics::IncrementCacheLookupCount( @@ -702,7 +700,7 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsentHelper( // If session_name is present, log some additional stats related to HBM // here, so that they can be associated directly to the session. 
if (!session_name.empty()) { - entry->tpu_program->LogProgramMemorySummary(); + entry->tpu_program_group->LogProgramMemorySummary(); } } else { TpuCompilationCacheMetrics::IncrementCacheLookupCount(true, session_name); @@ -723,15 +721,15 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsentHelper( entry->Ref(); // Block if necessary until the subgraph has been initialized. mu_.Await(absl::Condition( - +[](CompilationEntry* e) { return e->initialized; }, entry)); + +[](CompiledSubgraph* e) { return e->initialized; }, entry)); } // Let the caller know the uid of the entry. *uid = entry->uid; // Let the caller know the keys for each of the cached protos. *proto_key = entry->proto_key; - *may_modify_variables = entry->tpu_program->may_modify_variables(); - *hlo_metadata = entry->hlo_metadata; + *may_modify_variables = entry->tpu_program_group->may_modify_variables(); + *hlo_metadata = entry->tpu_program_group->hlo_metadatas(); // If the caller didn't supply a per_step_ref_holder then the caller is going // to manually release the reference later via a call to Release(). @@ -769,7 +767,7 @@ Status TpuCompilationCacheInterface::CompileIfKeyAbsentHelper( return entry->initialization_status; } -tensorflow::Status TpuCompilationCacheInterface::CompileIfKeyAbsent( +tensorflow::Status TpuCompilationCacheExternal::CompileIfKeyAbsent( const TpuCompilationCacheKey& cache_key, const tensorflow::SessionMetadata* session_metadata, TpuCompilationRefHolder* per_step_ref_holder, int64* uid, @@ -777,7 +775,7 @@ tensorflow::Status TpuCompilationCacheInterface::CompileIfKeyAbsent( std::vector>* hlo_metadata, const std::function& compile_function) { - std::vector removed_entries; + std::vector removed_entries; auto status = CompileIfKeyAbsentHelper( cache_key, session_metadata, per_step_ref_holder, uid, proto_key, may_modify_variables, &removed_entries, hlo_metadata, compile_function); diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h index c3460c7e7bf..eff2afde108 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_external.h @@ -28,6 +28,7 @@ limitations under the License. #include "tensorflow/core/framework/resource_mgr.h" #include "tensorflow/core/platform/refcount.h" #include "tensorflow/core/protobuf/tpu/compile_metadata.pb.h" +#include "tensorflow/core/tpu/kernels/compiled_subgraph.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache.pb.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_entry.h" #include "tensorflow/core/tpu/kernels/tpu_compilation_cache_key.h" @@ -52,84 +53,15 @@ class TpuCompilationRefHolder : public ResourceBase { ~TpuCompilationRefHolder() override = default; }; -class TpuCompilationCacheInterface : public ResourceBase { +class TpuCompilationCacheExternal : public ResourceBase { public: using Status = ::stream_executor::port::Status; - // An entry in the compilation cache. The entry is deleted once it has been - // marked for eviction from the cache _and_ all steps that use it have - // completed. When the entry is first created, it is uninitialized and a - // client-supplied compilation function is run outside the cache's lock to - // generate the programs to be stored in the entry. Any other client that - // requests the entry will block until it has been initialized. 
Each entry has - // a last_use value that set from a monotonically-increasing counter in the - // cache whenever the entry is referenced. When the cache becomes full, - // entries are marked for eviction in LRU order. - // - // The bridge can request XLA to generate separate sharding and unsharding - // programs along with the main program; we use nested fields sharding_entry, - // unsharding_entry to store them under the main entry, and these two fields - // must be either both present or both absent. They have a back pointer - // main_entry to refer to the main program. These nested entries share the - // same cache key and the same lifetime as the main entry, so we use the - // refcount on the main entry to track the access to any of them. - // /-------------------------------\ - // v \ - // main_entry (refcount) -> sharding_entry -> main_entry - // ^ \ - // | \-> unsharding_entry -> main_entry - // \--------------------------------------/ - struct CompilationEntry : public core::RefCounted { - TpuCompilationCacheInterface* parent = nullptr; // Not owned. - bool initialized = false; - - // The Status returned by the compilation function when the entry is - // initialized. This status will be returned to any client that requests the - // entry. - Status initialization_status; - - // The uid describing this entry. - int64 uid; - std::vector proto_key; - - // Counter to keep track of LRU entries for the eviction policy. - int64 last_use = -1; - - // The unique key describing this entry. - std::string subgraph_key; - - // Entries representing the associated sharding and unsharding programs, - // which share the same life time of the owning main entry, so we always use - // the main entry's ref count. - std::unique_ptr sharding_entry; - std::unique_ptr unsharding_entry; - - // The number of 'external' client-held references to the entry. - int external_references = 0; - - std::vector> hlo_metadata; - - // The sum of the SpaceUsed of each of the elements of programs; an estimate - // of how much RAM the entry consumes, used to determine when entries must - // be marked for eviction. - int64 total_size = 0; - - // Only used for the nested sharding/unsharding entries to point to the - // owning main entry. - CompilationEntry* main_entry = nullptr; - - // Debug info in case we miss. - string cache_entry_debug_string; - - // Compiled Tpu program. 
- std::unique_ptr tpu_program; - }; - - explicit TpuCompilationCacheInterface(int64_t max_cache_size); - ~TpuCompilationCacheInterface() override; - TpuCompilationCacheInterface(const TpuCompilationCacheInterface&) = delete; - TpuCompilationCacheInterface& operator=(const TpuCompilationCacheInterface&) - = delete; + explicit TpuCompilationCacheExternal(int64_t max_cache_size); + ~TpuCompilationCacheExternal() override; + TpuCompilationCacheExternal(const TpuCompilationCacheExternal&) = delete; + TpuCompilationCacheExternal& operator=(const TpuCompilationCacheExternal&) = + delete; Status CompileIfKeyAbsent( const TpuCompilationCacheKey& cache_key, @@ -148,7 +80,7 @@ class TpuCompilationCacheInterface : public ResourceBase { const tensorflow::tpu::TPUCompileMetadataProto& metadata, const TpuMeshStateInterface& mesh_state); - string DebugString() const override { return "TpuCompilationCacheInterface"; } + string DebugString() const override { return "TpuCompilationCacheExternal"; } // Makes a reference holder for this cache, that can be stored in the per-step // resource manager and will ensure that compiled entries persist until the @@ -201,13 +133,13 @@ class TpuCompilationCacheInterface : public ResourceBase { // Wrapper for a cache entry that holds a reference to the entry until the // wrapper is deleted. This wrapper is the concrete type of // CompilationCacheEntryRef returned by Lookup. - class EntryRefImpl : public CompilationCacheEntryRef { + class TpuEntryRefImpl : public CompilationCacheEntryRef { public: - EntryRefImpl(TpuCompilationCacheInterface* parent, CompilationEntry* entry, - int index); - ~EntryRefImpl() override; + TpuEntryRefImpl(TpuCompilationCacheExternal* parent, + CompiledSubgraph* entry, int index); + ~TpuEntryRefImpl() override; - CompilationCacheEntry get() override; + TpuCompilationCacheEntry get() override; // Mutates this ref to point to the entry's subentry (for // sharding/unsharding) or main entry (unchanged) as specified by @@ -220,10 +152,10 @@ class TpuCompilationCacheInterface : public ResourceBase { Status ToSubEntryRef(CompilationCacheFetchTarget fetch_target); private: - TpuCompilationCacheInterface* parent_; // Not owned. + TpuCompilationCacheExternal* parent_; // Not owned. // A reference to entry_ is acquired in the constructor and released via // parent->DiscardEntryRefs in the destructor. - CompilationEntry* entry_; + CompiledSubgraph* entry_; // The program in entry_ that is returned by the get method. int index_; }; @@ -232,7 +164,7 @@ class TpuCompilationCacheInterface : public ResourceBase { // CompiledSubgraph entries. class RefHolder : public TpuCompilationRefHolder { public: - explicit RefHolder(TpuCompilationCacheInterface* parent) : parent_(parent) { + explicit RefHolder(TpuCompilationCacheExternal* parent) : parent_(parent) { parent_->Ref(); } ~RefHolder() override { @@ -243,17 +175,15 @@ class TpuCompilationCacheInterface : public ResourceBase { // Adds entry to the list of entries that will be released when the // RefHolder is destroyed. Each entry is released via a call to // parent_->DiscardEntryRefs. - void AddRef(CompilationEntry* entry) { - entries_.push_back(entry); - } + void AddRef(CompiledSubgraph* entry) { entries_.push_back(entry); } string DebugString() const override { - return "TpuCompilationCacheInterface::RefHolder"; + return "TpuCompilationCacheExternal::RefHolder"; } private: - TpuCompilationCacheInterface* parent_; // Not owned. - std::vector entries_; + TpuCompilationCacheExternal* parent_; // Not owned. 
+ std::vector entries_; }; // The bulk of implementation of CompileIfKeyAbsent() with the exception @@ -265,7 +195,7 @@ class TpuCompilationCacheInterface : public ResourceBase { const SessionMetadata* session_metadata, TpuCompilationRefHolder* per_step_ref_holder, int64* uid, std::vector* proto_key, std::vector* may_modify_variables, - std::vector* removed_entries, + std::vector* removed_entries, std::vector>* hlo_metadata, const std::function& compile_function); @@ -276,18 +206,17 @@ class TpuCompilationCacheInterface : public ResourceBase { // from cache_ and is returned to the caller; which must eventually call // UnloadAndDestroy(). We do not call UnloadAndDestroy within DiscardEntryRef // to avoid holding the lock during program unloading. - ABSL_MUST_USE_RESULT CompilationEntry* DiscardEntryRef( - CompilationEntry* entry) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); + ABSL_MUST_USE_RESULT CompiledSubgraph* DiscardEntryRef( + CompiledSubgraph* entry) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); // Convenience method called by ~RefHolder without mu_ held. Calls // DiscardEntryRef on every element of entries. - void DiscardEntryRefs( - gtl::ArraySlice entries); + void DiscardEntryRefs(gtl::ArraySlice entries); // Marks the oldest unmarked entry for eviction. Requires that there is at // least one such entry. In case the evicted entry had only 1 reference it // is removed from the cache and returned to the caller which must eventually // call UnloadAndDestroy. - CompilationEntry* MarkOldestEntryForEviction() + CompiledSubgraph* MarkOldestEntryForEviction() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); // Updates datastructures to indicate that entry, which had been marked for @@ -309,7 +238,7 @@ class TpuCompilationCacheInterface : public ResourceBase { // removed_entries. The caller must eventually delete them by calling // UnloadAndDestroy. void LookupEntryMarkedForEviction( - CompilationEntry* entry, std::vector* removed_entries) + CompiledSubgraph* entry, std::vector* removed_entries) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); // Removes the entry with given key from cache. @@ -318,7 +247,7 @@ class TpuCompilationCacheInterface : public ResourceBase { // Inserts the given key and entry to cache. void InsertEntry(const std::string& key, const TpuCompilationCacheKey& subgraph_key, - CompilationEntry* entry) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); + CompiledSubgraph* entry) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_); // Returns the cache key matching given subgraph_key. std::string FindCacheKey(const TpuCompilationCacheKey& subgraph_key) const @@ -330,7 +259,7 @@ class TpuCompilationCacheInterface : public ResourceBase { // call LookupEntryMarkedForEviction after InitializeEntry. // // **InitializeEntry releases mu_ during the call to initialize_programs.** - CompilationEntry* InitializeEntry( + CompiledSubgraph* InitializeEntry( const string& key, const std::function& initialize_program, const TpuCompilationCacheKey& subgraph_key) @@ -340,7 +269,7 @@ class TpuCompilationCacheInterface : public ResourceBase { // and deletes the entry itself. It is assumed no one else has a reference // to it and all related keys had already been removed from the cache. // The call can perform device IO so no locks should be held while calling it. - void UnloadAndDestroy(CompilationEntry* entry) ABSL_LOCKS_EXCLUDED(mu_); + void UnloadAndDestroy(CompiledSubgraph* entry) ABSL_LOCKS_EXCLUDED(mu_); // The maximum size of entries that are stored in the cache before entries are // marked for eviction. 
@@ -369,24 +298,24 @@ class TpuCompilationCacheInterface : public ResourceBase { // All the subgraph entries that can be looked up in the cache. An entry is // marked for eviction iff it is present in cache_ and not in // entries_by_last_use_. - std::unordered_map cache_store_ + std::unordered_map cache_store_ ABSL_GUARDED_BY(mu_); // All the subgraph entries that can be looked up in the cache, indexed by // uid. - absl::node_hash_map entries_by_uid_ + absl::node_hash_map entries_by_uid_ ABSL_GUARDED_BY(mu_); // All the protos that can be looked up in the cache, indexed by proto // key. The value of the map is a subgraph and the index of the proto compiled // for that subgraph. - std::unordered_map> + std::unordered_map> entries_by_proto_key_ ABSL_GUARDED_BY(mu_); // Map from last_use to entry, used to mark entries for eviction in LRU // order. If an entry's last_use counter is not present as a key in // entries_by_last_use_ then the entry has been marked for eviction. - std::map entries_by_last_use_ ABSL_GUARDED_BY(mu_); + std::map entries_by_last_use_ ABSL_GUARDED_BY(mu_); }; } // namespace tpu diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.cc b/tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.cc index f4f8dbfc80f..8b2e832a69e 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.cc +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.cc @@ -42,7 +42,7 @@ std::string GetName(CompilationCacheFetchTarget target) { } // namespace TpuCompilationCacheLocalLookup::TpuCompilationCacheLocalLookup( - TpuCompilationCacheInterface* cache) + TpuCompilationCacheExternal* cache) : cache_(cache) {} TpuCompilationCacheLocalLookup::~TpuCompilationCacheLocalLookup() { diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h b/tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h index 138777a438c..0d068e1bdd1 100644 --- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h +++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_lookup.h @@ -67,14 +67,14 @@ class TpuCompilationCacheLookup : public ResourceBase { }; // Forward declaration to break cycle dependency graph. -class TpuCompilationCacheInterface; +class TpuCompilationCacheExternal; // Class for looking up ISA protos when the execute and compile Op are in the // same address space. The proto is simply looked up in the compilation cache, // without any serialization taking place. class TpuCompilationCacheLocalLookup : public TpuCompilationCacheLookup { public: - explicit TpuCompilationCacheLocalLookup(TpuCompilationCacheInterface* cache); + explicit TpuCompilationCacheLocalLookup(TpuCompilationCacheExternal* cache); ~TpuCompilationCacheLocalLookup() override; Status Lookup(const string& proto_key, @@ -90,7 +90,7 @@ class TpuCompilationCacheLocalLookup : public TpuCompilationCacheLookup { private: // The subgraph compilation cache, in the same process address space where the // lookups are happening. 
- TpuCompilationCacheInterface* cache_; + TpuCompilationCacheExternal* cache_; }; } // namespace tpu From b8de8f444ecf29cadc9936dc02e54b73e241eedd Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Tue, 9 Jun 2020 16:27:03 -0700 Subject: [PATCH 154/178] Add dict input test case to tuple wrapping tests PiperOrigin-RevId: 315583648 Change-Id: I9c4e976a0f41ba764da17bfb6359d416a3bef172 --- .../python/keras/engine/training_test.py | 32 +++++++++++++------ 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py index 654435d8edf..bb6bfc32921 100644 --- a/tensorflow/python/keras/engine/training_test.py +++ b/tensorflow/python/keras/engine/training_test.py @@ -1562,16 +1562,30 @@ class TrainingTest(keras_parameterized.TestCase): # assign_add not called. self.assertEqual(self.evaluate(layer.v), 1.) - @keras_parameterized.run_all_keras_modes(always_skip_v1=True) - @parameterized.named_parameters(('numpy', True), ('dataset', False)) - def test_single_input_no_tuple_wrapping(self, use_numpy): + @keras_parameterized.run_all_keras_modes( + always_skip_v1=True, + # TODO(kaftan): this is failing with KerasTensors + # in a way that seems orthogonal to what the code is testing + skip_keras_tensors=True) + @parameterized.named_parameters( + ('numpy_array', 'numpy_array'), + ('dataset_array', 'dataset_array'), + ('dataset_dict', 'dataset_dict')) + def test_single_input_no_tuple_wrapping(self, input_type): x = np.ones((10, 1)) - if use_numpy: + if input_type == 'numpy_array': batch_size = 3 - else: + expected_data_type = ops.Tensor + elif input_type == 'dataset_array': x = dataset_ops.Dataset.from_tensor_slices(x).batch(3) batch_size = None + expected_data_type = ops.Tensor + else: + x = {'my_input': x} + x = dataset_ops.Dataset.from_tensor_slices(x).batch(3) + batch_size = None + expected_data_type = dict test_case = self @@ -1579,18 +1593,18 @@ class TrainingTest(keras_parameterized.TestCase): def train_step(self, data): # No tuple wrapping for single x input and no targets. - test_case.assertIsInstance(data, ops.Tensor) + test_case.assertIsInstance(data, expected_data_type) return super(MyModel, self).train_step(data) def test_step(self, data): - test_case.assertIsInstance(data, ops.Tensor) + test_case.assertIsInstance(data, expected_data_type) return super(MyModel, self).test_step(data) def predict_step(self, data): - test_case.assertIsInstance(data, ops.Tensor) + test_case.assertIsInstance(data, expected_data_type) return super(MyModel, self).predict_step(data) - inputs = layers_module.Input(1) + inputs = layers_module.Input(shape=(1,), name='my_input') outputs = layers_module.Dense(1)(inputs) model = MyModel(inputs, outputs) model.add_loss(math_ops.reduce_sum(outputs)) From 894f1324dd2abad859b7ac9e74d9a70d363d9a57 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Jun 2020 16:30:50 -0700 Subject: [PATCH 155/178] Allow both Enter(data, control) as well as Enter(data). 
PiperOrigin-RevId: 315584317 Change-Id: I50e8651ccbf0957a7edf1ef958aa398235867292 --- .../compiler/mlir/tensorflow/ir/tf_executor.cc | 12 +++++++----- .../mlir/tensorflow/tests/tf_executor_ops.mlir | 11 +++++++++++ 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc index 9daebc22ba1..3403651eef8 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc @@ -811,11 +811,13 @@ ParseResult ParseEnterOp(OpAsmParser &parser, OperationState &result) { // fully qualified) or a short form with a single type (in which case the data // input and the outputs are all using this type). if (FunctionType type = types.front().dyn_cast()) { - if (type.getNumInputs() != 1) - return parser.emitError(parser.getNameLoc()) - << " expects a single data type"; - result.types.assign(type.getResults().begin(), type.getResults().end()); - types.assign(type.getInputs().begin(), type.getInputs().end()); + // One data input, and any number of control inputs. + if (type.getNumInputs() >= 1) { + result.types.assign(type.getResults().begin(), type.getResults().end()); + types.assign(type.getInputs().begin(), type.getInputs().end()); + } else { + return parser.emitError(parser.getNameLoc()) << " expects a data input"; + } } else { Type control_type = ControlType::get(context); types.append(op_infos.size() - 1, control_type); diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_executor_ops.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf_executor_ops.mlir index c048db5a5ee..27b84724b4a 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_executor_ops.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_executor_ops.mlir @@ -416,6 +416,17 @@ func @enter_control(%arg0: tensor<*xf32>, %arg1: tensor) -> tensor<*xf32> { return %0 : tensor<*xf32> } +// CHECK-LABEL: func @enter_control_longform(%{{.*}}: tensor<*xf32>, %{{.*}}: tensor) -> tensor<*xf32> { +func @enter_control_longform(%arg0: tensor<*xf32>, %arg1: tensor) -> tensor<*xf32> { + %0 = tf_executor.graph { + %1:3 = tf_executor.Switch %arg0, %arg1 : tensor<*xf32> +// CHECK: tf_executor.Enter %{{.*}}, %{{.*}}, %{{.*}} frame "some/frame" : tensor<*xf32> + %res:2 = tf_executor.Enter %arg0, %1#2, %1#2 frame "some/frame" : (tensor<*xf32>, !tf_executor.control, !tf_executor.control) -> (tensor<*xf32>, !tf_executor.control) + tf_executor.fetch %res#0 : tensor<*xf32> + } + return %0 : tensor<*xf32> +} + // CHECK-LABEL: func @nextiteration(%{{.*}}: tensor<*xf32>, %{{.*}}: i1) -> tensor<*xf32> { func @nextiteration(%arg0: tensor<*xf32>, %arg1: i1) -> tensor<*xf32> { %0 = tf_executor.graph { From b8cce8c2c4b6322b11b93631453d5fa181b6d1f1 Mon Sep 17 00:00:00 2001 From: Peng Wang Date: Tue, 9 Jun 2020 16:40:47 -0700 Subject: [PATCH 156/178] Added a sub-class of tf.random.Generator in keras/layers/preprocessing/image_preprocessing.py to temporarily allow creating generators inside distribution strategies (with all replicas generating the same numbers). 
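
For illustration, a minimal usage sketch of what this enables (it mirrors the
new distribution-strategy test below; the public
tf.keras.layers.experimental.preprocessing.RandomRotation symbol and an
available GPU are assumed here):

  import numpy as np
  import tensorflow as tf

  images = np.random.random((2, 5, 8, 3)).astype(np.float32)
  strategy = tf.distribute.MirroredStrategy(devices=['/cpu:0', '/gpu:0'])
  with strategy.scope():
    # Creating the layer's generator under the strategy scope previously
    # raised a ValueError; the layer now uses _RandomGenerator, which skips
    # that check.
    layer = tf.keras.layers.experimental.preprocessing.RandomRotation(0.5)
  per_replica = strategy.run(lambda: layer(images, training=True))
  # Both replicas hold the same RNG state, so their outputs are identical.
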
PiperOrigin-RevId: 315586245 Change-Id: If6d80d82aa5ba1828c8c00c7aef35ba12a871694 --- .../preprocessing/image_preprocessing.py | 26 +++++++++- .../preprocessing/image_preprocessing_test.py | 16 ++++++ tensorflow/python/ops/stateful_random_ops.py | 49 +++++++++---------- 3 files changed, 64 insertions(+), 27 deletions(-) diff --git a/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py b/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py index 92de25a46e6..074652a0b79 100644 --- a/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py +++ b/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py @@ -36,6 +36,7 @@ from tensorflow.python.ops import image_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import stateful_random_ops from tensorflow.python.ops import stateless_random_ops +from tensorflow.python.ops import variables from tensorflow.python.util.tf_export import keras_export ResizeMethod = image_ops.ResizeMethod @@ -1292,11 +1293,32 @@ class RandomWidth(Layer): return dict(list(base_config.items()) + list(config.items())) +# TODO(b/147877541, b/158339556): This class is added to temporarily enable +# creating generators within distribution strategies. Remove it when the proper +# API is in place. +class _RandomGenerator(stateful_random_ops.Generator): + """A subclass that allows creation inside distribution strategies. + + This is a temporary solution to allow creating tf.random.Generator inside + distribution strategies. It will be removed when proper API is in place. + + All replicas will have the same RNG state and generate the same random + numbers. + """ + + def _create_variable(self, *args, **kwargs): + # This function does the same thing as the base class's namesake, except + # that it skips the distribution-strategy check. When we are inside a + # distribution-strategy scope, variables.Variable will pick a proper + # variable class (e.g. MirroredVariable). + return variables.Variable(*args, **kwargs) + + def make_generator(seed=None): if seed: - return stateful_random_ops.Generator.from_seed(seed) + return _RandomGenerator.from_seed(seed) else: - return stateful_random_ops.Generator.from_non_deterministic_state() + return _RandomGenerator.from_non_deterministic_state() def get_interpolation(interpolation): diff --git a/tensorflow/python/keras/layers/preprocessing/image_preprocessing_test.py b/tensorflow/python/keras/layers/preprocessing/image_preprocessing_test.py index f00a9657039..24011225a29 100644 --- a/tensorflow/python/keras/layers/preprocessing/image_preprocessing_test.py +++ b/tensorflow/python/keras/layers/preprocessing/image_preprocessing_test.py @@ -21,6 +21,7 @@ from __future__ import print_function from absl.testing import parameterized import numpy as np +from tensorflow.python.distribute.mirrored_strategy import MirroredStrategy from tensorflow.python.framework import errors from tensorflow.python.framework import test_util as tf_test_util from tensorflow.python.keras import keras_parameterized @@ -962,6 +963,21 @@ class RandomRotationTest(keras_parameterized.TestCase): actual_output = layer(input_images, training=0) self.assertAllClose(expected_output, actual_output) + def test_distribution_strategy(self): + """Tests that RandomRotation can be created within distribution strategies. + + And that replicas got the same random result. 
+ """ + input_images = np.random.random((2, 5, 8, 3)).astype(np.float32) + with tf_test_util.use_gpu(): + strat = MirroredStrategy(devices=['cpu', 'gpu']) + with strat.scope(): + layer = image_preprocessing.RandomRotation(.5) + output = strat.run(lambda: layer(input_images, training=True)) + values = output.values + self.assertAllEqual(2, len(values)) + self.assertAllClose(values[0], values[1]) + @tf_test_util.run_v2_only def test_config_with_custom_name(self): layer = image_preprocessing.RandomRotation(.5, name='image_preproc') diff --git a/tensorflow/python/ops/stateful_random_ops.py b/tensorflow/python/ops/stateful_random_ops.py index 9e8eba2e789..b6cbb229af4 100644 --- a/tensorflow/python/ops/stateful_random_ops.py +++ b/tensorflow/python/ops/stateful_random_ops.py @@ -255,27 +255,6 @@ class GeneratorSpec(type_spec.TypeSpec): return (self.shape, self.dtype, self.alg) -def _create_variable(*args, **kwargs): - """Creates a variable, and check that it's not MirroredVariable. - - Args: - *args: positional arguments passed along to `variables.Variable. - **kwargs: keyword arguments passed along to `variables.Variable. - - Returns: - The created variable. - """ - if ds_context.has_strategy(): - raise ValueError( - "Creating a generator within a strategy scope is disallowed, because " - "there is ambiguity on how to replicate a generator (e.g. should it be " - "copied so that each replica gets the same random numbers, or 'split' " - "so that each replica gets different random numbers).") - # TODO(wangpeng): Link to the RNG guide for solutions in such cases. - var = variables.Variable(*args, **kwargs) - return var - - @tf_export("random.Generator", "random.experimental.Generator") class Generator(tracking.AutoTrackable, composite_tensor.CompositeTensor): """Random-number generator. @@ -367,8 +346,8 @@ class Generator(tracking.AutoTrackable, composite_tensor.CompositeTensor): if copy_from is not None: # All other arguments should be None assert (alg or state) is None - self._state_var = _create_variable(copy_from.state, dtype=STATE_TYPE, - trainable=False) + self._state_var = self._create_variable(copy_from.state, dtype=STATE_TYPE, + trainable=False) self._alg = copy_from.algorithm else: @@ -380,10 +359,30 @@ class Generator(tracking.AutoTrackable, composite_tensor.CompositeTensor): else: state = _convert_to_state_tensor(state) _check_state_shape(state.shape, alg) - self._state_var = _create_variable(state, dtype=STATE_TYPE, - trainable=False) + self._state_var = self._create_variable(state, dtype=STATE_TYPE, + trainable=False) self._alg = alg + def _create_variable(self, *args, **kwargs): + """Creates a variable, and check that it's not MirroredVariable. + + Args: + *args: positional arguments passed along to `variables.Variable. + **kwargs: keyword arguments passed along to `variables.Variable. + + Returns: + The created variable. + """ + if ds_context.has_strategy(): + raise ValueError( + "Creating a generator within a strategy scope is disallowed, because " + "there is ambiguity on how to replicate a generator (e.g. should it " + "be copied so that each replica gets the same random numbers, or " + "'split' so that each replica gets different random numbers).") + # TODO(wangpeng): Link to the RNG guide for solutions in such cases. + var = variables.Variable(*args, **kwargs) + return var + @classmethod def from_state(cls, state, alg): """Creates a generator from a state. 
From ad027e2469cd23bba89b9a489d6cc7477ef48519 Mon Sep 17 00:00:00 2001 From: Jiho Choi Date: Tue, 9 Jun 2020 16:42:03 -0700 Subject: [PATCH 157/178] Fix FunctionRun's TraceMe to capture the scheduling of the function (instead of async execution of it) and use new TraceMe APIs. PiperOrigin-RevId: 315586487 Change-Id: I8d00bcf83ac20c5291ea5e0384b090c1ff71158e --- tensorflow/core/common_runtime/BUILD | 2 ++ tensorflow/core/common_runtime/eager/BUILD | 2 ++ .../common_runtime/eager/kernel_and_device.cc | 13 ++++++----- tensorflow/core/common_runtime/executor.cc | 22 +++++++++++++------ tensorflow/core/profiler/lib/BUILD | 1 + 5 files changed, 28 insertions(+), 12 deletions(-) diff --git a/tensorflow/core/common_runtime/BUILD b/tensorflow/core/common_runtime/BUILD index ec9fe0ef688..df4a6c92385 100644 --- a/tensorflow/core/common_runtime/BUILD +++ b/tensorflow/core/common_runtime/BUILD @@ -582,8 +582,10 @@ cc_library( "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", "//tensorflow/core/profiler/lib:annotated_traceme", + "//tensorflow/core/profiler/lib:connected_traceme", "//tensorflow/core/profiler/lib:scoped_annotation", "//tensorflow/core/profiler/lib:traceme", + "//tensorflow/core/profiler/lib:traceme_encode", "@com_google_absl//absl/memory", ], alwayslink = 1, diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD index 625468b39d5..6698e41ccb0 100644 --- a/tensorflow/core/common_runtime/eager/BUILD +++ b/tensorflow/core/common_runtime/eager/BUILD @@ -290,7 +290,9 @@ KERNEL_AND_DEVICE_DEPS = [ "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", "//tensorflow/core/profiler/lib:annotated_traceme", + "//tensorflow/core/profiler/lib:connected_traceme", "//tensorflow/core/profiler/lib:traceme", + "//tensorflow/core/profiler/lib:traceme_encode", "//tensorflow/core/grappler/optimizers:meta_optimizer", ] diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device.cc b/tensorflow/core/common_runtime/eager/kernel_and_device.cc index 48dd6a718e8..c3d3d76fac5 100644 --- a/tensorflow/core/common_runtime/eager/kernel_and_device.cc +++ b/tensorflow/core/common_runtime/eager/kernel_and_device.cc @@ -40,7 +40,9 @@ limitations under the License. #include "tensorflow/core/platform/fingerprint.h" #include "tensorflow/core/platform/setround.h" #include "tensorflow/core/profiler/lib/annotated_traceme.h" +#include "tensorflow/core/profiler/lib/connected_traceme.h" #include "tensorflow/core/profiler/lib/traceme.h" +#include "tensorflow/core/profiler/lib/traceme_encode.h" #include "tensorflow/core/public/version.h" #include "tensorflow/core/util/tensor_slice_reader_cache.h" #if !defined(IS_MOBILE_PLATFORM) @@ -381,16 +383,17 @@ void KernelAndDeviceFunc::RunAsync( outputs->clear(); - profiler::TraceMe* activity = new profiler::TraceMe( + profiler::TraceMeProducer activity( + // To TraceMeConsumers in ExecutorState::Process/Finish. 
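+      // The producer here and the consumer TraceMes in the executor share
+      // opts->step_id as the context id (ContextType::kTfExecutor), which is
+      // what lets the profiler stitch this scheduling event to the
+      // executor's Process/Finish events.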
[&] { - return absl::StrCat("FunctionRun#name=", name(), ",id=", opts->step_id, - "#"); + return profiler::TraceMeEncode( + "FunctionRun", {{"id", opts->step_id}, {"$r", 1} /*root_event*/}); }, + profiler::ContextType::kTfExecutor, opts->step_id, profiler::TraceMeLevel::kInfo); pflr_->Run(*opts, handle_, inputs, outputs, - [opts, rendezvous, local_cm, step_container, this, activity, + [opts, rendezvous, local_cm, step_container, this, done = std::move(done)](const Status& s) { - delete activity; rendezvous->Unref(); if (step_container == nullptr) { this->step_container_.CleanUp(); diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index af011ac95d8..5390d540502 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -65,8 +65,10 @@ limitations under the License. #include "tensorflow/core/platform/tracing.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/lib/annotated_traceme.h" +#include "tensorflow/core/profiler/lib/connected_traceme.h" #include "tensorflow/core/profiler/lib/scoped_annotation.h" #include "tensorflow/core/profiler/lib/traceme.h" +#include "tensorflow/core/profiler/lib/traceme_encode.h" #include "tensorflow/core/protobuf/error_codes.pb.h" #include "tensorflow/core/util/tensor_slice_reader_cache.h" @@ -627,16 +629,19 @@ void ExecutorState::ProcessConstTensor( template void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_nsec) { - profiler::TraceMe activity( + profiler::TraceMeConsumer activity( + // From TraceMeProducer in KernelAndDeviceFunc::RunAsync. [&] { // NOTE: This tracing uses the iteration number from the first tagged // node that executes during this call to `Process()`. In principle, // subsequent nodes could have different values of `iter_num` that // will not be traced. - return absl::StrCat("ExecutorState::Process#id=", step_id_, - ",iter_num=", tagged_node.get_iter_num(), "#"); + return profiler::TraceMeEncode( + "ExecutorState::Process", + {{"id", step_id_}, {"iter_num", tagged_node.get_iter_num()}}); }, - 2); + step_id_, profiler::ContextType::kTfExecutor, + profiler::TraceMeLevel::kInfo); WithContext wc(context_); TaggedNodeSeq ready; TaggedNodeReadyQueue inline_ready; @@ -1240,11 +1245,14 @@ void ExecutorState::Finish() { } delete this; runner([step_id, status, done_cb = std::move(done_cb)]() { - profiler::TraceMe traceme( + profiler::TraceMeConsumer activity( + // From TraceMeProducer in KernelAndDeviceFunc::RunAsync. 
[&] { - return absl::StrCat("ExecutorDoneCallback#id=", step_id, "#"); + return profiler::TraceMeEncode("ExecutorDoneCallback", + {{"id", step_id}}); }, - 2); + step_id, profiler::ContextType::kTfExecutor, + profiler::TraceMeLevel::kInfo); done_cb(status); }); return; diff --git a/tensorflow/core/profiler/lib/BUILD b/tensorflow/core/profiler/lib/BUILD index 2e32552e076..0f92ffd5a70 100644 --- a/tensorflow/core/profiler/lib/BUILD +++ b/tensorflow/core/profiler/lib/BUILD @@ -175,6 +175,7 @@ filegroup( name = "mobile_srcs", srcs = [ "annotated_traceme.h", + "connected_traceme.h", "profiler_session.cc", "profiler_session.h", "scoped_annotation.h", From cb3a0b94503ff02e79edcd72696eefde28ebaf5c Mon Sep 17 00:00:00 2001 From: Peng Wang Date: Tue, 9 Jun 2020 16:58:03 -0700 Subject: [PATCH 158/178] Adds sort_key_val to tf_numpy.extensions PiperOrigin-RevId: 315589546 Change-Id: Id9a4a4e218c74ac2c1085e4dde67fdd196fa2400 --- tensorflow/python/ops/numpy_ops/np_math_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/numpy_ops/np_math_ops.py b/tensorflow/python/ops/numpy_ops/np_math_ops.py index dac60b63bfe..316f3a36a4c 100644 --- a/tensorflow/python/ops/numpy_ops/np_math_ops.py +++ b/tensorflow/python/ops/numpy_ops/np_math_ops.py @@ -1101,7 +1101,7 @@ def argsort(a, axis=-1, kind='quicksort', order=None): # pylint: disable=missin return sort_ops.argsort(a, axis, stable=stable) - tf_ans = control_flow_ops.cond( + tf_ans = np_utils.cond( math_ops.equal(array_ops.rank(a), 0), lambda: constant_op.constant([0]), lambda: _argsort(a, axis, stable)) From 7da5b33f6214d7db754d54d0000f6c548f03d7d5 Mon Sep 17 00:00:00 2001 From: Ran Chen Date: Tue, 9 Jun 2020 17:04:46 -0700 Subject: [PATCH 159/178] Deprecate tf.distribute.Strategy.experimental_make_numpy_dataset Use tf.data.Dataset.from_tensor_slices instead. PiperOrigin-RevId: 315590873 Change-Id: Ib63ca31d31522b7bd9aa6a14ddd9970a29086013 --- .../python/distribute/distribute_lib.py | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/distribute/distribute_lib.py b/tensorflow/python/distribute/distribute_lib.py index 5ea738765b7..9f426c90c13 100644 --- a/tensorflow/python/distribute/distribute_lib.py +++ b/tensorflow/python/distribute/distribute_lib.py @@ -689,8 +689,10 @@ class StrategyBase(object): return self.extended._make_input_fn_iterator( # pylint: disable=protected-access input_fn, replication_mode=replication_mode) + @deprecation.deprecated( + "2020-09-30", "Please use tf.data.Dataset.from_tensor_slices instead") def experimental_make_numpy_dataset(self, numpy_input): - """Makes a `tf.data.Dataset` for input provided via a numpy array. + """Makes a `tf.data.Dataset` from a numpy array. This avoids adding `numpy_input` as a large constant in the graph, and copies the data to the machine or machines that will be processing @@ -700,16 +702,19 @@ class StrategyBase(object): with the returned dataset to further distribute it with the strategy. 
Example: - ``` - numpy_input = np.ones([10], dtype=np.float32) - dataset = strategy.experimental_make_numpy_dataset(numpy_input) - dist_dataset = strategy.experimental_distribute_dataset(dataset) - ``` + + >>> strategy = tf.distribute.MirroredStrategy() + >>> numpy_input = np.ones([10], dtype=np.float32) + >>> dataset = strategy.experimental_make_numpy_dataset(numpy_input) + >>> dataset + + >>> dataset = dataset.batch(2) + >>> dist_dataset = strategy.experimental_distribute_dataset(dataset) Args: - numpy_input: A nest of NumPy input arrays that will be converted into a - dataset. Note that lists of Numpy arrays are stacked, as that is normal - `tf.data.Dataset` behavior. + numpy_input: a nest of NumPy input arrays that will be converted into a + dataset. Note that the NumPy arrays are stacked, as that is normal + `tf.data.Dataset` behavior. Returns: A `tf.data.Dataset` representing `numpy_input`. From c9c8b624e32fca340803d032c275b7437e02ed97 Mon Sep 17 00:00:00 2001 From: Thomas O'Malley Date: Tue, 9 Jun 2020 17:08:01 -0700 Subject: [PATCH 160/178] Add eager microbenchmarks for Conv layer overheads. PiperOrigin-RevId: 315591494 Change-Id: Ib70d4899123a92893e95303d94305a717a03b5b1 --- .../benchmark/eager_microbenchmarks_test.py | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tensorflow/python/keras/benchmark/eager_microbenchmarks_test.py b/tensorflow/python/keras/benchmark/eager_microbenchmarks_test.py index 0894546b3b4..8d13fc3963e 100644 --- a/tensorflow/python/keras/benchmark/eager_microbenchmarks_test.py +++ b/tensorflow/python/keras/benchmark/eager_microbenchmarks_test.py @@ -22,7 +22,9 @@ import time from tensorflow.python.eager import context from tensorflow.python.framework import ops from tensorflow.python.keras.engine import base_layer +from tensorflow.python.keras.layers import convolutional as conv_layers from tensorflow.python.keras.layers import core as core_layers +from tensorflow.python.ops import array_ops from tensorflow.python.platform import test from tensorflow.python.util import tf_inspect @@ -120,6 +122,36 @@ class MicroBenchmarksBase(test.Benchmark): self._run(fn, 10000) + def benchmark_tf_keras_conv1d_overhead(self): + + layer = conv_layers.Conv1D(1, (1,)) + x = array_ops.ones((1, 1, 1)) + + def fn(): + layer(x) + + self._run(fn, 10000) + + def benchmark_tf_keras_conv2d_overhead(self): + + layer = conv_layers.Conv2D(1, (1, 1)) + x = array_ops.ones((1, 1, 1, 1)) + + def fn(): + layer(x) + + self._run(fn, 10000) + + def benchmark_tf_keras_conv3d_overhead(self): + + layer = conv_layers.Conv3D(1, (1, 1, 1)) + x = array_ops.ones((1, 1, 1, 1, 1)) + + def fn(): + layer(x) + + self._run(fn, 10000) + if __name__ == "__main__": ops.enable_eager_execution() From ec2fb44030e64c35ee1658a32abf4cf0b030abd9 Mon Sep 17 00:00:00 2001 From: Jaesung Chung Date: Tue, 9 Jun 2020 17:29:15 -0700 Subject: [PATCH 161/178] Allow tf.resource_name argument attributes in tf saved model dialect PiperOrigin-RevId: 315595041 Change-Id: Ib8dd9ba31f179532095e540ad279c5e71ebbe20c --- .../compiler/mlir/tensorflow/ir/tf_saved_model.cc | 7 +++++-- .../mlir/tensorflow/tests/tf_saved_model_ops.mlir | 13 +++++++++++++ .../tests/tf_saved_model_ops_invalid.mlir | 2 +- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc index 2deed928ba3..b73c1f5edc5 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc +++ 
b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc @@ -254,9 +254,12 @@ LogicalResult VerifyExportedFunc(FuncOp func) { } continue; } + if (func.getArgAttr(i, "tf.resource_name")) { + continue; + } return func.emitError() - << "all arguments should have 'tf_saved_model.index_path' or " - "'tf_saved_model.bound_input' attributes"; + << "all arguments should have 'tf_saved_model.index_path', " + "'tf_saved_model.bound_input' or 'tf.resource_name' attributes"; } llvm::SmallDenseSet unique_bound_inputs; for (int i = 0, e = func.getNumArguments(); i < e; i++) { diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops.mlir index 0ce3e69575b..b16925cdb58 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops.mlir @@ -40,3 +40,16 @@ module attributes {tf_saved_model.semantics} { } } + +// ----- + +module attributes {tf_saved_model.semantics} { + + // CHECK: func @f + func @f( + %arg0: tensor {tf.resource_name = "resource"} + ) attributes { tf_saved_model.exported_names = ["foo.some_func"] } { + return + } + +} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops_invalid.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops_invalid.mlir index f892b5dfb8b..c055c6c9f56 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops_invalid.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model_ops_invalid.mlir @@ -120,7 +120,7 @@ module attributes {tf_saved_model.semantics} { module attributes {tf_saved_model.semantics} { - // expected-error@+1 {{all arguments should have 'tf_saved_model.index_path' or 'tf_saved_model.bound_input' attributes}} + // expected-error@+1 {{all arguments should have 'tf_saved_model.index_path', 'tf_saved_model.bound_input' or 'tf.resource_name' attributes}} func @f( %arg0: tensor ) attributes { tf_saved_model.exported_names = ["f"] } { From de0a617f4ef321ebf579a4539cec85690f55fb4c Mon Sep 17 00:00:00 2001 From: Reed Wanderman-Milne Date: Tue, 9 Jun 2020 18:00:21 -0700 Subject: [PATCH 162/178] Do non-fused mixed precision BatchNormalization in float32. It's very easy for variance to overflow in float16. This causes the output of BatchNormalization to be 0. Doing the computations in float32 and casting the output back to float16 solves the issue. The output has very little risk of underflow or overflow as the square root of variance is taken before being used. PiperOrigin-RevId: 315599303 Change-Id: I0b7162b01ec748eb003a465a26215d1772d43cac --- .../python/keras/layers/normalization.py | 12 ++++++++++-- .../python/keras/layers/normalization_test.py | 18 ++++++++++++++++-- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/keras/layers/normalization.py b/tensorflow/python/keras/layers/normalization.py index 61e134e3d94..e5723a3ef98 100644 --- a/tensorflow/python/keras/layers/normalization.py +++ b/tensorflow/python/keras/layers/normalization.py @@ -724,6 +724,13 @@ class BatchNormalizationBase(Layer): outputs = undo_virtual_batching(outputs) return outputs + inputs_dtype = inputs.dtype.base_dtype + if inputs_dtype in (dtypes.float16, dtypes.bfloat16): + # Do all math in float32 if given 16-bit inputs for numeric stability. + # In particular, it's very easy for variance to overflow in float16 and + # for safety we also choose to cast bfloat16 to float32. 
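The overflow described in the commit message above is easy to reproduce with plain NumPy. The sketch below is an editor's illustration of the failure mode (it is not part of the patch and does not use the layer): squaring activations on the order of 1e3 already exceeds float16's maximum of roughly 65504, while doing the arithmetic in float32 and casting only the O(1) normalized output back stays finite.

```
import numpy as np

x = np.array([-1000.0, 1000.0], dtype=np.float16)

# The variance computation squares the centered inputs; in float16 this
# saturates immediately, because 1000**2 = 1e6 is far above ~65504.
print(np.float16(1000.0) ** 2)  # inf

# The patch's approach: do the math in float32, cast only the output back.
x32 = x.astype(np.float32)
mean, var = x32.mean(), x32.var()
normalized = ((x32 - mean) / np.sqrt(var + 1e-3)).astype(np.float16)
print(normalized)  # [-1.  1.] -- finite, matching the expected test output
```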
+ inputs = math_ops.cast(inputs, dtypes.float32) + # Compute the axes along which to reduce the mean / variance input_shape = inputs.shape ndims = len(input_shape) @@ -852,11 +859,12 @@ class BatchNormalizationBase(Layer): offset = math_ops.cast(offset, inputs.dtype) if scale is not None: scale = math_ops.cast(scale, inputs.dtype) - # TODO(reedwm): Maybe do math in float32 if given float16 inputs, if doing - # math in float16 hurts validation accuracy of popular models like resnet. outputs = nn.batch_normalization(inputs, _broadcast(mean), _broadcast(variance), offset, scale, self.epsilon) + if inputs_dtype in (dtypes.float16, dtypes.bfloat16): + outputs = math_ops.cast(outputs, inputs_dtype) + # If some components of the shape got lost due to adjustments, fix that. outputs.set_shape(input_shape) diff --git a/tensorflow/python/keras/layers/normalization_test.py b/tensorflow/python/keras/layers/normalization_test.py index e14977edfc4..ef43bcf5d22 100644 --- a/tensorflow/python/keras/layers/normalization_test.py +++ b/tensorflow/python/keras/layers/normalization_test.py @@ -146,7 +146,7 @@ class BatchNormalizationTest(keras_parameterized.TestCase): normalization_v2.BatchNormalization, dtype='float32') @keras_parameterized.run_all_keras_modes - def test_batchnorm_mixed_precision(self): + def test_batchnorm_float16(self): _run_batchnorm_correctness_test( normalization.BatchNormalization, dtype='float16') _run_batchnorm_correctness_test( @@ -154,7 +154,7 @@ class BatchNormalizationTest(keras_parameterized.TestCase): @combinations.generate(combinations.combine(mode=['graph', 'eager'])) @testing_utils.enable_v2_dtype_behavior - def test_batchnorm_policy(self): + def test_batchnorm_mixed_precision(self): norm = keras.layers.BatchNormalization( axis=-1, input_shape=(4, 4, 3), @@ -166,6 +166,20 @@ class BatchNormalizationTest(keras_parameterized.TestCase): self.assertEqual(norm.beta.dtype.base_dtype, 'float32') self.assertEqual(norm.gamma.dtype.base_dtype, 'float32') + @combinations.generate(combinations.combine(mode=['graph', 'eager'], + fused=[True, False])) + @testing_utils.enable_v2_dtype_behavior + def test_batchnorm_mixed_precision_does_not_overflow(self, fused): + norm = keras.layers.BatchNormalization( + axis=-1, + input_shape=(1, 1, 1), + fused=fused, + dtype=policy.Policy('mixed_float16')) + x = np.array([-1000., 1000.]).reshape((2, 1, 1, 1)) + y = norm(x, training=True) + expected_y = np.array([-1.0, 1.0]).reshape((2, 1, 1, 1)) + self.assertAllClose(keras.backend.eval(y), expected_y) + @keras_parameterized.run_all_keras_modes(always_skip_v1=True) def test_batchnorm_non_trainable_with_fit(self): # We use the same data shape for all the data we use in this test. From 81f28b5ad92df51435d8e800bab0ddd85a77ffd8 Mon Sep 17 00:00:00 2001 From: Tomer Kaftan Date: Tue, 9 Jun 2020 18:04:47 -0700 Subject: [PATCH 163/178] Fix bug where v1 vs v2 model class swapping does not trigger correctly in rare circumstances. Specifically: If model is subclassed and the subclass initializes like a functional model but does not explicitly extend the internal `Functional` class, class swapping according to if eager execution is enabled did not trigger on `Functional` the first time the model is constructed. This is unlikely to be an issue in practice unless code interleaves v1 eager-disabled code and v2 code a lot, and uses subclass models that initialize like functional models. We found it to cause rare nondeterministic test failures in our test suites, because we regularly test both legacy v1 graphs & v2 code. 
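For readers unfamiliar with the pattern the commit message refers to, this is the shape of code that exercises it: a `Model` subclass that never mentions the internal `Functional` class but is constructed with `(inputs, outputs)`. The sketch below mirrors the new test case using only public `tf.keras` APIs and is illustrative rather than part of the change.

```
import tensorflow as tf

class MyModel(tf.keras.Model):
  pass  # subclass does not explicitly extend the internal Functional class

inputs = tf.keras.Input(10)
outputs = tf.keras.layers.Dense(1)(inputs)

# Constructing the subclass with (inputs, outputs) triggers the
# functional-style init path whose class swapping this patch fixes.
model = MyModel(inputs, outputs)
```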
PiperOrigin-RevId: 315599954 Change-Id: I20eb52c67e5af12696425b93eeebe3664f8785ea --- tensorflow/python/keras/engine/training.py | 5 ++++ .../python/keras/utils/version_utils_test.py | 29 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index 29ff31d56db..d7918f1a1e1 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -134,6 +134,7 @@ def disable_multi_worker(method): def inject_functional_model_class(cls): + """Inject `Functional` into the hierarchy of this class if needed.""" from tensorflow.python.keras.engine import functional # pylint: disable=g-import-not-at-top from tensorflow.python.keras.engine import training_v1 # pylint: disable=g-import-not-at-top if cls == Model or cls == training_v1.Model: @@ -141,6 +142,10 @@ def inject_functional_model_class(cls): cls.__bases__ = tuple(inject_functional_model_class(base) for base in cls.__bases__) + # Trigger any `__new__` class swapping that needed to happen on `Functional` + # but did not because functional was not in the class hierarchy. + cls.__new__(cls) + return cls diff --git a/tensorflow/python/keras/utils/version_utils_test.py b/tensorflow/python/keras/utils/version_utils_test.py index 0a3cd53f3c0..41370e316af 100644 --- a/tensorflow/python/keras/utils/version_utils_test.py +++ b/tensorflow/python/keras/utils/version_utils_test.py @@ -56,6 +56,35 @@ class SplitUtilsTest(keras_parameterized.TestCase): self._check_model_class(model.__class__.__bases__[0]) self._check_layer_class(model) + def test_subclass_model_with_functional_init(self): + inputs = keras.Input(10) + outputs = keras.layers.Dense(1)(inputs) + + class MyModel(keras.Model): + pass + + model = MyModel(inputs, outputs) + model_class = model.__class__.__bases__[0].__bases__[0] + self._check_model_class(model_class) + self._check_layer_class(model) + + def test_subclass_model_with_functional_init_interleaved_v1_functional(self): + with ops.Graph().as_default(): + inputs = keras.Input(10) + outputs = keras.layers.Dense(1)(inputs) + _ = keras.Model(inputs, outputs) + + inputs = keras.Input(10) + outputs = keras.layers.Dense(1)(inputs) + + class MyModel(keras.Model): + pass + + model = MyModel(inputs, outputs) + model_class = model.__class__.__bases__[0].__bases__[0] + self._check_model_class(model_class) + self._check_layer_class(model) + def test_sequential_model(self): model = keras.Sequential([keras.layers.Dense(1)]) model_class = model.__class__.__bases__[0].__bases__[0] From 4738f8a6e6d1e9287f4c754f79b7895a07ef048f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Jun 2020 18:10:12 -0700 Subject: [PATCH 164/178] Fix FunctionRun's TraceMe to capture the scheduling of the function (instead of async execution of it) and use new TraceMe APIs. 
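The trace events touched by this change encode their metadata directly in the event name using the `Name#key=value,...#` convention that appears in the diff below (for example `FunctionRun#name=...,id=...#`). A throwaway Python helper that reproduces the string format, purely for illustration (the helper name is made up and is not a TensorFlow API):

```
def trace_me_encode(name, **metadata):
  # Mirrors the "Name#key=value,key=value#" convention used by the C++ code.
  return name + "#" + ",".join("%s=%s" % (k, v) for k, v in metadata.items()) + "#"

print(trace_me_encode("FunctionRun", name="f", id=315))
# FunctionRun#name=f,id=315#
```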
PiperOrigin-RevId: 315600594 Change-Id: I4ae7f90b4f2570c0595bfb9f1a6123fbeb0e84dd --- tensorflow/core/common_runtime/BUILD | 2 -- tensorflow/core/common_runtime/eager/BUILD | 2 -- .../common_runtime/eager/kernel_and_device.cc | 13 +++++------ tensorflow/core/common_runtime/executor.cc | 22 ++++++------------- tensorflow/core/profiler/lib/BUILD | 1 - 5 files changed, 12 insertions(+), 28 deletions(-) diff --git a/tensorflow/core/common_runtime/BUILD b/tensorflow/core/common_runtime/BUILD index df4a6c92385..ec9fe0ef688 100644 --- a/tensorflow/core/common_runtime/BUILD +++ b/tensorflow/core/common_runtime/BUILD @@ -582,10 +582,8 @@ cc_library( "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", "//tensorflow/core/profiler/lib:annotated_traceme", - "//tensorflow/core/profiler/lib:connected_traceme", "//tensorflow/core/profiler/lib:scoped_annotation", "//tensorflow/core/profiler/lib:traceme", - "//tensorflow/core/profiler/lib:traceme_encode", "@com_google_absl//absl/memory", ], alwayslink = 1, diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD index 6698e41ccb0..625468b39d5 100644 --- a/tensorflow/core/common_runtime/eager/BUILD +++ b/tensorflow/core/common_runtime/eager/BUILD @@ -290,9 +290,7 @@ KERNEL_AND_DEVICE_DEPS = [ "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", "//tensorflow/core/profiler/lib:annotated_traceme", - "//tensorflow/core/profiler/lib:connected_traceme", "//tensorflow/core/profiler/lib:traceme", - "//tensorflow/core/profiler/lib:traceme_encode", "//tensorflow/core/grappler/optimizers:meta_optimizer", ] diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device.cc b/tensorflow/core/common_runtime/eager/kernel_and_device.cc index c3d3d76fac5..48dd6a718e8 100644 --- a/tensorflow/core/common_runtime/eager/kernel_and_device.cc +++ b/tensorflow/core/common_runtime/eager/kernel_and_device.cc @@ -40,9 +40,7 @@ limitations under the License. #include "tensorflow/core/platform/fingerprint.h" #include "tensorflow/core/platform/setround.h" #include "tensorflow/core/profiler/lib/annotated_traceme.h" -#include "tensorflow/core/profiler/lib/connected_traceme.h" #include "tensorflow/core/profiler/lib/traceme.h" -#include "tensorflow/core/profiler/lib/traceme_encode.h" #include "tensorflow/core/public/version.h" #include "tensorflow/core/util/tensor_slice_reader_cache.h" #if !defined(IS_MOBILE_PLATFORM) @@ -383,17 +381,16 @@ void KernelAndDeviceFunc::RunAsync( outputs->clear(); - profiler::TraceMeProducer activity( - // To TraceMeConsumers in ExecutorState::Process/Finish. + profiler::TraceMe* activity = new profiler::TraceMe( [&] { - return profiler::TraceMeEncode( - "FunctionRun", {{"id", opts->step_id}, {"$r", 1} /*root_event*/}); + return absl::StrCat("FunctionRun#name=", name(), ",id=", opts->step_id, + "#"); }, - profiler::ContextType::kTfExecutor, opts->step_id, profiler::TraceMeLevel::kInfo); pflr_->Run(*opts, handle_, inputs, outputs, - [opts, rendezvous, local_cm, step_container, this, + [opts, rendezvous, local_cm, step_container, this, activity, done = std::move(done)](const Status& s) { + delete activity; rendezvous->Unref(); if (step_container == nullptr) { this->step_container_.CleanUp(); diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index 5390d540502..af011ac95d8 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -65,10 +65,8 @@ limitations under the License. 
#include "tensorflow/core/platform/tracing.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/profiler/lib/annotated_traceme.h" -#include "tensorflow/core/profiler/lib/connected_traceme.h" #include "tensorflow/core/profiler/lib/scoped_annotation.h" #include "tensorflow/core/profiler/lib/traceme.h" -#include "tensorflow/core/profiler/lib/traceme_encode.h" #include "tensorflow/core/protobuf/error_codes.pb.h" #include "tensorflow/core/util/tensor_slice_reader_cache.h" @@ -629,19 +627,16 @@ void ExecutorState::ProcessConstTensor( template void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_nsec) { - profiler::TraceMeConsumer activity( - // From TraceMeProducer in KernelAndDeviceFunc::RunAsync. + profiler::TraceMe activity( [&] { // NOTE: This tracing uses the iteration number from the first tagged // node that executes during this call to `Process()`. In principle, // subsequent nodes could have different values of `iter_num` that // will not be traced. - return profiler::TraceMeEncode( - "ExecutorState::Process", - {{"id", step_id_}, {"iter_num", tagged_node.get_iter_num()}}); + return absl::StrCat("ExecutorState::Process#id=", step_id_, + ",iter_num=", tagged_node.get_iter_num(), "#"); }, - step_id_, profiler::ContextType::kTfExecutor, - profiler::TraceMeLevel::kInfo); + 2); WithContext wc(context_); TaggedNodeSeq ready; TaggedNodeReadyQueue inline_ready; @@ -1245,14 +1240,11 @@ void ExecutorState::Finish() { } delete this; runner([step_id, status, done_cb = std::move(done_cb)]() { - profiler::TraceMeConsumer activity( - // From TraceMeProducer in KernelAndDeviceFunc::RunAsync. + profiler::TraceMe traceme( [&] { - return profiler::TraceMeEncode("ExecutorDoneCallback", - {{"id", step_id}}); + return absl::StrCat("ExecutorDoneCallback#id=", step_id, "#"); }, - step_id, profiler::ContextType::kTfExecutor, - profiler::TraceMeLevel::kInfo); + 2); done_cb(status); }); return; diff --git a/tensorflow/core/profiler/lib/BUILD b/tensorflow/core/profiler/lib/BUILD index 0f92ffd5a70..2e32552e076 100644 --- a/tensorflow/core/profiler/lib/BUILD +++ b/tensorflow/core/profiler/lib/BUILD @@ -175,7 +175,6 @@ filegroup( name = "mobile_srcs", srcs = [ "annotated_traceme.h", - "connected_traceme.h", "profiler_session.cc", "profiler_session.h", "scoped_annotation.h", From bbe0c4590c90a83dde1c24b91284ef370ba06eb0 Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Tue, 9 Jun 2020 19:10:24 -0700 Subject: [PATCH 165/178] [XLA:GPU] Support buffer aliasing for XLA:GPU This is the change on XLA side, the change on the TF/XLA bridge is still TBD. Dropping the check in xla_launch_util, as aliased buffers are no longer represented as nulls. 
PiperOrigin-RevId: 315607922 Change-Id: I24903d9288604cff142a7f0872d924a5da621e49 --- tensorflow/compiler/jit/xla_launch_util.cc | 4 - .../xla/service/gpu/gpu_executable.cc | 188 +++++++++++------- .../xla/tests/buffer_donation_test.cc | 4 +- 3 files changed, 120 insertions(+), 76 deletions(-) diff --git a/tensorflow/compiler/jit/xla_launch_util.cc b/tensorflow/compiler/jit/xla_launch_util.cc index 8c24f182f5c..209220938ed 100644 --- a/tensorflow/compiler/jit/xla_launch_util.cc +++ b/tensorflow/compiler/jit/xla_launch_util.cc @@ -468,10 +468,6 @@ Status XlaComputationLaunchContext::PopulateOutputs( << "Invalid input for outputs " << i << ": " << input_index; ctx->set_output(i, ctx->input(input_index)); } else { - if (MustAliasOutput(input_output_alias, output_num)) { - DCHECK(output.buffer({output_num}).is_null()) - << "Expected output buffer to be aliased, but it is not nil."; - } if (allocate_xla_tensors_) { TF_RETURN_IF_ERROR(SetBufferForTensorUnderAllocateXlaTensors( input_output_alias, output_num, ctx, i, shape, &output, diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc index bf65df20544..c8b11cab31a 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc @@ -360,6 +360,27 @@ StatusOr GpuExecutable::BufferForAllocation( } } +static Status CheckAlignment(const BufferAllocation& allocation, + se::DeviceMemoryBase buffer, int arg_idx) { + const int64 expected_alignment = [&] { + if (allocation.is_entry_computation_parameter()) { + return kEntryParameterAlignBytes; + } else if (allocation.is_constant()) { + return kConstantBufferAlignBytes; + } else { + return kXlaAllocatedBufferAlignBytes; + } + }(); + if (!buffer.is_null() && + reinterpret_cast(buffer.opaque()) % expected_alignment != 0) { + return InternalError( + "Address of buffer %d must be a multiple of %x, but " + "was %p", + arg_idx, expected_alignment, buffer.opaque()); + } + return Status::OK(); +} + StatusOr GpuExecutable::GenerateBufferAllocations( absl::Span arguments, const GpuExecutable::BufferAllocToDeviceMemoryMap* globals, @@ -378,28 +399,37 @@ StatusOr GpuExecutable::GenerateBufferAllocations( se::DeviceMemoryBase buffer, BufferForAllocation(arguments, globals, allocation, memory_allocator, executor->device_ordinal(), i)); - const int64 expected_alignment = [&] { - if (allocation.is_entry_computation_parameter()) { - return kEntryParameterAlignBytes; - } else if (allocation.is_constant()) { - return kConstantBufferAlignBytes; - } else { - return kXlaAllocatedBufferAlignBytes; - } - }(); - if (!buffer.is_null() && - reinterpret_cast(buffer.opaque()) % expected_alignment != - 0) { - return InternalError( - "Address of buffer %d must be a multiple of %x, but " - "was %p", - i, expected_alignment, buffer.opaque()); - } buffers.push_back(buffer); + TF_RETURN_IF_ERROR(CheckAlignment(allocation, buffer, i)); } return {{buffers, executor->device_ordinal(), memory_allocator}}; } +// Returns `true` if the entire tuple contents is aliased. 
+static bool EntireTupleContentsAliased( + const Shape& output_shape, const ShapeIndex& index, + const HloInputOutputAliasConfig& alias_config) { + const Shape& indexed_shape = ShapeUtil::GetSubshape(output_shape, index); + if (!indexed_shape.IsTuple()) { + return false; + } + bool all_aliased = true; + ShapeUtil::ForEachSubshape( + indexed_shape, [&](const Shape& subshape, const ShapeIndex& subindex) { + if (subindex.empty()) { + return; + } + std::vector full_index; + absl::c_copy(index, std::back_inserter(full_index)); + absl::c_copy(subindex, std::back_inserter(full_index)); + if (!alias_config.OutputHasAlias( + ShapeIndex(full_index.begin(), full_index.end()))) { + all_aliased = false; + } + }); + return all_aliased; +} + StatusOr GpuExecutable::ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, std::vector arguments, @@ -425,84 +455,102 @@ StatusOr GpuExecutable::ExecuteAsyncOnStream( } se::StreamExecutor* executor = run_options->stream()->parent(); - TF_ASSIGN_OR_RETURN(BufferAllocations buffer_allocations, - GenerateBufferAllocations(arguments, globals, - memory_allocator, executor)); - - for (Thunk* thunk : thunk_schedule_->TotalOrder()) { - TF_RETURN_IF_ERROR(thunk->Initialize(*this, executor)); - } - VLOG(2) << buffer_allocations.ToString(); - TF_RETURN_IF_ERROR(ExecuteThunks(run_options, buffer_allocations, - block_host_until_done, - hlo_execution_profile)); HloInstruction* root = hlo_module_->entry_computation()->root_instruction(); auto device_ordinal = executor->device_ordinal(); - ExecutionOutput result(root->shape(), root->shape(), memory_allocator, + ExecutionOutput result(/*on_host_shape=*/root->shape(), + /*on_device_shape=*/root->shape(), memory_allocator, device_ordinal); + TF_ASSIGN_OR_RETURN(BufferAllocations buffer_allocations, + GenerateBufferAllocations(arguments, globals, + memory_allocator, executor)); + VLOG(2) << buffer_allocations.ToString(); std::set buffers_in_result; for (auto& p : result.MutableResult()->buffers()) { const ShapeIndex& index = p.first; - se::DeviceMemoryBase& device_memory = p.second; + se::DeviceMemoryBase& result_buffer = p.second; const auto& sources = GetRootValueSet().element(index); // The points-to set is unambiguous so the set should be a // singleton. That is, we know exactly which instruction // produced the array at this element. CHECK_EQ(1, sources.values().size()); - auto src_hlo = sources.values()[0]->instruction(); + HloInstruction* src_hlo = sources.values()[0]->instruction(); VLOG(4) << "Looking at: " << sources.values()[0]; - // The source instruction should have a non-parameter buffer - // assigned. - TF_ASSIGN_OR_RETURN( - const BufferAllocation::Slice slice, - assignment_->GetUniqueSlice(src_hlo, sources.values()[0]->index())); - - se::DeviceMemoryBase src_base = - buffer_allocations.GetDeviceAddress(slice.index()); - CHECK(!src_base.is_null() || src_base.size() == 0); - if (!slice.allocation()->is_entry_computation_parameter()) { - // If the buffer coming out of the result is from a parameter, it - // means the caller aliased some parameter buffer to an output one - // (via the HloInputOutputAliasConfig API). If that is the case, the - // caller will receive a partially complete scoped shaped buffer, - // which they will have to fill up on return. 
- // Unfortunately the interface to the execute APIs are ShapedBuffer - // pointer based, which assumes caller ownership, and hence a buffer - // coming from there cannot be part of the new ScopedShapedBuffer we - // create for the result (which assumes ownership). - device_memory = src_base; - } else { - const HloInputOutputAliasConfig& input_output_alias = - module().input_output_alias_config(); - auto output_alias = input_output_alias.GetAliasedOutput( - slice.allocation()->parameter_number(), - slice.allocation()->param_shape_index()); - CHECK(output_alias) << "Output buffer is coming from parameter " - << slice.allocation()->parameter_number() - << " at index " - << slice.allocation()->param_shape_index() - << ", but no alias exists"; - CHECK_EQ(*output_alias, index); + const HloInputOutputAliasConfig& input_output_alias = + module().input_output_alias_config(); + absl::optional alias = + input_output_alias.GetAliasedParameter(index); + if (alias) { + CHECK_LT(alias->parameter_number, arguments.size()); + ExecutionInput& input = arguments[alias->parameter_number]; + MaybeOwningDeviceMemory* maybe_owning_memory = + input.MutableBuffer(alias->parameter_index); + if (absl::optional owning = + maybe_owning_memory->Release()) { + // If the caller passes the ownership of the device memory, reuse it + // as the output buffer. It is up to the caller whether or not to + // donate a buffer; the aliasing information describes which buffers + // may alias, not buffers that must alias. + se::DeviceMemoryBase argument_buffer = owning->Release(); + *maybe_owning_memory = argument_buffer; + result_buffer = argument_buffer; + if (alias->kind == HloInputOutputAliasConfig::kUserAlias) { + // This is a user alias, so a must alias. The caller is giving us the + // input buffer, but in case of error from the execute call, we should + // not be releasing it as it contains valid data (for example, it is a + // parameter which the user wants us to alias, in a gradient update + // computation). So we store the index into the result in the aliased + // vector, which will be fed to the ExecutionOutput, which will use + // the indices to drop the addresses from its own ScopedShapedBuffer + // result, if the ExecutionOutput is not committed. + result.AddAliasedIndex(index); + } + } } - buffers_in_result.insert(src_base); + + if (result_buffer.is_null()) { + // The source instruction should have a non-parameter buffer + // assigned. + TF_ASSIGN_OR_RETURN( + const BufferAllocation::Slice slice, + assignment_->GetUniqueSlice(src_hlo, sources.values()[0]->index())); + result_buffer = buffer_allocations.GetDeviceAddress(slice.index()); + + // If the entire tuple contents is aliased, the copy insertion will *not* + // materialize a new tuple, so we mark it as aliased as well. + if (EntireTupleContentsAliased(root->shape(), index, + input_output_alias)) { + result.AddAliasedIndex(index); + } + } + buffers_in_result.insert(result_buffer); } + + for (Thunk* thunk : thunk_schedule_->TotalOrder()) { + TF_RETURN_IF_ERROR(thunk->Initialize(*this, executor)); + } + TF_RETURN_IF_ERROR(ExecuteThunks(run_options, buffer_allocations, + block_host_until_done, + hlo_execution_profile)); + + // Free all temporary allocations. TF_RETURN_IF_ERROR( buffer_allocations.TearDown(buffers_in_result, assignment_.get())); - std::vector buffers_to_free; - for (auto& argument : arguments) { + // Free allocations for arguments. 
+ for (ExecutionInput& argument : arguments) { for (auto& index_buffer : *argument.MutableBuffers()) { - auto maybe_owning_buffer = index_buffer.second.Release(); - if (maybe_owning_buffer) { - buffers_to_free.push_back(std::move(*maybe_owning_buffer)); + if (absl::optional owning = + index_buffer.second.Release()) { + result.AddToBeReleased(std::move(*owning)); } } } - return result; + + return std::move(result); } const InstructionValueSet& GpuExecutable::GetRootValueSet() const { diff --git a/tensorflow/compiler/xla/tests/buffer_donation_test.cc b/tensorflow/compiler/xla/tests/buffer_donation_test.cc index be76fa74ae2..af2c0b1b3ce 100644 --- a/tensorflow/compiler/xla/tests/buffer_donation_test.cc +++ b/tensorflow/compiler/xla/tests/buffer_donation_test.cc @@ -216,8 +216,8 @@ TEST_F(BufferDonationTest, SimpleWhileTupleTest) { HloInstruction::CreateGetTupleElement(f32v1_, while0, 1)); builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1})); module->AddEntryComputation(builder.Build()); - // Input output aliasing is only supported on TPU. -#if defined(XLA_TEST_BACKEND_TPU) + // Input output aliasing is supported on CPU and GPU. +#if defined(XLA_TEST_BACKEND_TPU) || defined(XLA_TEST_BACKEND_GPU) TF_ASSERT_OK(module->input_output_alias_config().SetUpAlias({0}, 0, {0})); TF_ASSERT_OK(module->input_output_alias_config().SetUpAlias({1}, 0, {1})); #endif From 0d40f59242fa683a408533ac563deb81b97166fa Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Jun 2020 19:15:05 -0700 Subject: [PATCH 166/178] [TF:XLA] Update an error message to include the word "uninitialized" because many tests depend on that word being in the error message. Additional update tests that search for an outdated error message to just search for "uninitialized". PiperOrigin-RevId: 315608436 Change-Id: If696f9545f4a55d5268f842d856273781afaffe4 --- tensorflow/compiler/tests/variable_ops_test.py | 2 +- tensorflow/compiler/tf2xla/xla_op_kernel.cc | 4 ++-- tensorflow/python/kernel_tests/resource_variable_ops_test.py | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/tests/variable_ops_test.py b/tensorflow/compiler/tests/variable_ops_test.py index aeafc993a5b..2514a0a9dc4 100644 --- a/tensorflow/compiler/tests/variable_ops_test.py +++ b/tensorflow/compiler/tests/variable_ops_test.py @@ -486,7 +486,7 @@ class SliceAssignTest(xla_test.XLATestCase): def testUninitialized(self): with self.assertRaisesRegexp(errors.FailedPreconditionError, - "Read variable failure"): + "uninitialized"): with self.session() as sess, self.test_scope(): v = resource_variable_ops.ResourceVariable([1, 2]) sess.run(v[:].assign([1, 2])) diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.cc b/tensorflow/compiler/tf2xla/xla_op_kernel.cc index a72e3fd44dd..27766408716 100644 --- a/tensorflow/compiler/tf2xla/xla_op_kernel.cc +++ b/tensorflow/compiler/tf2xla/xla_op_kernel.cc @@ -415,7 +415,7 @@ Status ReadVariableInputTensor(const Tensor& tensor, DataType type, if (!variable->initialized()) { return errors::FailedPrecondition( "Read variable failure ", variable->name(), - ". It could mean the variable is not initialized or the variable is on " + ". It could mean the variable is uninitialized or the variable is on " "another device "); } if (variable->type() != type) { @@ -468,7 +468,7 @@ Status XlaOpKernelContext::GetVariableTypeAndShape(int index, DataType* type, if (!variable->initialized()) { return errors::InvalidArgument( "Read variable failure ", variable->name(), - ". 
It could mean the variable is not initialized or the variable is on " + ". It could mean the variable is uninitialized or the variable is on " "another device "); } *type = variable->type(); diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index bf229943fd4..b45e9dfb2bc 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -1072,8 +1072,7 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase, dtype=v.dtype.base_dtype, shape=v.get_shape(), shared_name="var5", container=ops.get_default_graph()._container) with self.assertRaisesOpError( - "(Resource .*/var5/.* does not exist|Read of uninitialized variable)" - ): + "(Resource .*/var5/.* does not exist|uninitialized)"): resource_variable_ops.read_variable_op(x, v.dtype.base_dtype).eval() @test_util.run_deprecated_v1 From c0875c5cfa97325ec2bcb4fa5cba1f3a30a46ce7 Mon Sep 17 00:00:00 2001 From: Geoffrey Martin-Noble Date: Tue, 9 Jun 2020 19:46:46 -0700 Subject: [PATCH 167/178] Better align LLVM Bazel library names with upstream and internal names PiperOrigin-RevId: 315611650 Change-Id: Iddabf75a91051d43bd98be6d25d682794da2fe6b --- third_party/llvm/llvm.autogenerated.BUILD | 125 +++++++++++++--------- 1 file changed, 77 insertions(+), 48 deletions(-) diff --git a/third_party/llvm/llvm.autogenerated.BUILD b/third_party/llvm/llvm.autogenerated.BUILD index ecc9e48bfb2..2857de01ecc 100644 --- a/third_party/llvm/llvm.autogenerated.BUILD +++ b/third_party/llvm/llvm.autogenerated.BUILD @@ -151,7 +151,7 @@ gentbl( ) gentbl( - name = "instcombine_transforms_gen", + name = "InstCombineTableGen", tbl_outs = [( "-gen-searchable-tables", "lib/Transforms/InstCombine/InstCombineTables.inc", @@ -383,15 +383,20 @@ gentbl( ) cc_library( - name = "utils_tablegen", + name = "tblgen", srcs = glob([ + "utils/TableGen/*.cpp", + "utils/TableGen/*.h", "utils/TableGen/GlobalISel/*.cpp", ]), hdrs = glob([ "utils/TableGen/GlobalISel/*.h", ]), deps = [ - ":tablegen", + ":MC", + ":Support", + ":TableGen", + ":config", ], ) @@ -431,6 +436,7 @@ llvm_target_list = [ "name": "AArch64", "lower_name": "aarch64", "short_name": "AArch64", + "dir_name": "AArch64", "tbl_outs": [ ("-gen-register-bank", "lib/Target/AArch64/AArch64GenRegisterBank.inc"), ("-gen-register-info", "lib/Target/AArch64/AArch64GenRegisterInfo.inc"), @@ -443,7 +449,8 @@ llvm_target_list = [ ("-gen-dag-isel", "lib/Target/AArch64/AArch64GenDAGISel.inc"), ("-gen-fast-isel", "lib/Target/AArch64/AArch64GenFastISel.inc"), ("-gen-global-isel", "lib/Target/AArch64/AArch64GenGlobalISel.inc"), - ("-gen-global-isel-combiner -combiners=AArch64PreLegalizerCombinerHelper", "lib/Target/AArch64/AArch64GenGICombiner.inc"), + ("-gen-global-isel-combiner -combiners=AArch64PreLegalizerCombinerHelper", "lib/Target/AArch64/AArch64GenPreLegalizeGICombiner.inc"), + ("-gen-global-isel-combiner -combiners=AArch64PostLegalizerCombinerHelper", "lib/Target/AArch64/AArch64GenPostLegalizeGICombiner.inc"), ("-gen-callingconv", "lib/Target/AArch64/AArch64GenCallingConv.inc"), ("-gen-subtarget", "lib/Target/AArch64/AArch64GenSubtargetInfo.inc"), ("-gen-disassembler", "lib/Target/AArch64/AArch64GenDisassemblerTables.inc"), @@ -454,44 +461,30 @@ llvm_target_list = [ "name": "AMDGPU", "lower_name": "amdgpu", "short_name": "AMDGPU", + "dir_name": "AMDGPU", "tbl_outs": [ ("-gen-register-bank", "lib/Target/AMDGPU/AMDGPUGenRegisterBank.inc"), ("-gen-register-info", 
"lib/Target/AMDGPU/AMDGPUGenRegisterInfo.inc"), ("-gen-instr-info", "lib/Target/AMDGPU/AMDGPUGenInstrInfo.inc"), + ("-gen-emitter", "lib/Target/AMDGPU/AMDGPUGenMCCodeEmitter.inc"), + ("-gen-pseudo-lowering", "lib/Target/AMDGPU/AMDGPUGenMCPseudoLowering.inc"), + ("-gen-asm-writer", "lib/Target/AMDGPU/AMDGPUGenAsmWriter.inc"), + ("-gen-asm-matcher", "lib/Target/AMDGPU/AMDGPUGenAsmMatcher.inc"), ("-gen-dag-isel", "lib/Target/AMDGPU/AMDGPUGenDAGISel.inc"), ("-gen-callingconv", "lib/Target/AMDGPU/AMDGPUGenCallingConv.inc"), ("-gen-subtarget", "lib/Target/AMDGPU/AMDGPUGenSubtargetInfo.inc"), - ("-gen-emitter", "lib/Target/AMDGPU/AMDGPUGenMCCodeEmitter.inc"), - ("-gen-dfa-packetizer", "lib/Target/AMDGPU/AMDGPUGenDFAPacketizer.inc"), - ("-gen-asm-writer", "lib/Target/AMDGPU/AMDGPUGenAsmWriter.inc"), - ("-gen-asm-matcher", "lib/Target/AMDGPU/AMDGPUGenAsmMatcher.inc"), ("-gen-disassembler", "lib/Target/AMDGPU/AMDGPUGenDisassemblerTables.inc"), - ("-gen-pseudo-lowering", "lib/Target/AMDGPU/AMDGPUGenMCPseudoLowering.inc"), ("-gen-searchable-tables", "lib/Target/AMDGPU/AMDGPUGenSearchableTables.inc"), ], "tbl_deps": [ ":amdgpu_isel_target_gen", ], }, - { - "name": "AMDGPU", - "lower_name": "amdgpu_r600", - "short_name": "R600", - "tbl_outs": [ - ("-gen-asm-writer", "lib/Target/AMDGPU/R600GenAsmWriter.inc"), - ("-gen-callingconv", "lib/Target/AMDGPU/R600GenCallingConv.inc"), - ("-gen-dag-isel", "lib/Target/AMDGPU/R600GenDAGISel.inc"), - ("-gen-dfa-packetizer", "lib/Target/AMDGPU/R600GenDFAPacketizer.inc"), - ("-gen-instr-info", "lib/Target/AMDGPU/R600GenInstrInfo.inc"), - ("-gen-emitter", "lib/Target/AMDGPU/R600GenMCCodeEmitter.inc"), - ("-gen-register-info", "lib/Target/AMDGPU/R600GenRegisterInfo.inc"), - ("-gen-subtarget", "lib/Target/AMDGPU/R600GenSubtargetInfo.inc"), - ], - }, { "name": "ARM", "lower_name": "arm", "short_name": "ARM", + "dir_name": "ARM", "tbl_outs": [ ("-gen-register-bank", "lib/Target/ARM/ARMGenRegisterBank.inc"), ("-gen-register-info", "lib/Target/ARM/ARMGenRegisterInfo.inc"), @@ -513,6 +506,7 @@ llvm_target_list = [ "name": "NVPTX", "lower_name": "nvptx", "short_name": "NVPTX", + "dir_name": "NVPTX", "tbl_outs": [ ("-gen-register-info", "lib/Target/NVPTX/NVPTXGenRegisterInfo.inc"), ("-gen-instr-info", "lib/Target/NVPTX/NVPTXGenInstrInfo.inc"), @@ -525,6 +519,7 @@ llvm_target_list = [ "name": "PowerPC", "lower_name": "powerpc", "short_name": "PPC", + "dir_name": "PowerPC", "tbl_outs": [ ("-gen-asm-writer", "lib/Target/PowerPC/PPCGenAsmWriter.inc"), ("-gen-asm-matcher", "lib/Target/PowerPC/PPCGenAsmMatcher.inc"), @@ -542,6 +537,7 @@ llvm_target_list = [ "name": "X86", "lower_name": "x86", "short_name": "X86", + "dir_name": "X86", "tbl_outs": [ ("-gen-register-bank", "lib/Target/X86/X86GenRegisterBank.inc"), ("-gen-register-info", "lib/Target/X86/X86GenRegisterInfo.inc"), @@ -556,6 +552,7 @@ llvm_target_list = [ ("-gen-callingconv", "lib/Target/X86/X86GenCallingConv.inc"), ("-gen-subtarget", "lib/Target/X86/X86GenSubtargetInfo.inc"), ("-gen-x86-EVEX2VEX-tables", "lib/Target/X86/X86GenEVEX2VEXTables.inc"), + ("-gen-exegesis", "lib/Target/X86/X86GenExegesis.inc"), ], }, ] @@ -588,25 +585,45 @@ gentbl( ]), ) -[ - gentbl( - name = target["lower_name"] + "_target_gen", +gentbl( + name = "r600_target_gen", + tbl_outs = [ + ("-gen-asm-writer", "lib/Target/AMDGPU/R600GenAsmWriter.inc"), + ("-gen-callingconv", "lib/Target/AMDGPU/R600GenCallingConv.inc"), + ("-gen-dag-isel", "lib/Target/AMDGPU/R600GenDAGISel.inc"), + ("-gen-dfa-packetizer", "lib/Target/AMDGPU/R600GenDFAPacketizer.inc"), + 
("-gen-instr-info", "lib/Target/AMDGPU/R600GenInstrInfo.inc"), + ("-gen-emitter", "lib/Target/AMDGPU/R600GenMCCodeEmitter.inc"), + ("-gen-register-info", "lib/Target/AMDGPU/R600GenRegisterInfo.inc"), + ("-gen-subtarget", "lib/Target/AMDGPU/R600GenSubtargetInfo.inc"), + ], + tblgen = ":llvm-tblgen", + td_file = "lib/Target/AMDGPU/R600.td", + td_srcs = [ + ":common_target_td_sources", + ] + glob([ + "lib/Target/AMDGPU/*.td", + ]), +) + +[[ + [gentbl( + name = target["name"] + "CommonTableGen", tbl_outs = target["tbl_outs"], tblgen = ":llvm-tblgen", - td_file = ("lib/Target/" + target["name"] + "/" + target["short_name"] + - ".td"), - td_srcs = glob([ - "lib/Target/" + target["name"] + "/*.td", - "include/llvm/CodeGen/*.td", - "include/llvm/IR/Intrinsics*.td", - "include/llvm/TableGen/*.td", - "include/llvm/Target/*.td", - "include/llvm/Target/GlobalISel/*.td", + td_file = "lib/Target/" + target["dir_name"] + "/" + target["short_name"] + ".td", + td_srcs = [ + ":common_target_td_sources", + ] + glob([ + "lib/Target/" + target["dir_name"] + "/*.td", ]), deps = target.get("tbl_deps", []), - ) - for target in llvm_target_list -] + )], + [alias( + name = target["lower_name"] + "_target_gen", + actual = target["name"] + "CommonTableGen", + )], +] for target in llvm_target_list] # This target is used to provide *.def files to x86_code_gen. # Files with '.def' extension are not allowed in 'srcs' of 'cc_library' rule. @@ -648,17 +665,14 @@ cc_binary( ) cc_library( - name = "all_targets", + name = "AllTargetsCodeGens", deps = [ - ":aarch64_code_gen", - ":amdgpu_code_gen", - ":arm_code_gen", - ":nvptx_code_gen", - ":powerpc_code_gen", - ":x86_code_gen", + target["name"] + "CodeGen" + for target in llvm_target_list ], ) +########################## Begin generated content ########################## cc_library( name = "AArch64AsmParser", srcs = glob([ @@ -997,10 +1011,10 @@ cc_library( copts = llvm_copts + ["-Iexternal/llvm-project/llvm/lib/Target/AMDGPU"], deps = [ ":Support", - ":amdgpu_r600_target_gen", ":amdgpu_target_gen", ":config", ":core", + ":r600_target_gen", ], ) @@ -1028,9 +1042,9 @@ cc_library( ":Core", ":MC", ":Support", - ":amdgpu_r600_target_gen", ":amdgpu_target_gen", ":config", + ":r600_target_gen", ], ) @@ -5541,3 +5555,18 @@ alias( name = "x86_target_disassembler", actual = ":x86_disassembler", ) + +alias( + name = "all_targets", + actual = ":AllTargetsCodeGens", +) + +alias( + name = "instcombine_transforms_gen", + actual = ":InstCombineTableGen", +) + +alias( + name = "utils_tablegen", + actual = ":tblgen", +) From 012401fb38f081b46a4b30bfbf6a6128c8cd8310 Mon Sep 17 00:00:00 2001 From: Srinivas Vasudevan Date: Tue, 9 Jun 2020 20:21:50 -0700 Subject: [PATCH 168/178] Improve precision of Log1p in the range [1e-4, 1e-3]. 
PiperOrigin-RevId: 315615050 Change-Id: Idc2f34ae4f2f231d49bebb308eb88e76f8c68878 --- .../compiler/tests/special_math_test.py | 75 +++++++++++++++++++ tensorflow/compiler/tests/unary_ops_test.py | 14 ++-- .../xla/service/elemental_ir_emitter.cc | 47 +++++++++++- .../xla/service/elemental_ir_emitter.h | 4 + 4 files changed, 132 insertions(+), 8 deletions(-) diff --git a/tensorflow/compiler/tests/special_math_test.py b/tensorflow/compiler/tests/special_math_test.py index 3efaa6434be..246ab2a1641 100644 --- a/tensorflow/compiler/tests/special_math_test.py +++ b/tensorflow/compiler/tests/special_math_test.py @@ -61,6 +61,81 @@ def implicit_reparameterization_grad(a, x): return -gen_math_ops.igamma_grad_a(a, x) / prob +@def_function.function(experimental_compile=True) +def _log1p(x): + return math_ops.log1p(x) + + +class Log1pTest(xla_test.XLATestCase, parameterized.TestCase): + + def setUp(self): + if flags.FLAGS.vary_seed: + entropy = os.urandom(64) + if six.PY2: + answer = int(entropy.encode('hex'), 16) + else: + answer = int.from_bytes(entropy, 'big') + np.random.seed(answer % (2**32 - 1)) + super(Log1pTest, self).setUp() + + def adjust_tolerance_for_tpu(self, dtype, rtol, atol): + if self.device not in ['TPU']: + return rtol, atol + + if dtype == np.float32: + return 4e-4, 0. + return 1e-10, 0. + + def _test_range(self, low, high, dtype, rtol, atol, is_negative=False): + # Test values near zero. + rtol, atol = self.adjust_tolerance_for_tpu(dtype, rtol, atol) + x = np.exp(np.random.uniform( + low=low, high=high, size=[NUM_SAMPLES])).astype(dtype) + if is_negative: + x = -x + expected_values = np.log1p(x) + with self.session() as sess: + with self.test_scope(): + actual = _log1p(x) + actual = sess.run(actual) + self.assertAllClose(expected_values, actual, atol=atol, rtol=rtol) + + @parameterized.parameters((np.float32, 1e-7, 0.), + (np.float64, 1e-15, 0.)) + def testSmallX(self, dtype, rtol, atol): + self._test_range(-40., -20., dtype, rtol, atol, is_negative=False) + self._test_range(-40., -20., dtype, rtol, atol, is_negative=True) + + @parameterized.parameters((np.float32, 1e-7, 0.), + (np.float64, 1e-15, 0.)) + def testGreaterThanNegativeTwentyExponent(self, dtype, rtol, atol): + self._test_range(-20., -10., dtype, rtol, atol, is_negative=False) + self._test_range(-20., -10., dtype, rtol, atol, is_negative=True) + + @parameterized.parameters((np.float32, 1e-7, 0.), + (np.float64, 1e-15, 0.)) + def testGreaterThanNegativeTenExponent(self, dtype, rtol, atol): + self._test_range(-10., -5., dtype, rtol, atol, is_negative=False) + self._test_range(-10., -5., dtype, rtol, atol, is_negative=True) + + @parameterized.parameters((np.float32, 2e-7, 0.), + (np.float64, 1e-15, 0.)) + def testGreaterThanNegativeFiveExponent(self, dtype, rtol, atol): + self._test_range(-5., -1., dtype, rtol, atol, is_negative=False) + self._test_range(-5., -1., dtype, rtol, atol, is_negative=True) + + @parameterized.parameters((np.float32, 4e-7, 0.), + (np.float64, 3e-14, 0.)) + def testXGreaterThanOneTenth(self, dtype, rtol, atol): + self._test_range(-1., 0., dtype, rtol, atol, is_negative=False) + self._test_range(-1., 0., dtype, rtol, atol, is_negative=True) + + @parameterized.parameters((np.float32, 2e-7, 0.), + (np.float64, 2e-15, 0.)) + def testXGreaterThanOne(self, dtype, rtol, atol): + self._test_range(0., 3., dtype, rtol, atol, is_negative=False) + + class IgammaTest(xla_test.XLATestCase, parameterized.TestCase): def setUp(self): diff --git a/tensorflow/compiler/tests/unary_ops_test.py 
b/tensorflow/compiler/tests/unary_ops_test.py index 567e75a9a17..efea525b6b9 100644 --- a/tensorflow/compiler/tests/unary_ops_test.py +++ b/tensorflow/compiler/tests/unary_ops_test.py @@ -292,13 +292,17 @@ class UnaryOpsTest(xla_test.XLATestCase): np.array([[1, 2]], dtype=dtype), expected=np.array([[0.540297, -0.41614]], dtype=dtype)) + # Confirm that log1p will remain precise across a range of small values. self._assertOpOutputMatchesExpected( math_ops.log1p, - np.array([[1e-14, 1e-15, 0.6]], dtype=dtype), - expected=np.log1p(np.array([[1e-14, 1e-15, 0.6]], - dtype=dtype)).astype(dtype), - rtol=1e-4, - atol=1e-6) + np.array([[1e-14, 1e-15, 0.6, 2] + [x * 1e-5 for x in range(1, 20)]], + dtype=dtype), + expected=np.log1p( + np.array( + [[1e-14, 1e-15, 0.6, 2] + [x * 1e-5 for x in range(1, 20)]], + dtype=dtype)).astype(dtype), + rtol=1e-15 if dtype == np.float64 else 1e-4, + atol=1e-15 if dtype == np.float64 else 1e-4) self._assertOpOutputMatchesExpected( math_ops.rint, diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc index 8cb660de46c..e4097b0c06f 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc @@ -1336,9 +1336,40 @@ StatusOr ElementalIrEmitter::EmitLog1p(PrimitiveType prim_type, // When x is large, the naive evaluation of ln(x + 1) is more // accurate than the Taylor series. TF_ASSIGN_OR_RETURN(auto for_large_x, EmitLog(prim_type, FAdd(x, one))); - // The Taylor series for ln(x+1) is x - x^2/2 - x^3/3 + …. - auto for_small_x = FMul(FAdd(FMul(negative_half, x), one), x); - const auto kAntilogarithmIsSmallThreshold = 1e-4; + // When x is small, (defined to be less than sqrt(2) / 2), use a rational + // approximation. The approximation below is based on one from the Cephes + // Mathematical Library. + // + // sqrt(2) - 1. + const auto kAntilogarithmIsSmallThreshold = 0.41421356237309504880; + + static const std::array kDenominatorCoeffs{ + 1., + 1.5062909083469192043167E1, + 8.3047565967967209469434E1, + 2.2176239823732856465394E2, + 3.0909872225312059774938E2, + 2.1642788614495947685003E2, + 6.0118660497603843919306E1, + }; + + static const std::array kNumeratorCoeffs{ + 4.5270000862445199635215E-5, 4.9854102823193375972212E-1, + 6.5787325942061044846969E0, 2.9911919328553073277375E1, + 6.0949667980987787057556E1, 5.7112963590585538103336E1, + 2.0039553499201281259648E1, + }; + + auto x_squared = FMul(x, x); + TF_ASSIGN_OR_RETURN(auto denominator, + EvaluatePolynomial(type, x, kDenominatorCoeffs)); + TF_ASSIGN_OR_RETURN(auto numerator, + EvaluatePolynomial(type, x, kNumeratorCoeffs)); + auto for_small_x = FDiv(numerator, denominator); + for_small_x = FMul(FMul(x, x_squared), for_small_x); + for_small_x = FAdd(FMul(negative_half, x_squared), for_small_x); + for_small_x = FAdd(x, for_small_x); + auto abs_x = llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::fabs, {value}, {type}, b_); auto x_is_small = FCmpOLT( @@ -2699,4 +2730,14 @@ StatusOr ElementalIrEmitter::EmitElementalReduce( } } +// Evaluate polynomial using Horner's method. 
+StatusOr ElementalIrEmitter::EvaluatePolynomial( + llvm::Type* type, llvm::Value* x, absl::Span coefficients) { + llvm::Value* poly = llvm::ConstantFP::get(type, 0.0); + for (const double c : coefficients) { + poly = FAdd(FMul(poly, x), llvm::ConstantFP::get(type, c)); + } + return poly; +} + } // namespace xla diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.h b/tensorflow/compiler/xla/service/elemental_ir_emitter.h index 06a9d7b194c..e39d2dd99ec 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.h +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.h @@ -258,6 +258,10 @@ class ElementalIrEmitter : public IrBuilderMixin { StatusOr EmitComplexPower(const HloInstruction* op, llvm::Value* a, llvm::Value* b, llvm::Value* c, llvm::Value* d); + + // Evaluates a polynomial using Horner's method. + StatusOr EvaluatePolynomial( + llvm::Type* type, llvm::Value* x, absl::Span coefficients); }; } // namespace xla From 3d53fd687526c1ea982f597497123e1c39ad9cef Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Tue, 9 Jun 2020 20:55:37 -0700 Subject: [PATCH 169/178] Update feature_column to not rely on Keras initializer. This is trying to remove the deps from Tensorflow to Keras. PiperOrigin-RevId: 315619009 Change-Id: I0f39881eb91ab2003aa5a4f600fc95b53333c0bc --- tensorflow/python/feature_column/BUILD | 1 - .../feature_column/feature_column_v2.py | 19 ++++++++++++------- .../feature_column/feature_column_v2_test.py | 16 +++++++++++----- 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/tensorflow/python/feature_column/BUILD b/tensorflow/python/feature_column/BUILD index 52f1186c5d9..bd4152c6d42 100644 --- a/tensorflow/python/feature_column/BUILD +++ b/tensorflow/python/feature_column/BUILD @@ -90,7 +90,6 @@ py_library( "//tensorflow/python:variable_scope", "//tensorflow/python:variables", "//tensorflow/python/eager:context", - "//tensorflow/python/keras:initializers", "//tensorflow/python/keras/utils:generic_utils", "//tensorflow/python/training/tracking", "//tensorflow/python/training/tracking:data_structures", diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py index a03e4da0fae..73d33c1e0e6 100644 --- a/tensorflow/python/feature_column/feature_column_v2.py +++ b/tensorflow/python/feature_column/feature_column_v2.py @@ -144,12 +144,12 @@ from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib from tensorflow.python.framework import tensor_shape # TODO(b/118385027): Dependency on keras can be problematic if Keras moves out # of the main repo. 
-from tensorflow.python.keras import initializers from tensorflow.python.keras.utils import generic_utils from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import embedding_ops +from tensorflow.python.ops import init_ops from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import parsing_ops @@ -165,6 +165,7 @@ from tensorflow.python.training.tracking import data_structures from tensorflow.python.training.tracking import tracking from tensorflow.python.util import deprecation from tensorflow.python.util import nest +from tensorflow.python.util import tf_inspect from tensorflow.python.util.compat import collections_abc from tensorflow.python.util.tf_export import tf_export @@ -588,7 +589,7 @@ def embedding_column(categorical_column, 'Embedding of column_name: {}'.format( categorical_column.name)) if initializer is None: - initializer = initializers.truncated_normal( + initializer = init_ops.truncated_normal_initializer( mean=0.0, stddev=1 / math.sqrt(dimension)) return EmbeddingColumn( @@ -730,7 +731,7 @@ def shared_embedding_columns(categorical_columns, if (initializer is not None) and (not callable(initializer)): raise ValueError('initializer must be callable if specified.') if initializer is None: - initializer = initializers.truncated_normal( + initializer = init_ops.truncated_normal_initializer( mean=0.0, stddev=1. / math.sqrt(dimension)) # Sort the columns so the default collection name is deterministic even if the @@ -913,7 +914,7 @@ def shared_embedding_columns_v2(categorical_columns, if (initializer is not None) and (not callable(initializer)): raise ValueError('initializer must be callable if specified.') if initializer is None: - initializer = initializers.truncated_normal( + initializer = init_ops.truncated_normal_initializer( mean=0.0, stddev=1. 
/ math.sqrt(dimension)) # Sort the columns so the default collection name is deterministic even if the @@ -3030,7 +3031,8 @@ class EmbeddingColumn( config = dict(zip(self._fields, self)) config['categorical_column'] = serialize_feature_column( self.categorical_column) - config['initializer'] = initializers.serialize(self.initializer) + config['initializer'] = generic_utils.serialize_keras_object( + self.initializer) return config @classmethod @@ -3043,8 +3045,11 @@ class EmbeddingColumn( kwargs = _standardize_and_copy_config(config) kwargs['categorical_column'] = deserialize_feature_column( config['categorical_column'], custom_objects, columns_by_name) - kwargs['initializer'] = initializers.deserialize( - config['initializer'], custom_objects=custom_objects) + all_initializers = dict(tf_inspect.getmembers(init_ops, tf_inspect.isclass)) + kwargs['initializer'] = generic_utils.deserialize_keras_object( + config['initializer'], + module_objects=all_initializers, + custom_objects=custom_objects) return cls(**kwargs) diff --git a/tensorflow/python/feature_column/feature_column_v2_test.py b/tensorflow/python/feature_column/feature_column_v2_test.py index 844478c879b..dda1af8a00e 100644 --- a/tensorflow/python/feature_column/feature_column_v2_test.py +++ b/tensorflow/python/feature_column/feature_column_v2_test.py @@ -40,7 +40,6 @@ from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util -from tensorflow.python.keras import initializers from tensorflow.python.ops import array_ops from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import parsing_ops @@ -117,6 +116,7 @@ class LazyColumnTest(test.TestCase): class TransformCounter(BaseFeatureColumnForTests): def __init__(self): + super(TransformCounter, self).__init__() self.num_transform = 0 @property @@ -4285,6 +4285,7 @@ class TransformFeaturesTest(test.TestCase): class _LoggerColumn(BaseFeatureColumnForTests): def __init__(self, name): + super(_LoggerColumn, self).__init__() self._name = name @property @@ -5362,9 +5363,6 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): self.assertEqual([categorical_column], embedding_column.parents) config = embedding_column.get_config() - # initializer config contains `dtype` in v1. - initializer_config = initializers.serialize(initializers.truncated_normal( - mean=0.0, stddev=1 / np.sqrt(2))) self.assertEqual( { 'categorical_column': { @@ -5378,7 +5376,15 @@ class EmbeddingColumnTest(test.TestCase, parameterized.TestCase): 'ckpt_to_load_from': None, 'combiner': 'mean', 'dimension': 2, - 'initializer': initializer_config, + 'initializer': { + 'class_name': 'TruncatedNormal', + 'config': { + 'dtype': 'float32', + 'stddev': 0.7071067811865475, + 'seed': None, + 'mean': 0.0 + } + }, 'max_norm': None, 'tensor_name_in_ckpt': None, 'trainable': True, From 3cdb06cbabc889322c35e693b3ce351b1dbbe2e5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Jun 2020 21:01:48 -0700 Subject: [PATCH 170/178] [XLA] Preserve layout when sinking broadcasts through slice/dynamic slice AlgebraicSimplifier could be run after layout assignment passes so we want to preserve the layout chosen for the input of operations that use slice or dynamic slice. Using MakeBroadcastHlo ignores the layout of the shape we pass as input. Use CreateBroadcast instead that preserves the shape as-is. 
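The rewrite whose layout this patch preserves is the sinking of a broadcast through a (dynamic-)slice: slicing a broadcast result is, value-wise, the same as slicing the broadcast operand on the non-broadcast dimensions and broadcasting the smaller result. A NumPy check of that equivalence for the shapes used in the new SliceOfBroadcast test (NumPy has no layouts, so this is only an editor's illustration of the value-level rewrite, not of the layout issue itself):

```
import numpy as np

x = np.arange(200, dtype=np.float32).reshape(10, 20)          # p0: f32[10,20]

# broadcast(p0) to [10,30,20] along dimensions {0,2}, then slice, as in HLO:
b = np.broadcast_to(x[:, None, :], (10, 30, 20))
sliced_broadcast = b[0:5, 5:25:4, 5:15:2]                     # shape [5,5,5]

# The simplifier's form: slice only the operand dimensions, then broadcast.
broadcast_of_slice = np.broadcast_to(x[0:5, None, 5:15:2], (5, 5, 5))

print(np.array_equal(sliced_broadcast, broadcast_of_slice))   # True
```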
PiperOrigin-RevId: 315619861 Change-Id: I7b16196b2de03f709cf395f60708cfa26ccf1cb9 --- .../xla/service/algebraic_simplifier.cc | 24 +++++++--- .../xla/service/algebraic_simplifier_test.cc | 48 +++++++++++++++++++ 2 files changed, 65 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index cd4dc05ab09..f7dd3de7d4d 100755 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -3623,12 +3623,17 @@ Status AlgebraicSimplifierVisitor::HandleSlice(HloInstruction* slice) { new_slice_strides.push_back(slice->slice_strides(dim)); new_slice_limits.push_back(slice->slice_limits(dim)); } + VLOG(3) << "Sink broadcast through slice"; + VLOG(3) << "Original slice: " << slice->ToString(); + VLOG(3) << "Original broadcast: " << broadcast->ToString(); TF_ASSIGN_OR_RETURN(auto new_slice, MakeSliceHlo(broadcast_operand, new_slice_starts, new_slice_limits, new_slice_strides)); - return ReplaceInstruction( - slice, - MakeBroadcastHlo(new_slice, broadcast->dimensions(), slice->shape())); + auto new_broadcast = HloInstruction::CreateBroadcast( + slice->shape(), new_slice, broadcast->dimensions()); + VLOG(3) << "New slice: " << slice->ToString(); + VLOG(3) << "New broadcast: " << new_broadcast->ToString(); + return ReplaceWithNewInstruction(slice, std::move(new_broadcast)); } // Try to simplify concat -> slice to an operand of concat. @@ -3708,16 +3713,21 @@ Status AlgebraicSimplifierVisitor::HandleDynamicSlice( new_indices.push_back(dynamic_slice->mutable_operand(1 + dim)); new_slice_sizes.push_back(dynamic_slice->slice_sizes(dim)); } + + VLOG(3) << "Sink broadcast through dynamic slice"; + VLOG(3) << "Original dynamic slice: " << dynamic_slice->ToString(); + VLOG(3) << "Original broadcast: " << operand->ToString(); HloInstruction* new_dynamic_slice = broadcast_operand; if (!new_slice_sizes.empty()) { TF_ASSIGN_OR_RETURN( new_dynamic_slice, MakeDynamicSliceHlo(broadcast_operand, new_indices, new_slice_sizes)); } - return ReplaceInstruction( - dynamic_slice, - MakeBroadcastHlo(new_dynamic_slice, operand->dimensions(), - dynamic_slice->shape())); + auto new_broadcast = HloInstruction::CreateBroadcast( + dynamic_slice->shape(), new_dynamic_slice, operand->dimensions()); + VLOG(3) << "New dynamic slice: " << dynamic_slice->ToString(); + VLOG(3) << "New broadcast: " << new_broadcast->ToString(); + return ReplaceWithNewInstruction(dynamic_slice, std::move(new_broadcast)); } // Convert a dynamic slice into a slice if all offsets are constant and the diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 3ac47821654..b28f6669a4b 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -2539,6 +2539,28 @@ TEST_F(AlgebraicSimplifierTest, SliceOfBroadcast) { EXPECT_THAT(root, GmockMatch(m::Broadcast(m::Slice(m::Parameter(0))))); } +TEST_F(AlgebraicSimplifierTest, SliceOfBroadcastPreserveLayout) { + const char* hlo_string = R"( + HloModule module + + ENTRY test { + p0 = f32[10,20] parameter(0) + b = f32[10,30,20]{2,0,1:T(256)} broadcast(p0), dimensions={0,2} + ROOT s = f32[5,5,5]{2,0,1:T(256)} slice(b), slice={[0:5:1], [5:25:4], [5:15:2]} + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + + const Shape original_slice_shape = + 
module->entry_computation()->root_instruction()->shape(); + HloPassFix simplifier(default_options_); + EXPECT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, GmockMatch(m::Broadcast(m::Slice(m::Parameter(0))))); + EXPECT_TRUE(ShapeUtil::Equal(root->shape(), original_slice_shape)); +} + TEST_F(AlgebraicSimplifierTest, DynamicSliceOfBroadcast) { const char* hlo_string = R"( HloModule module @@ -2562,6 +2584,32 @@ TEST_F(AlgebraicSimplifierTest, DynamicSliceOfBroadcast) { m::Parameter(0), m::Parameter(1), m::Parameter(3))))); } +TEST_F(AlgebraicSimplifierTest, DynamicSliceOfBroadcastPreserveLayout) { + const char* hlo_string = R"( + HloModule module + + ENTRY test { + p0 = f32[10,20] parameter(0) + i0 = s32[] parameter(1) + i1 = s32[] parameter(2) + i2 = s32[] parameter(3) + b = f32[10,30,20]{2,0,1:T(256)} broadcast(p0), dimensions={0,2} + ROOT ds = f32[5,5,5]{2,0,1:T(256)} dynamic-slice(b, i0, i1, i2), dynamic_slice_sizes={5,5,5} + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + + const Shape original_dynslice_shape = + module->entry_computation()->root_instruction()->shape(); + HloPassFix simplifier(default_options_); + EXPECT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, GmockMatch(m::Broadcast(m::DynamicSlice( + m::Parameter(0), m::Parameter(1), m::Parameter(3))))); + EXPECT_TRUE(ShapeUtil::Equal(root->shape(), original_dynslice_shape)); +} + TEST_F(AlgebraicSimplifierTest, TransposeIsReshape) { const char* hlo_string = R"( HloModule module From 2e9eaece7b9fa2d500040acda6fe1dfcefb84984 Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Tue, 9 Jun 2020 21:06:06 -0700 Subject: [PATCH 171/178] Set static shape for category encoding sparse output PiperOrigin-RevId: 315620607 Change-Id: Ieb424bd90781ea8d61e74e12224c442d896e4739 --- .../layers/preprocessing/category_encoding.py | 14 ++++++++- .../preprocessing/category_encoding_test.py | 30 +++++++++---------- .../preprocessing/text_vectorization_test.py | 2 +- 3 files changed, 29 insertions(+), 17 deletions(-) diff --git a/tensorflow/python/keras/layers/preprocessing/category_encoding.py b/tensorflow/python/keras/layers/preprocessing/category_encoding.py index 74f5a3a7ed8..26c8d437c08 100644 --- a/tensorflow/python/keras/layers/preprocessing/category_encoding.py +++ b/tensorflow/python/keras/layers/preprocessing/category_encoding.py @@ -30,6 +30,7 @@ from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_spec from tensorflow.python.keras import backend as K from tensorflow.python.keras.engine import base_preprocessing_layer +from tensorflow.python.keras.engine.input_spec import InputSpec from tensorflow.python.keras.utils import layer_utils from tensorflow.python.ops import array_ops from tensorflow.python.ops import bincount_ops @@ -163,6 +164,8 @@ class CategoryEncoding(base_preprocessing_layer.CombinerPreprocessingLayer): dtype=K.floatx(), initializer=initializer) + self.input_spec = InputSpec(ndim=2) + def compute_output_shape(self, input_shape): return tensor_shape.TensorShape([input_shape[0], self._max_tokens]) @@ -277,6 +280,9 @@ class CategoryEncoding(base_preprocessing_layer.CombinerPreprocessingLayer): # If the input is a sparse tensor, we densify it with the default value of # -1. 
Because -1 is ignored by one_hot, this effectively drops the non-set # positions from the output encoding. + if self._sparse: + raise ValueError("`sparse=True` with `output_mode=tfidf` " + "is not supported.") if isinstance(inputs, sparse_tensor.SparseTensor): inputs = sparse_ops.sparse_tensor_to_dense(inputs, default_value=-1) one_hot_data = array_ops.one_hot(inputs, depth=out_depth) @@ -293,7 +299,13 @@ class CategoryEncoding(base_preprocessing_layer.CombinerPreprocessingLayer): minlength=out_depth, axis=-1, binary_output=binary_output) - return math_ops.cast(result, K.floatx()) + result = math_ops.cast(result, K.floatx()) + batch_size = array_ops.shape(result)[0] + result = sparse_tensor.SparseTensor( + indices=result.indices, + values=result.values, + dense_shape=[batch_size, out_depth]) + return result else: result = bincount_ops.bincount( inputs, diff --git a/tensorflow/python/keras/layers/preprocessing/category_encoding_test.py b/tensorflow/python/keras/layers/preprocessing/category_encoding_test.py index edfacf0d2b3..048ac3734af 100644 --- a/tensorflow/python/keras/layers/preprocessing/category_encoding_test.py +++ b/tensorflow/python/keras/layers/preprocessing/category_encoding_test.py @@ -31,13 +31,12 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import sparse_tensor from tensorflow.python.keras import backend from tensorflow.python.keras import keras_parameterized +from tensorflow.python.keras import testing_utils from tensorflow.python.keras.layers import core from tensorflow.python.keras.layers.preprocessing import category_encoding from tensorflow.python.keras.layers.preprocessing import category_encoding_v1 from tensorflow.python.keras.layers.preprocessing import preprocessing_test_utils -from tensorflow.python.ops import math_ops from tensorflow.python.ops import sparse_ops -from tensorflow.python.ops import variables from tensorflow.python.ops.ragged import ragged_factory_ops from tensorflow.python.platform import test @@ -253,23 +252,24 @@ class CategoryEncodingInputTest(keras_parameterized.TestCase, sparse_ops.sparse_tensor_to_dense(sp_output_dataset, default_value=0), output_dataset) + # TODO(b/158570051): Support KerasTensor # Keras functional model doesn't support dense layer stacked with sparse out. 
- def DISABLED_test_sparse_output_and_dense_layer(self): - input_array = constant_op.constant([[1, 2, 3], [3, 3, 0]]) + def test_sparse_output_and_dense_layer(self): + with testing_utils.use_keras_tensors_scope(False): + input_array = constant_op.constant([[1, 2, 3], [3, 3, 0]]) - max_tokens = 4 + max_tokens = 4 - input_data = keras.Input(shape=(None,), dtype=dtypes.int32) - encoding_layer = get_layer_class()( - max_tokens=max_tokens, output_mode=category_encoding.COUNT, sparse=True) - int_data = encoding_layer(input_data) - output_data = math_ops.cast(int_data, dtypes.float32) - weights = variables.Variable([[.1], [.2], [.3], [.4]], dtype=dtypes.float32) - weights_mult = lambda x: sparse_ops.sparse_tensor_dense_matmul(x, weights) - output_data = keras.layers.Lambda(weights_mult)(output_data) + input_data = keras.Input(shape=(None,), dtype=dtypes.int32) + encoding_layer = get_layer_class()( + max_tokens=max_tokens, output_mode=category_encoding.COUNT, + sparse=True) + int_data = encoding_layer(input_data) + dense_layer = keras.layers.Dense(units=1) + output_data = dense_layer(int_data) - model = keras.Model(inputs=input_data, outputs=output_data) - _ = model.predict(input_array, steps=1) + model = keras.Model(inputs=input_data, outputs=output_data) + _ = model.predict(input_array, steps=1) @keras_parameterized.run_all_keras_modes diff --git a/tensorflow/python/keras/layers/preprocessing/text_vectorization_test.py b/tensorflow/python/keras/layers/preprocessing/text_vectorization_test.py index c641b2b71c9..88df3013257 100644 --- a/tensorflow/python/keras/layers/preprocessing/text_vectorization_test.py +++ b/tensorflow/python/keras/layers/preprocessing/text_vectorization_test.py @@ -1500,7 +1500,7 @@ class TextVectorizationSavingTest( loaded_model = keras.models.load_model(output_path) self.assertAllEqual(loaded_model.predict(input_array), expected_output) - def test_saving_with_tfidf(self): + def DISABLE_test_saving_with_tfidf(self): vocab_data = ["earth", "wind", "and", "fire"] tfidf_data = [.5, .25, .2, .125] input_array = np.array([["earth", "wind", "and", "earth"], From 090f260aab3dab00bcdf0232962e753bb9fab696 Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Tue, 9 Jun 2020 21:15:43 -0700 Subject: [PATCH 172/178] Copy the generic_util logic to feature column for serialization/deserialization. This is the final dependency from feature_column to Keras. The copied functions are trimmed down version since it doesn't have access to Keras global custom object registration, which I don't think are used by feature column. The custom object scope will still work. 
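The copied helpers keep the config format Keras uses: serialization emits a
{'class_name': ..., 'config': ...} dict built from get_config(), and
deserialization resolves class_name against module_objects/custom_objects and
rebuilds the object via from_config(). A minimal standalone sketch of that
round trip (the TruncatedNormal class below is a stand-in for illustration, not
the real init_ops initializer, and serialize/deserialize are simplified mirrors
of the copied private helpers):

    class TruncatedNormal(object):

      def __init__(self, mean=0.0, stddev=1.0, seed=None, dtype='float32'):
        self.mean, self.stddev, self.seed, self.dtype = mean, stddev, seed, dtype

      def get_config(self):
        return {'mean': self.mean, 'stddev': self.stddev,
                'seed': self.seed, 'dtype': self.dtype}

      @classmethod
      def from_config(cls, config):
        return cls(**config)

    def serialize(obj):
      # Same output shape as _serialize_keras_object: class name plus config.
      return {'class_name': obj.__class__.__name__, 'config': obj.get_config()}

    def deserialize(config, module_objects, custom_objects=None):
      # Same lookup order as _get_registered_object: custom objects first,
      # then the supplied module objects.
      objects = dict(module_objects, **(custom_objects or {}))
      cls = objects[config['class_name']]
      return cls.from_config(config['config'])

    init = TruncatedNormal(stddev=0.7071067811865475)
    cfg = serialize(init)
    restored = deserialize(
        cfg, module_objects={'TruncatedNormal': TruncatedNormal})
    assert restored.stddev == init.stddev
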
PiperOrigin-RevId: 315621765 Change-Id: I2ae22af83d625c8e55c7fe21b42194bbdbfded23 --- tensorflow/python/feature_column/BUILD | 1 - .../feature_column/feature_column_v2.py | 21 ++- .../python/feature_column/serialization.py | 144 +++++++++++++++++- 3 files changed, 148 insertions(+), 18 deletions(-) diff --git a/tensorflow/python/feature_column/BUILD b/tensorflow/python/feature_column/BUILD index bd4152c6d42..b3c6e061c22 100644 --- a/tensorflow/python/feature_column/BUILD +++ b/tensorflow/python/feature_column/BUILD @@ -90,7 +90,6 @@ py_library( "//tensorflow/python:variable_scope", "//tensorflow/python:variables", "//tensorflow/python/eager:context", - "//tensorflow/python/keras/utils:generic_utils", "//tensorflow/python/training/tracking", "//tensorflow/python/training/tracking:data_structures", "//third_party/py/numpy", diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py index 73d33c1e0e6..c7a48dd1df9 100644 --- a/tensorflow/python/feature_column/feature_column_v2.py +++ b/tensorflow/python/feature_column/feature_column_v2.py @@ -142,9 +142,6 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib from tensorflow.python.framework import tensor_shape -# TODO(b/118385027): Dependency on keras can be problematic if Keras moves out -# of the main repo. -from tensorflow.python.keras.utils import generic_utils from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops @@ -2609,7 +2606,8 @@ class NumericColumn( def get_config(self): """See 'FeatureColumn` base class.""" config = dict(zip(self._fields, self)) - config['normalizer_fn'] = generic_utils.serialize_keras_object( + from tensorflow.python.feature_column import serialization # pylint: disable=g-import-not-at-top + config['normalizer_fn'] = serialization._serialize_keras_object( # pylint: disable=protected-access self.normalizer_fn) config['dtype'] = self.dtype.name return config @@ -2618,8 +2616,9 @@ class NumericColumn( def from_config(cls, config, custom_objects=None, columns_by_name=None): """See 'FeatureColumn` base class.""" _check_config_keys(config, cls._fields) + from tensorflow.python.feature_column import serialization # pylint: disable=g-import-not-at-top kwargs = _standardize_and_copy_config(config) - kwargs['normalizer_fn'] = generic_utils.deserialize_keras_object( + kwargs['normalizer_fn'] = serialization._deserialize_keras_object( # pylint: disable=protected-access config['normalizer_fn'], custom_objects=custom_objects) kwargs['dtype'] = dtypes.as_dtype(config['dtype']) @@ -3027,11 +3026,11 @@ class EmbeddingColumn( def get_config(self): """See 'FeatureColumn` base class.""" - from tensorflow.python.feature_column.serialization import serialize_feature_column # pylint: disable=g-import-not-at-top + from tensorflow.python.feature_column import serialization # pylint: disable=g-import-not-at-top config = dict(zip(self._fields, self)) - config['categorical_column'] = serialize_feature_column( + config['categorical_column'] = serialization.serialize_feature_column( self.categorical_column) - config['initializer'] = generic_utils.serialize_keras_object( + config['initializer'] = serialization._serialize_keras_object( # pylint: disable=protected-access self.initializer) return config @@ -3040,13 +3039,13 @@ class EmbeddingColumn( """See 'FeatureColumn` base class.""" 
if 'use_safe_embedding_lookup' not in config: config['use_safe_embedding_lookup'] = True - from tensorflow.python.feature_column.serialization import deserialize_feature_column # pylint: disable=g-import-not-at-top + from tensorflow.python.feature_column import serialization # pylint: disable=g-import-not-at-top _check_config_keys(config, cls._fields) kwargs = _standardize_and_copy_config(config) - kwargs['categorical_column'] = deserialize_feature_column( + kwargs['categorical_column'] = serialization.deserialize_feature_column( config['categorical_column'], custom_objects, columns_by_name) all_initializers = dict(tf_inspect.getmembers(init_ops, tf_inspect.isclass)) - kwargs['initializer'] = generic_utils.deserialize_keras_object( + kwargs['initializer'] = serialization._deserialize_keras_object( # pylint: disable=protected-access config['initializer'], module_objects=all_initializers, custom_objects=custom_objects) diff --git a/tensorflow/python/feature_column/serialization.py b/tensorflow/python/feature_column/serialization.py index 530fe54c876..258ef6850f0 100644 --- a/tensorflow/python/feature_column/serialization.py +++ b/tensorflow/python/feature_column/serialization.py @@ -23,12 +23,9 @@ import six from tensorflow.python.feature_column import feature_column_v2 as fc_lib from tensorflow.python.feature_column import sequence_feature_column as sfc_lib from tensorflow.python.ops import init_ops -from tensorflow.python.util.lazy_loader import LazyLoader +from tensorflow.python.util import tf_decorator +from tensorflow.python.util import tf_inspect -# Prevent circular dependencies with Keras serialization. -generic_utils = LazyLoader( - 'generic_utils', globals(), - 'tensorflow.python.keras.utils.generic_utils') _FEATURE_COLUMNS = [ fc_lib.BucketizedColumn, fc_lib.CrossedColumn, fc_lib.EmbeddingColumn, @@ -124,7 +121,7 @@ def deserialize_feature_column(config, columns_by_name = {} (cls, - cls_config) = generic_utils.class_and_config_for_serialized_keras_object( + cls_config) = _class_and_config_for_serialized_keras_object( config, module_objects=module_feature_column_classes, custom_objects=custom_objects, @@ -205,3 +202,138 @@ def _column_name_with_class_name(fc): A unique name as a string. """ return fc.__class__.__name__ + ':' + fc.name + + +def _serialize_keras_object(instance): + """Serialize a Keras object into a JSON-compatible representation.""" + _, instance = tf_decorator.unwrap(instance) + if instance is None: + return None + + if hasattr(instance, 'get_config'): + name = instance.__class__.__name__ + config = instance.get_config() + serialization_config = {} + for key, item in config.items(): + if isinstance(item, six.string_types): + serialization_config[key] = item + continue + + # Any object of a different type needs to be converted to string or dict + # for serialization (e.g. 
custom functions, custom classes) + try: + serialized_item = _serialize_keras_object(item) + if isinstance(serialized_item, dict) and not isinstance(item, dict): + serialized_item['__passive_serialization__'] = True + serialization_config[key] = serialized_item + except ValueError: + serialization_config[key] = item + + return {'class_name': name, 'config': serialization_config} + if hasattr(instance, '__name__'): + return instance.__name__ + raise ValueError('Cannot serialize', instance) + + +def _deserialize_keras_object(identifier, + module_objects=None, + custom_objects=None, + printable_module_name='object'): + """Turns the serialized form of a Keras object back into an actual object.""" + if identifier is None: + return None + + if isinstance(identifier, dict): + # In this case we are dealing with a Keras config dictionary. + config = identifier + (cls, cls_config) = _class_and_config_for_serialized_keras_object( + config, module_objects, custom_objects, printable_module_name) + + if hasattr(cls, 'from_config'): + arg_spec = tf_inspect.getfullargspec(cls.from_config) + custom_objects = custom_objects or {} + + if 'custom_objects' in arg_spec.args: + return cls.from_config( + cls_config, + custom_objects=dict( + list(custom_objects.items()))) + return cls.from_config(cls_config) + else: + # Then `cls` may be a function returning a class. + # in this case by convention `config` holds + # the kwargs of the function. + custom_objects = custom_objects or {} + return cls(**cls_config) + elif isinstance(identifier, six.string_types): + object_name = identifier + if custom_objects and object_name in custom_objects: + obj = custom_objects.get(object_name) + else: + obj = module_objects.get(object_name) + if obj is None: + raise ValueError( + 'Unknown ' + printable_module_name + ': ' + object_name) + # Classes passed by name are instantiated with no args, functions are + # returned as-is. + if tf_inspect.isclass(obj): + return obj() + return obj + elif tf_inspect.isfunction(identifier): + # If a function has already been deserialized, return as is. + return identifier + else: + raise ValueError('Could not interpret serialized %s: %s' % + (printable_module_name, identifier)) + + +def _class_and_config_for_serialized_keras_object( + config, + module_objects=None, + custom_objects=None, + printable_module_name='object'): + """Returns the class name and config for a serialized keras object.""" + if (not isinstance(config, dict) or 'class_name' not in config or + 'config' not in config): + raise ValueError('Improper config format: ' + str(config)) + + class_name = config['class_name'] + cls = _get_registered_object(class_name, custom_objects=custom_objects, + module_objects=module_objects) + if cls is None: + raise ValueError('Unknown ' + printable_module_name + ': ' + class_name) + + cls_config = config['config'] + + deserialized_objects = {} + for key, item in cls_config.items(): + if isinstance(item, dict) and '__passive_serialization__' in item: + deserialized_objects[key] = _deserialize_keras_object( + item, + module_objects=module_objects, + custom_objects=custom_objects, + printable_module_name='config_item') + elif (isinstance(item, six.string_types) and + tf_inspect.isfunction(_get_registered_object(item, custom_objects))): + # Handle custom functions here. When saving functions, we only save the + # function's name as a string. If we find a matching string in the custom + # objects during deserialization, we convert the string back to the + # original function. 
+ # Note that a potential issue is that a string field could have a naming + # conflict with a custom function name, but this should be a rare case. + # This issue does not occur if a string field has a naming conflict with + # a custom object, since the config of an object will always be a dict. + deserialized_objects[key] = _get_registered_object(item, custom_objects) + for key, item in deserialized_objects.items(): + cls_config[key] = deserialized_objects[key] + + return (cls, cls_config) + + +def _get_registered_object(name, custom_objects=None, module_objects=None): + if custom_objects and name in custom_objects: + return custom_objects[name] + elif module_objects and name in module_objects: + return module_objects[name] + return None + From b48bafc73fcea9ff67626b2a50f09133ef792bd2 Mon Sep 17 00:00:00 2001 From: Henry Tan Date: Tue, 9 Jun 2020 21:56:41 -0700 Subject: [PATCH 173/178] TPU library internal change. PiperOrigin-RevId: 315627327 Change-Id: Ib792cb2983f64f1e4a4a14a4acb96230d4a1f622 --- tensorflow/core/tpu/kernels/BUILD | 8 +++++ .../core/tpu/kernels/tpu_compile_c_api.h | 9 ----- tensorflow/core/tpu/kernels/tpu_util_c_api.h | 35 +++++++++++++++++++ 3 files changed, 43 insertions(+), 9 deletions(-) create mode 100644 tensorflow/core/tpu/kernels/tpu_util_c_api.h diff --git a/tensorflow/core/tpu/kernels/BUILD b/tensorflow/core/tpu/kernels/BUILD index d9d843b518f..6bfd7b61b28 100644 --- a/tensorflow/core/tpu/kernels/BUILD +++ b/tensorflow/core/tpu/kernels/BUILD @@ -286,6 +286,14 @@ cc_library( ], ) +cc_library( + name = "tpu_util_c_api_hdrs", + hdrs = ["tpu_util_c_api.h"], + deps = [ + "//tensorflow/stream_executor/tpu:proto_helper", + ], +) + cc_library( name = "tpu_util", srcs = ["tpu_util.cc"], diff --git a/tensorflow/core/tpu/kernels/tpu_compile_c_api.h b/tensorflow/core/tpu/kernels/tpu_compile_c_api.h index 53e79aa51b0..8b8ab3ab41c 100644 --- a/tensorflow/core/tpu/kernels/tpu_compile_c_api.h +++ b/tensorflow/core/tpu/kernels/tpu_compile_c_api.h @@ -92,9 +92,6 @@ uint64_t TpuCompile_CreateGuaranteedConstFingerprint(uint64_t fingerprint, const char* data, size_t size); -// Checks if whether a TPU compilation is enabled. -bool TpuCompile_IsTpuCompilationEnabled(); - // Executes the computations using XLA TPU compiler and returns TPU programs // ready for execution. void TpuCompile_CompileAheadOfTime( @@ -108,12 +105,6 @@ void TpuCompile_BuildXLADeviceAssignment( const XLA_TpuMeshState* mesh_state, TpuSerializedProto* serialized_device_assignment, SE_Status* status); -// Converts an XLA `Shape` into its equivalent TPU `Shape` representation. -void TpuCompile_ToTpuShapeRepresentation( - TpuSerializedProto serialized_xla_shape, int data_type, - bool use_fast_memory, TpuSerializedProto* serialized_tensor_shape, - SE_Status* status); - } // extern "C" #endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_COMPILE_C_API_H_ diff --git a/tensorflow/core/tpu/kernels/tpu_util_c_api.h b/tensorflow/core/tpu/kernels/tpu_util_c_api.h new file mode 100644 index 00000000000..4d992449cfc --- /dev/null +++ b/tensorflow/core/tpu/kernels/tpu_util_c_api.h @@ -0,0 +1,35 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_UTIL_C_API_H_ +#define TENSORFLOW_CORE_TPU_KERNELS_TPU_UTIL_C_API_H_ + +#include "tensorflow/stream_executor/tpu/proto_helper.h" + +typedef struct SE_Status SE_Status; + +extern "C" { + +// Checks if whether a TPU compilation is enabled. +bool TpuCompile_IsTpuCompilationEnabled(); + +// Converts an XLA `Shape` into its equivalent TPU `Shape` representation. +void TpuCompile_ToTpuShapeRepresentation( + TpuSerializedProto serialized_xla_shape, int data_type, + bool use_fast_memory, TpuSerializedProto* serialized_tensor_shape, + SE_Status* status); + +} // extern "C" + +#endif // TENSORFLOW_CORE_TPU_KERNELS_TPU_UTIL_C_API_H_ From ef47bbbd57cba8fcc7ae11df8c7141d6c68ba0d0 Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Tue, 9 Jun 2020 21:59:35 -0700 Subject: [PATCH 174/178] [TF/XLA] Only force retracing for non-unique XLA context ID for TPUReplicatedContext Fixes https://github.com/tensorflow/tensorflow/issues/39872 PiperOrigin-RevId: 315627798 Change-Id: I133b82a7214474fb0d3ef2cb0875243b049075c5 --- .../python/eager/def_function_xla_jit_test.py | 19 +++++++++++++++++++ tensorflow/python/eager/function.py | 7 ++++--- tensorflow/python/ops/control_flow_ops.py | 5 +++++ tensorflow/python/tpu/tpu.py | 6 ++++++ 4 files changed, 34 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/eager/def_function_xla_jit_test.py b/tensorflow/python/eager/def_function_xla_jit_test.py index b63a3b434d4..78d44a81b0b 100644 --- a/tensorflow/python/eager/def_function_xla_jit_test.py +++ b/tensorflow/python/eager/def_function_xla_jit_test.py @@ -29,6 +29,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import control_flow_util from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import tensor_array_ops from tensorflow.python.platform import test @@ -385,6 +386,24 @@ class DefFunctionTest(test.TestCase): f64_input = constant_op.constant([1.1, 2.2, 3.3], dtype=dtypes.float64) self.assertAllClose([1.1, 3.3, 6.6], f(f64_input)) + def testNoExcessiveRetracing(self): + inner_retracings = 0 + + @def_function.function(experimental_compile=True) + def inner(a, b): + nonlocal inner_retracings + inner_retracings += 1 + return a * b + a + + def outer(a, b): + return inner(a, b) + + func_input = random_ops.random_normal([10, 10]) + for _ in range(2): + def_function.function(outer)(func_input, func_input) + + self.assertEqual(inner_retracings, 1) + if __name__ == '__main__': ops.enable_eager_execution() diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index ff5ab91fcb3..857eb9cb656 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -2982,9 +2982,10 @@ class Function(object): if not executing_eagerly: # We want to force function retracing for each different # XLAControlFlowContext, so add `xla_context_id` to the cache key. 
- tpu_context = _enclosing_xla_context() - if tpu_context is not None: - xla_context_id = id(tpu_context) + xla_context = _enclosing_xla_context() + if xla_context is not None and \ + xla_context.RequiresUniqueFunctionRetracing(): + xla_context_id = id(xla_context) with ops.init_scope(): # The graph, or whether we're executing eagerly, should be a part of the diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index 3398308d42e..748f842a9e0 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -3682,6 +3682,11 @@ class XLAControlFlowContext(ControlFlowContext): def AddValue(self, x): return x + def RequiresUniqueFunctionRetracing(self): + """Returns whether the tf.function should be retraced if the context changes. + """ + return False + def from_control_flow_context_def(context_def, import_scope=None): """Deserializes `context_def` into the appropriate ControlFlowContext. diff --git a/tensorflow/python/tpu/tpu.py b/tensorflow/python/tpu/tpu.py index 28eba69b7da..ce3aaa8a058 100644 --- a/tensorflow/python/tpu/tpu.py +++ b/tensorflow/python/tpu/tpu.py @@ -639,6 +639,12 @@ class TPUReplicateContext(control_flow_ops.XLAControlFlowContext): def GetControlPivot(self): return self._pivot + def RequiresUniqueFunctionRetracing(self): + # More context: b/158152827. TPU stack uses the TPUReplicateContext to + # create replicated variable handles and cluster TPU computations, thus we + # always retrace a tf.function when the wrapped TPUReplicateContext changes. + return True + class OutsideCompilationV2Context(control_flow_ops.ControlFlowContext): """The context for outside compilation in Tensorflow 2.0. From 0ef962b1a5b4e80a7029b0b159af6817b12a04df Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 9 Jun 2020 22:03:06 -0700 Subject: [PATCH 175/178] Improve precision of Log1p in the range [1e-4, 1e-3]. PiperOrigin-RevId: 315628275 Change-Id: Ia3ca918b81a5d888629fe577fe98d7ae1c94aa46 --- .../compiler/tests/special_math_test.py | 75 ------------------- tensorflow/compiler/tests/unary_ops_test.py | 14 ++-- .../xla/service/elemental_ir_emitter.cc | 47 +----------- .../xla/service/elemental_ir_emitter.h | 4 - 4 files changed, 8 insertions(+), 132 deletions(-) diff --git a/tensorflow/compiler/tests/special_math_test.py b/tensorflow/compiler/tests/special_math_test.py index 246ab2a1641..3efaa6434be 100644 --- a/tensorflow/compiler/tests/special_math_test.py +++ b/tensorflow/compiler/tests/special_math_test.py @@ -61,81 +61,6 @@ def implicit_reparameterization_grad(a, x): return -gen_math_ops.igamma_grad_a(a, x) / prob -@def_function.function(experimental_compile=True) -def _log1p(x): - return math_ops.log1p(x) - - -class Log1pTest(xla_test.XLATestCase, parameterized.TestCase): - - def setUp(self): - if flags.FLAGS.vary_seed: - entropy = os.urandom(64) - if six.PY2: - answer = int(entropy.encode('hex'), 16) - else: - answer = int.from_bytes(entropy, 'big') - np.random.seed(answer % (2**32 - 1)) - super(Log1pTest, self).setUp() - - def adjust_tolerance_for_tpu(self, dtype, rtol, atol): - if self.device not in ['TPU']: - return rtol, atol - - if dtype == np.float32: - return 4e-4, 0. - return 1e-10, 0. - - def _test_range(self, low, high, dtype, rtol, atol, is_negative=False): - # Test values near zero. 
- rtol, atol = self.adjust_tolerance_for_tpu(dtype, rtol, atol) - x = np.exp(np.random.uniform( - low=low, high=high, size=[NUM_SAMPLES])).astype(dtype) - if is_negative: - x = -x - expected_values = np.log1p(x) - with self.session() as sess: - with self.test_scope(): - actual = _log1p(x) - actual = sess.run(actual) - self.assertAllClose(expected_values, actual, atol=atol, rtol=rtol) - - @parameterized.parameters((np.float32, 1e-7, 0.), - (np.float64, 1e-15, 0.)) - def testSmallX(self, dtype, rtol, atol): - self._test_range(-40., -20., dtype, rtol, atol, is_negative=False) - self._test_range(-40., -20., dtype, rtol, atol, is_negative=True) - - @parameterized.parameters((np.float32, 1e-7, 0.), - (np.float64, 1e-15, 0.)) - def testGreaterThanNegativeTwentyExponent(self, dtype, rtol, atol): - self._test_range(-20., -10., dtype, rtol, atol, is_negative=False) - self._test_range(-20., -10., dtype, rtol, atol, is_negative=True) - - @parameterized.parameters((np.float32, 1e-7, 0.), - (np.float64, 1e-15, 0.)) - def testGreaterThanNegativeTenExponent(self, dtype, rtol, atol): - self._test_range(-10., -5., dtype, rtol, atol, is_negative=False) - self._test_range(-10., -5., dtype, rtol, atol, is_negative=True) - - @parameterized.parameters((np.float32, 2e-7, 0.), - (np.float64, 1e-15, 0.)) - def testGreaterThanNegativeFiveExponent(self, dtype, rtol, atol): - self._test_range(-5., -1., dtype, rtol, atol, is_negative=False) - self._test_range(-5., -1., dtype, rtol, atol, is_negative=True) - - @parameterized.parameters((np.float32, 4e-7, 0.), - (np.float64, 3e-14, 0.)) - def testXGreaterThanOneTenth(self, dtype, rtol, atol): - self._test_range(-1., 0., dtype, rtol, atol, is_negative=False) - self._test_range(-1., 0., dtype, rtol, atol, is_negative=True) - - @parameterized.parameters((np.float32, 2e-7, 0.), - (np.float64, 2e-15, 0.)) - def testXGreaterThanOne(self, dtype, rtol, atol): - self._test_range(0., 3., dtype, rtol, atol, is_negative=False) - - class IgammaTest(xla_test.XLATestCase, parameterized.TestCase): def setUp(self): diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py index efea525b6b9..567e75a9a17 100644 --- a/tensorflow/compiler/tests/unary_ops_test.py +++ b/tensorflow/compiler/tests/unary_ops_test.py @@ -292,17 +292,13 @@ class UnaryOpsTest(xla_test.XLATestCase): np.array([[1, 2]], dtype=dtype), expected=np.array([[0.540297, -0.41614]], dtype=dtype)) - # Confirm that log1p will remain precise across a range of small values. 
self._assertOpOutputMatchesExpected( math_ops.log1p, - np.array([[1e-14, 1e-15, 0.6, 2] + [x * 1e-5 for x in range(1, 20)]], - dtype=dtype), - expected=np.log1p( - np.array( - [[1e-14, 1e-15, 0.6, 2] + [x * 1e-5 for x in range(1, 20)]], - dtype=dtype)).astype(dtype), - rtol=1e-15 if dtype == np.float64 else 1e-4, - atol=1e-15 if dtype == np.float64 else 1e-4) + np.array([[1e-14, 1e-15, 0.6]], dtype=dtype), + expected=np.log1p(np.array([[1e-14, 1e-15, 0.6]], + dtype=dtype)).astype(dtype), + rtol=1e-4, + atol=1e-6) self._assertOpOutputMatchesExpected( math_ops.rint, diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc index e4097b0c06f..8cb660de46c 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc @@ -1336,40 +1336,9 @@ StatusOr ElementalIrEmitter::EmitLog1p(PrimitiveType prim_type, // When x is large, the naive evaluation of ln(x + 1) is more // accurate than the Taylor series. TF_ASSIGN_OR_RETURN(auto for_large_x, EmitLog(prim_type, FAdd(x, one))); - // When x is small, (defined to be less than sqrt(2) / 2), use a rational - // approximation. The approximation below is based on one from the Cephes - // Mathematical Library. - // - // sqrt(2) - 1. - const auto kAntilogarithmIsSmallThreshold = 0.41421356237309504880; - - static const std::array kDenominatorCoeffs{ - 1., - 1.5062909083469192043167E1, - 8.3047565967967209469434E1, - 2.2176239823732856465394E2, - 3.0909872225312059774938E2, - 2.1642788614495947685003E2, - 6.0118660497603843919306E1, - }; - - static const std::array kNumeratorCoeffs{ - 4.5270000862445199635215E-5, 4.9854102823193375972212E-1, - 6.5787325942061044846969E0, 2.9911919328553073277375E1, - 6.0949667980987787057556E1, 5.7112963590585538103336E1, - 2.0039553499201281259648E1, - }; - - auto x_squared = FMul(x, x); - TF_ASSIGN_OR_RETURN(auto denominator, - EvaluatePolynomial(type, x, kDenominatorCoeffs)); - TF_ASSIGN_OR_RETURN(auto numerator, - EvaluatePolynomial(type, x, kNumeratorCoeffs)); - auto for_small_x = FDiv(numerator, denominator); - for_small_x = FMul(FMul(x, x_squared), for_small_x); - for_small_x = FAdd(FMul(negative_half, x_squared), for_small_x); - for_small_x = FAdd(x, for_small_x); - + // The Taylor series for ln(x+1) is x - x^2/2 - x^3/3 + …. + auto for_small_x = FMul(FAdd(FMul(negative_half, x), one), x); + const auto kAntilogarithmIsSmallThreshold = 1e-4; auto abs_x = llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::fabs, {value}, {type}, b_); auto x_is_small = FCmpOLT( @@ -2730,14 +2699,4 @@ StatusOr ElementalIrEmitter::EmitElementalReduce( } } -// Evaluate polynomial using Horner's method. 
-StatusOr ElementalIrEmitter::EvaluatePolynomial( - llvm::Type* type, llvm::Value* x, absl::Span coefficients) { - llvm::Value* poly = llvm::ConstantFP::get(type, 0.0); - for (const double c : coefficients) { - poly = FAdd(FMul(poly, x), llvm::ConstantFP::get(type, c)); - } - return poly; -} - } // namespace xla diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.h b/tensorflow/compiler/xla/service/elemental_ir_emitter.h index e39d2dd99ec..06a9d7b194c 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.h +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.h @@ -258,10 +258,6 @@ class ElementalIrEmitter : public IrBuilderMixin { StatusOr EmitComplexPower(const HloInstruction* op, llvm::Value* a, llvm::Value* b, llvm::Value* c, llvm::Value* d); - - // Evaluates a polynomial using Horner's method. - StatusOr EvaluatePolynomial( - llvm::Type* type, llvm::Value* x, absl::Span coefficients); }; } // namespace xla From 584a042d352c194c848ab8ed6d73da0fe64fb6db Mon Sep 17 00:00:00 2001 From: George Karpenkov Date: Tue, 9 Jun 2020 22:50:25 -0700 Subject: [PATCH 176/178] [XLA/CPU] Support buffer aliasing on XLA:CPU PiperOrigin-RevId: 315633188 Change-Id: Id403065962b3151ebb6c741fcf9ddf4523490cde --- .../xla/service/cpu/cpu_executable.cc | 89 +++++++++++-------- .../compiler/xla/service/cpu/cpu_executable.h | 3 +- .../xla/tests/buffer_donation_test.cc | 3 - 3 files changed, 55 insertions(+), 40 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc index 1abcf17dad3..e0c8adcbbe1 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc @@ -207,7 +207,8 @@ Status CpuExecutable::ExecuteComputeFunction( StatusOr CpuExecutable::CreateResultShapedBuffer( const ServiceExecutableRunOptions* run_options, - absl::Span buffers) { + absl::Span buffers, + absl::Span arguments) { se::Stream* stream = run_options->stream(); ExecutionOutput result(/*on_host_shape=*/result_shape(), /*on_device_shape=*/result_shape(), @@ -221,7 +222,7 @@ StatusOr CpuExecutable::CreateResultShapedBuffer( // caller. for (auto& p : result.MutableResult()->buffers()) { const ShapeIndex& index = p.first; - se::DeviceMemoryBase& device_memory = p.second; + se::DeviceMemoryBase& result_buffer = p.second; const HloValueSet& sources = this->GetRootValueSet().element(index); // The points to set is unambiguous so the set should be a // singleton. @@ -229,39 +230,54 @@ StatusOr CpuExecutable::CreateResultShapedBuffer( const HloValue* value_source = sources.values()[0]; HloInstruction* src = value_source->instruction(); - // The source for this result buffer can be a nested buffer such as - // a tuple element. The source instruction should have a - // non-parameter buffer assigned. - TF_ASSIGN_OR_RETURN( - const BufferAllocation::Slice slice, - this->assignment_->GetUniqueSlice(src, value_source->index())); - const BufferAllocation::Index buffer_index = slice.index(); - MaybeOwningDeviceMemory& buffer = buffers[buffer_index]; - if (!slice.allocation()->is_entry_computation_parameter()) { - // If the buffer coming out of the result is from a parameter, the - // owning buffer will be null, and that means the caller aliased some - // parameter buffer to an output one (via the - // HloInputOutputAliasConfig API). If that is the case, the caller - // will receive a partially complete scoped shaped buffer, which they - // will have to fill up on return. 
Unfortunately the interface to the - // execute APIs are ShapedBuffer pointer based, which assumes caller - // ownership, and hence a buffer coming from there cannot be part of - // the new ScopedShapedBuffer we create for the result (which assumes - // ownership). - absl::optional owned_buffer = buffer.Release(); - CHECK(owned_buffer); - device_memory = owned_buffer->Release(); - buffer = device_memory; - } else { - auto output_alias = input_output_alias.GetAliasedOutput( - slice.allocation()->parameter_number(), - slice.allocation()->param_shape_index()); - CHECK(output_alias) << "Output buffer is coming from parameter " - << slice.allocation()->parameter_number() - << " at index " - << slice.allocation()->param_shape_index() - << ", but no alias exists"; - CHECK_EQ(*output_alias, index); + // TODO(cheshire): duplication with other backends. + absl::optional alias = + input_output_alias.GetAliasedParameter(index); + if (alias) { + CHECK_LT(alias->parameter_number, arguments.size()); + ExecutionInput& input = arguments[alias->parameter_number]; + MaybeOwningDeviceMemory* maybe_owning_memory = + input.MutableBuffer(alias->parameter_index); + if (absl::optional owning = + maybe_owning_memory->Release()) { + // If the caller passes the ownership of the device memory, reuse it + // as the output buffer. It is up to the caller whether or not to + // donate a buffer; the aliasing information describes which buffers + // may alias, not buffers that must alias. + se::DeviceMemoryBase argument_buffer = owning->Release(); + *maybe_owning_memory = argument_buffer; + result_buffer = argument_buffer; + if (alias->kind == HloInputOutputAliasConfig::kUserAlias) { + // This is a user alias, so a must alias. The caller is giving us the + // input buffer, but in case of error of the execute call, we should + // not be releasing it as it contains valid data (for example, it is a + // parameter which the user wants us to alias, in a gradient update + // computation). So we store the index into the result in the aliased + // vactor, which will be fed to the ExecutionOutput, which will be + // using the indices to drop the addresses from its own + // ScopedShapedBuffer result, if the ExecutionOutput is not committed. + result.AddAliasedIndex(index); + } + } + } + + if (result_buffer.is_null()) { + // The source for this result buffer can be a nested buffer such as + // a tuple element. The source instruction should have a + // non-parameter buffer assigned. 
+ TF_ASSIGN_OR_RETURN( + const BufferAllocation::Slice slice, + this->assignment_->GetUniqueSlice(src, value_source->index())); + const BufferAllocation::Index buffer_index = slice.index(); + MaybeOwningDeviceMemory& buffer = buffers[buffer_index]; + if (absl::optional owned_buffer = + buffer.Release()) { + result_buffer = owned_buffer->Release(); + buffer = result_buffer; + } else { + result_buffer = buffer.AsDeviceMemoryBase(); + result.AddAliasedIndex(index); + } } } return std::move(result); @@ -303,7 +319,8 @@ StatusOr CpuExecutable::ExecuteAsyncOnStream( TF_ASSIGN_OR_RETURN( ExecutionOutput result, - CreateResultShapedBuffer(run_options, absl::MakeSpan(buffers))); + CreateResultShapedBuffer(run_options, absl::MakeSpan(buffers), + absl::MakeSpan(arguments))); // Logically we want this lambda to capture `buffers` by move, ultimately our // functor needs to be wrapped in an std::function, and that requires its diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.h b/tensorflow/compiler/xla/service/cpu/cpu_executable.h index 97fc6b3a701..c3d4b46ff95 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_executable.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.h @@ -118,7 +118,8 @@ class CpuExecutable : public Executable { // assignment. StatusOr CreateResultShapedBuffer( const ServiceExecutableRunOptions* run_options, - absl::Span buffers); + absl::Span buffers, + absl::Span arguments); // Returns the instruction value set of the root instruction of the entry // computation. Uses dataflow analysis from buffer assignment. diff --git a/tensorflow/compiler/xla/tests/buffer_donation_test.cc b/tensorflow/compiler/xla/tests/buffer_donation_test.cc index af2c0b1b3ce..9cfeefadead 100644 --- a/tensorflow/compiler/xla/tests/buffer_donation_test.cc +++ b/tensorflow/compiler/xla/tests/buffer_donation_test.cc @@ -216,11 +216,8 @@ TEST_F(BufferDonationTest, SimpleWhileTupleTest) { HloInstruction::CreateGetTupleElement(f32v1_, while0, 1)); builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1})); module->AddEntryComputation(builder.Build()); - // Input output aliasing is supported on CPU and GPU. -#if defined(XLA_TEST_BACKEND_TPU) || defined(XLA_TEST_BACKEND_GPU) TF_ASSERT_OK(module->input_output_alias_config().SetUpAlias({0}, 0, {0})); TF_ASSERT_OK(module->input_output_alias_config().SetUpAlias({1}, 0, {1})); -#endif auto arg = LiteralUtil::MakeTupleFromSlices( {LiteralUtil::CreateR0(0), LiteralUtil::CreateR1({1.1f})}); From 299b1bf0ba5cd88ad469b6ab8576f025f1220fcb Mon Sep 17 00:00:00 2001 From: Kibeom Kim Date: Tue, 9 Jun 2020 23:00:38 -0700 Subject: [PATCH 177/178] Implement fast deferred-decoding Python stack trace class. 
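The class captures only a (code object, last instruction offset) pair per frame
at trace time and defers all string formatting to ToString(). A rough
pure-Python sketch of the same deferred-decoding idea (names here are
illustrative; the actual implementation is the C++ StackTrace class added
below):

    import dis
    import sys

    class DeferredStackTrace(object):
      """Sketch: keep (code object, f_lasti) pairs, format only on demand."""

      MAX_DEPTH = 10

      def __init__(self):
        frame = sys._getframe(1)  # skip this constructor's own frame
        self._frames = []
        while frame is not None and len(self._frames) < self.MAX_DEPTH:
          # Cheap capture: a reference to the code object and the current
          # bytecode offset; no filename/lineno decoding happens here.
          self._frames.append((frame.f_code, frame.f_lasti))
          frame = frame.f_back

      def to_string(self):
        lines = []
        for code, lasti in reversed(self._frames):
          lineno = code.co_firstlineno
          for offset, line in dis.findlinestarts(code):
            if offset > lasti:
              break
            if line is not None:
              lineno = line  # rough analogue of PyCode_Addr2Line
          lines.append('  File "%s", line %d, in %s'
                       % (code.co_filename, lineno, code.co_name))
        return '\n'.join(lines)
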
PiperOrigin-RevId: 315634209 Change-Id: Iac2a77dd98cabafefe54cabec1bdde025950eb8d --- tensorflow/python/BUILD | 14 ++++ tensorflow/python/lib/core/py_util.cc | 4 - tensorflow/python/lib/core/py_util.h | 14 +++- tensorflow/python/util/stack_trace.cc | 52 +++++++++++++ tensorflow/python/util/stack_trace.h | 108 ++++++++++++++++++++++++++ 5 files changed, 187 insertions(+), 5 deletions(-) create mode 100644 tensorflow/python/util/stack_trace.cc create mode 100644 tensorflow/python/util/stack_trace.h diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index b93992246ab..21802a1d819 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1070,6 +1070,7 @@ cc_library( deps = [ "//tensorflow/core:lib", "//tensorflow/core:script_ops_op_lib", + "//tensorflow/core/platform:logging", "//third_party/python_runtime:headers", ], ) @@ -5623,6 +5624,19 @@ tf_py_test( ], ) +cc_library( + name = "stack_trace", + srcs = ["util/stack_trace.cc"], + hdrs = ["util/stack_trace.h"], + deps = [ + ":py_util", + "//third_party/python_runtime:headers", # buildcleaner: keep + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/types:optional", + ], +) + py_library( name = "util", srcs = glob( diff --git a/tensorflow/python/lib/core/py_util.cc b/tensorflow/python/lib/core/py_util.cc index 739cab46b10..a78f0a12f21 100644 --- a/tensorflow/python/lib/core/py_util.cc +++ b/tensorflow/python/lib/core/py_util.cc @@ -18,10 +18,6 @@ limitations under the License. // Place `` before to avoid build failure in macOS. #include -// The empty line above is on purpose as otherwise clang-format will -// automatically move before . -#include - #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/strings/strcat.h" diff --git a/tensorflow/python/lib/core/py_util.h b/tensorflow/python/lib/core/py_util.h index a9f39d39461..af1b21699e6 100644 --- a/tensorflow/python/lib/core/py_util.h +++ b/tensorflow/python/lib/core/py_util.h @@ -16,12 +16,24 @@ limitations under the License. #ifndef TENSORFLOW_PYTHON_LIB_CORE_PY_UTIL_H_ #define TENSORFLOW_PYTHON_LIB_CORE_PY_UTIL_H_ +#include + +#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { + // Fetch the exception message as a string. An exception must be set // (PyErr_Occurred() must be true). string PyExceptionFetch(); -} // end namespace tensorflow + +// Assert that Python GIL is held. +inline void DCheckPyGilState() { +#if PY_MAJOR_VERSION >= 3 && PY_MINOR_VERSION >= 4 + DCHECK(PyGILState_Check()); +#endif +} + +} // namespace tensorflow #endif // TENSORFLOW_PYTHON_LIB_CORE_PY_UTIL_H_ diff --git a/tensorflow/python/util/stack_trace.cc b/tensorflow/python/util/stack_trace.cc new file mode 100644 index 00000000000..cf574f6f292 --- /dev/null +++ b/tensorflow/python/util/stack_trace.cc @@ -0,0 +1,52 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/python/util/stack_trace.h" + +namespace { + +// Returns C string from a Python string object. Handles Python2/3 strings. +// TODO(kkb): This is a generic Python utility function. factor out as a +// utility. +const char* GetPythonString(PyObject* o) { +#if PY_MAJOR_VERSION >= 3 + if (PyBytes_Check(o)) { + return PyBytes_AsString(o); + } else { + return PyUnicode_AsUTF8(o); + } +#else + return PyBytes_AsString(o); +#endif +} +} // namespace + +namespace tensorflow { +std::string StackTrace::ToString() const { + DCheckPyGilState(); + + std::ostringstream result; + for (int i = size_ - 1; i >= 0; --i) { + result << " File \"" << PyUnicode_AsUTF8(code_objs_[i]->co_filename) + << "\", line " + << PyCode_Addr2Line(code_objs_[i], last_instructions_[i]) << ", in " + << GetPythonString(code_objs_[i]->co_name) + << "\n \n"; + // TODO(kkb): Add source code line. See tf_stack.cc's + // FrameSummary::line() function. + } + return result.str(); +} +} // namespace tensorflow diff --git a/tensorflow/python/util/stack_trace.h b/tensorflow/python/util/stack_trace.h new file mode 100644 index 00000000000..0b9a737bf7e --- /dev/null +++ b/tensorflow/python/util/stack_trace.h @@ -0,0 +1,108 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_PYTHON_UTIL_STACK_TRACE_H_ +#define TENSORFLOW_PYTHON_UTIL_STACK_TRACE_H_ + +#include +#include + +#include +#include +#include + +#include "absl/base/attributes.h" +#include "absl/base/optimization.h" +#include "tensorflow/python/lib/core/py_util.h" + +namespace tensorflow { + +// A class for capturing Python stack trace. +class StackTrace final { + public: + static constexpr int kMaxDepth = 10; + + StackTrace() : size_(0) {} + + // Returns `StackTrace` object that captures the current Python stack trace. + // Python GIL must be acquired beforehand. + ABSL_MUST_USE_RESULT + ABSL_ATTRIBUTE_HOT + static StackTrace Capture() { + DCheckPyGilState(); + + StackTrace result; + const PyFrameObject* frame = PyThreadState_GET()->frame; + int i = 0; + for (; i < kMaxDepth && frame != nullptr; frame = frame->f_back, ++i) { + PyCodeObject* code_obj = frame->f_code; + DCHECK(frame->f_trace == nullptr); + DCHECK(code_obj != nullptr); + + Py_INCREF(code_obj); + result.code_objs_[i] = code_obj; + result.last_instructions_[i] = frame->f_lasti; + } + result.size_ = i; + return result; + } + + // Python GIL must be acquired beforehand. + ABSL_ATTRIBUTE_HOT + ~StackTrace() { Clear(); } + + StackTrace(StackTrace&& other) { + code_objs_ = other.code_objs_; + last_instructions_ = other.last_instructions_; + size_ = other.size_; + other.size_ = 0; + } + + // Python GIL must be acquired beforehand. 
+ ABSL_ATTRIBUTE_HOT + StackTrace& operator=(StackTrace&& other) { + Clear(); + + code_objs_ = other.code_objs_; + last_instructions_ = other.last_instructions_; + size_ = other.size_; + other.size_ = 0; + return *this; + } + + // Returns string representation of the captured stack trace. + std::string ToString() const; + + // TODO(kkb): Implement structured stack trace object getter. + + private: + std::array code_objs_; + std::array last_instructions_; + int size_; + + // Python GIL must be acquired beforehand. + ABSL_ATTRIBUTE_HOT + void Clear() { + DCheckPyGilState(); + for (int i = 0; i < size_; ++i) Py_DECREF(code_objs_[i]); + } + + StackTrace(const StackTrace&) = delete; + StackTrace& operator=(const StackTrace&) = delete; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_PYTHON_UTIL_STACK_TRACE_H_ From 516608035f85cec8b126712b0ff8407220206b22 Mon Sep 17 00:00:00 2001 From: Chao Mei Date: Tue, 9 Jun 2020 23:14:16 -0700 Subject: [PATCH 178/178] Modified BUILD rules to have a more explicit way to enable XNNPACK delegate by default. PiperOrigin-RevId: 315635582 Change-Id: Ib0e0c2aa02530923b105972397b00b76cd66c35c --- tensorflow/lite/BUILD | 43 ++++++++++++++++++++----- tensorflow/lite/delegates/xnnpack/BUILD | 9 ++++++ tensorflow/lite/model_xnnpack_test.cc | 2 +- 3 files changed, 45 insertions(+), 9 deletions(-) diff --git a/tensorflow/lite/BUILD b/tensorflow/lite/BUILD index 06a5fee9739..0eae6ad17c0 100644 --- a/tensorflow/lite/BUILD +++ b/tensorflow/lite/BUILD @@ -332,10 +332,36 @@ cc_library( # Enables applying XNNPACK delegate for float models in TFLite runtime. # WARNING: This build flag is experimental and subject to change. config_setting( - name = "tflite_with_xnnpack_enabled", + name = "tflite_with_xnnpack_explicit_true", values = {"define": "tflite_with_xnnpack=true"}, ) +config_setting( + name = "tflite_with_xnnpack_explicit_false", + values = {"define": "tflite_with_xnnpack=false"}, +) + +cc_library( + name = "tflite_with_xnnpack_enabled", + defines = ["TFLITE_BUILD_WITH_XNNPACK_DELEGATE"], + visibility = ["//visibility:private"], + deps = [ + "//tensorflow/lite/delegates/xnnpack:xnnpack_delegate", + ], +) + +cc_library( + name = "tflite_with_xnnpack_default", + visibility = ["//visibility:private"], + # TODO(b/151246885): put ":tflite_with_xnnpack_enabled" to macos/windows + # once we have a good testing coverage on these two platforms. 
+ deps = select({ + "//tensorflow:macos": [], + "//tensorflow:windows": [], + "//conditions:default": [], + }), +) + cc_library( name = "tflite_with_xnnpack_optional", srcs = ["tflite_with_xnnpack_optional.cc"], @@ -344,17 +370,18 @@ cc_library( "tflite_with_xnnpack_optional.h", ], copts = tflite_copts() + TFLITE_DEFAULT_COPTS, - defines = select({ - ":tflite_with_xnnpack_enabled": ["TFLITE_BUILD_WITH_XNNPACK_DELEGATE"], - "//conditions:default": [], - }), deps = [ "//tensorflow/lite/c:common", ] + select({ - ":tflite_with_xnnpack_enabled": [ - "//tensorflow/lite/delegates/xnnpack:xnnpack_delegate", + ":tflite_with_xnnpack_explicit_true": [ + "//tensorflow/lite/delegates/xnnpack:xnnpack_delegate_hdrs_only", + ":tflite_with_xnnpack_enabled", + ], + ":tflite_with_xnnpack_explicit_false": [], + "//conditions:default": [ + "//tensorflow/lite/delegates/xnnpack:xnnpack_delegate_hdrs_only", + ":tflite_with_xnnpack_default", ], - "//conditions:default": [], }), ) diff --git a/tensorflow/lite/delegates/xnnpack/BUILD b/tensorflow/lite/delegates/xnnpack/BUILD index 455da14e8fe..e2679580e69 100644 --- a/tensorflow/lite/delegates/xnnpack/BUILD +++ b/tensorflow/lite/delegates/xnnpack/BUILD @@ -29,6 +29,15 @@ cc_library( ], ) +cc_library( + name = "xnnpack_delegate_hdrs_only", + hdrs = ["xnnpack_delegate.h"], + visibility = ["//tensorflow/lite:__subpackages__"], + deps = [ + "//tensorflow/lite/c:common", + ], +) + cc_library( name = "xnnpack_delegate_test_mode", srcs = ["xnnpack_delegate.cc"], diff --git a/tensorflow/lite/model_xnnpack_test.cc b/tensorflow/lite/model_xnnpack_test.cc index 1a7376bfdcf..73860807c00 100644 --- a/tensorflow/lite/model_xnnpack_test.cc +++ b/tensorflow/lite/model_xnnpack_test.cc @@ -36,7 +36,7 @@ TEST(FloatModel, WithXnnpackDelegate) { ASSERT_EQ(interpreter->AllocateTensors(), kTfLiteOk); -#if TFLITE_HAS_ATTRIBUTE_WEAK +#if TFLITE_HAS_ATTRIBUTE_WEAK || defined(TFLITE_BUILD_WITH_XNNPACK_DELEGATE) // As the graph is fully delegated by XNNPACK delegate, we will expect the // following: EXPECT_EQ(1, interpreter->execution_plan().size());