From 640edaeef6c3a7ae8b84381be74665b3e5b26981 Mon Sep 17 00:00:00 2001 From: Dayananda-V Date: Tue, 2 Apr 2019 15:56:48 +0530 Subject: [PATCH 001/253] TfLite one_hot int8 and uint8 feature support 1- int8 and uint8 data type support change 2- supported data type test coverage added --- tensorflow/lite/kernels/one_hot.cc | 8 ++++++++ tensorflow/lite/kernels/one_hot_test.cc | 20 ++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/tensorflow/lite/kernels/one_hot.cc b/tensorflow/lite/kernels/one_hot.cc index 2ac12fe9308..25655a8aae2 100644 --- a/tensorflow/lite/kernels/one_hot.cc +++ b/tensorflow/lite/kernels/one_hot.cc @@ -128,6 +128,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { case kTfLiteInt16: case kTfLiteInt32: case kTfLiteInt64: + case kTfLiteInt8: + case kTfLiteUInt8: case kTfLiteBool: op_context.output->type = op_context.dtype; break; @@ -172,6 +174,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { case kTfLiteInt64: OneHotCompute(op_context); break; + case kTfLiteInt8: + OneHotCompute(op_context); + break; + case kTfLiteUInt8: + OneHotCompute(op_context); + break; case kTfLiteBool: OneHotCompute(op_context); break; diff --git a/tensorflow/lite/kernels/one_hot_test.cc b/tensorflow/lite/kernels/one_hot_test.cc index 85438327e7e..4f0353016c9 100644 --- a/tensorflow/lite/kernels/one_hot_test.cc +++ b/tensorflow/lite/kernels/one_hot_test.cc @@ -83,6 +83,26 @@ TEST(OneHotOpTest, BasicInt) { EXPECT_THAT(model.GetOutput(), ElementsAreArray({1, 0, 0, 0, 1, 0, 0, 0, 1})); } +TEST(OneHotOpTest, BasicInt8) { + const int depth = 3; + OneHotOpModel model({3}, depth, TensorType_INT8); + model.SetIndices({0, 1, 2}); + model.Invoke(); + + EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({3, 3})); + EXPECT_THAT(model.GetOutput(), ElementsAreArray({1, 0, 0, 0, 1, 0, 0, 0, 1})); +} + +TEST(OneHotOpTest, BasicUint8) { + const int depth = 3; + OneHotOpModel model({3}, depth, TensorType_UINT8); + model.SetIndices({0, 1, 
2}); + model.Invoke(); + + EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({3, 3})); + EXPECT_THAT(model.GetOutput(), ElementsAreArray({1, 0, 0, 0, 1, 0, 0, 0, 1})); +} + TEST(OneHotOpTest, BasicBool) { const int depth = 3; OneHotOpModel model({3}, depth, TensorType_BOOL); From a8c6704074385dc2e4dd3f8282a0df86606a4dc5 Mon Sep 17 00:00:00 2001 From: Tom Carchrae Date: Mon, 3 Feb 2020 15:43:41 -0800 Subject: [PATCH 002/253] git is required by several tutorial examples --- .../tools/dockerfiles/partials/jupyter.partial.Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/tools/dockerfiles/partials/jupyter.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/jupyter.partial.Dockerfile index b23671fe12d..3ffc295f09b 100644 --- a/tensorflow/tools/dockerfiles/partials/jupyter.partial.Dockerfile +++ b/tensorflow/tools/dockerfiles/partials/jupyter.partial.Dockerfile @@ -7,6 +7,8 @@ RUN jupyter serverextension enable --py jupyter_http_over_ws RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/ RUN mkdir /.local && chmod a+rwx /.local RUN apt-get install -y --no-install-recommends wget +# some examples require git to fetch dependencies +RUN apt-get install -y --no-install-recommends git WORKDIR /tf/tensorflow-tutorials RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/classification.ipynb RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/overfit_and_underfit.ipynb From 3b29c0b1eaf83466ca2a31daf2a807fa3dd4a3d6 Mon Sep 17 00:00:00 2001 From: Tom Carchrae Date: Tue, 11 Feb 2020 10:08:04 -0800 Subject: [PATCH 003/253] update generated files --- tensorflow/tools/dockerfiles/dockerfiles/cpu-jupyter.Dockerfile | 2 ++ .../tools/dockerfiles/dockerfiles/devel-cpu-jupyter.Dockerfile | 2 ++ .../tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile | 2 ++ tensorflow/tools/dockerfiles/dockerfiles/gpu-jupyter.Dockerfile | 2 ++ 
.../dockerfiles/mkl_horovod/devel-horovod-jupyter.Dockerfile | 2 ++ .../dockerfiles/mkl_horovod/horovod-jupyter.Dockerfile | 2 ++ .../dockerfiles/ppc64le/cpu-ppc64le-jupyter.Dockerfile | 2 ++ .../dockerfiles/ppc64le/devel-cpu-ppc64le-jupyter.Dockerfile | 2 ++ .../dockerfiles/ppc64le/devel-gpu-ppc64le-jupyter.Dockerfile | 2 ++ .../dockerfiles/ppc64le/gpu-ppc64le-jupyter.Dockerfile | 2 ++ 10 files changed, 20 insertions(+) diff --git a/tensorflow/tools/dockerfiles/dockerfiles/cpu-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/cpu-jupyter.Dockerfile index 66339bd95e1..6a8d0018cc4 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/cpu-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/cpu-jupyter.Dockerfile @@ -68,6 +68,8 @@ RUN jupyter serverextension enable --py jupyter_http_over_ws RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/ RUN mkdir /.local && chmod a+rwx /.local RUN apt-get install -y --no-install-recommends wget +# some examples require git to fetch dependencies +RUN apt-get install -y --no-install-recommends git WORKDIR /tf/tensorflow-tutorials RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/classification.ipynb RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/overfit_and_underfit.ipynb diff --git a/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu-jupyter.Dockerfile index bdaf0116f1f..5be51a57620 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu-jupyter.Dockerfile @@ -120,6 +120,8 @@ RUN jupyter serverextension enable --py jupyter_http_over_ws RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/ RUN mkdir /.local && chmod a+rwx /.local RUN apt-get install -y --no-install-recommends wget +# some examples require git to fetch dependencies +RUN apt-get install -y 
--no-install-recommends git WORKDIR /tf/tensorflow-tutorials RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/classification.ipynb RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/overfit_and_underfit.ipynb diff --git a/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile index 96274dbbdab..7aecc369cd5 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile @@ -162,6 +162,8 @@ RUN jupyter serverextension enable --py jupyter_http_over_ws RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/ RUN mkdir /.local && chmod a+rwx /.local RUN apt-get install -y --no-install-recommends wget +# some examples require git to fetch dependencies +RUN apt-get install -y --no-install-recommends git WORKDIR /tf/tensorflow-tutorials RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/classification.ipynb RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/overfit_and_underfit.ipynb diff --git a/tensorflow/tools/dockerfiles/dockerfiles/gpu-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/gpu-jupyter.Dockerfile index 488d91c493b..8cc9dc68f2a 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/gpu-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/gpu-jupyter.Dockerfile @@ -117,6 +117,8 @@ RUN jupyter serverextension enable --py jupyter_http_over_ws RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/ RUN mkdir /.local && chmod a+rwx /.local RUN apt-get install -y --no-install-recommends wget +# some examples require git to fetch dependencies +RUN apt-get install -y --no-install-recommends git WORKDIR /tf/tensorflow-tutorials RUN wget 
https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/classification.ipynb RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/overfit_and_underfit.ipynb diff --git a/tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/devel-horovod-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/devel-horovod-jupyter.Dockerfile index 4dee6e57f2d..129fc78db54 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/devel-horovod-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/devel-horovod-jupyter.Dockerfile @@ -172,6 +172,8 @@ RUN jupyter serverextension enable --py jupyter_http_over_ws RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/ RUN mkdir /.local && chmod a+rwx /.local RUN apt-get install -y --no-install-recommends wget +# some examples require git to fetch dependencies +RUN apt-get install -y --no-install-recommends git WORKDIR /tf/tensorflow-tutorials RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/classification.ipynb RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/overfit_and_underfit.ipynb diff --git a/tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/horovod-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/horovod-jupyter.Dockerfile index bc68a8a2e47..a9fccf46b81 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/horovod-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/mkl_horovod/horovod-jupyter.Dockerfile @@ -120,6 +120,8 @@ RUN jupyter serverextension enable --py jupyter_http_over_ws RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/ RUN mkdir /.local && chmod a+rwx /.local RUN apt-get install -y --no-install-recommends wget +# some examples require git to fetch dependencies +RUN apt-get install -y --no-install-recommends git WORKDIR /tf/tensorflow-tutorials RUN wget 
https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/classification.ipynb RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/overfit_and_underfit.ipynb diff --git a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/cpu-ppc64le-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/cpu-ppc64le-jupyter.Dockerfile index 42a8dd9be66..4dae9f50c4b 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/cpu-ppc64le-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/cpu-ppc64le-jupyter.Dockerfile @@ -86,6 +86,8 @@ RUN jupyter serverextension enable --py jupyter_http_over_ws RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/ RUN mkdir /.local && chmod a+rwx /.local RUN apt-get install -y --no-install-recommends wget +# some examples require git to fetch dependencies +RUN apt-get install -y --no-install-recommends git WORKDIR /tf/tensorflow-tutorials RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/classification.ipynb RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/overfit_and_underfit.ipynb diff --git a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-cpu-ppc64le-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-cpu-ppc64le-jupyter.Dockerfile index dce8df78c4c..0870cfc83f9 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-cpu-ppc64le-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-cpu-ppc64le-jupyter.Dockerfile @@ -121,6 +121,8 @@ RUN jupyter serverextension enable --py jupyter_http_over_ws RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/ RUN mkdir /.local && chmod a+rwx /.local RUN apt-get install -y --no-install-recommends wget +# some examples require git to fetch dependencies +RUN apt-get install -y --no-install-recommends git WORKDIR /tf/tensorflow-tutorials RUN wget 
https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/classification.ipynb RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/overfit_and_underfit.ipynb diff --git a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le-jupyter.Dockerfile index aa20f376cd8..cec115afea9 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/devel-gpu-ppc64le-jupyter.Dockerfile @@ -163,6 +163,8 @@ RUN jupyter serverextension enable --py jupyter_http_over_ws RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/ RUN mkdir /.local && chmod a+rwx /.local RUN apt-get install -y --no-install-recommends wget +# some examples require git to fetch dependencies +RUN apt-get install -y --no-install-recommends git WORKDIR /tf/tensorflow-tutorials RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/classification.ipynb RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/overfit_and_underfit.ipynb diff --git a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/gpu-ppc64le-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/gpu-ppc64le-jupyter.Dockerfile index 24c22f90df8..93ac1fc1bbc 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/gpu-ppc64le-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/ppc64le/gpu-ppc64le-jupyter.Dockerfile @@ -135,6 +135,8 @@ RUN jupyter serverextension enable --py jupyter_http_over_ws RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/ RUN mkdir /.local && chmod a+rwx /.local RUN apt-get install -y --no-install-recommends wget +# some examples require git to fetch dependencies +RUN apt-get install -y --no-install-recommends git WORKDIR /tf/tensorflow-tutorials RUN wget 
https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/classification.ipynb RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/overfit_and_underfit.ipynb From dfc227d6e2a06655cef8661fb883150aafe4f1e2 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Thu, 13 Feb 2020 14:20:06 +0100 Subject: [PATCH 004/253] Add TRT profile generation mode --- .../tf2tensorrt/kernels/trt_engine_op.cc | 38 +++++++++-- .../python/compiler/tensorrt/trt_convert.py | 64 ++++++++++++++++++- 2 files changed, 93 insertions(+), 9 deletions(-) diff --git a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc index 729c137b426..af6d509a0b2 100644 --- a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc @@ -153,6 +153,10 @@ class TRTEngineOp : public AsyncOpKernel { // Whether to use implicit batch dimension for TensorRT. bool use_implicit_batch_; + // Whether to collect optimization profiles for TensorRT, only used when + // use_implicit_batch_=false. + bool profile_generation_mode_; + // Maximum number of cached engines. int max_cached_engines_; @@ -311,7 +315,19 @@ TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) use_implicit_batch_ = true; } #endif + status = + context->GetAttr("_profile_generation_mode", profile_generation_mode_); + if (status.code() == tensorflow::error::NOT_FOUND) { + VLOG(2) << "Not found _profile_generation_mode in " + << context->device()->name() + << ", thus setting _profile_generation_mode=false"; + profile_generation_mode_ = false; + } if (use_implicit_batch_) { + OP_REQUIRES(context, !profile_generation_mode_, + errors::InvalidArgument( + "profile_generation_mode_=true is only supported if " + "use_implicit_batch=false")); if (input_partial_shapes_.empty()) { VLOG(1) << "Attribute input_shapes is not set. 
This happens probably " << "because you are using a model that is already converted " @@ -536,12 +552,22 @@ void TRTEngineOp::ComputeAsync(OpKernelContext* ctx, OP_REQUIRES_OK_ASYNC(ctx, VerifyInputShapes(input_concrete_shapes), *helper); if (!use_implicit_batch_) { - if (cache_res->profiles_.GetNumProfiles() == 0) { - // Create a single profile from the current input shape. - // In the future we will collect a set of input shapes during build mode - // and create profiles for each of them. - cache_res->profiles_.AddShape(input_concrete_shapes); - cache_res->profiles_.InitProfiles(); + if (profile_generation_mode_) { + // Collecting new shapes for profiles can be only done once. After + // the shapes are converted to TRT profiles, no shapes can be collected + // anymore. + OP_REQUIRES(ctx, cache_res->profiles_.GetNumProfiles() == 0, + errors::Unimplemented("Cannot collect new shapes when " + "profiles are already created.")); + // Just collect the input shape info and return. The shapes are used to + // generate optimization profiles during engine creation. + cache_res->profiles_.addShape(input_concrete_shapes); + VLOG(1) << "Native segment is used during collecting shapes for profiles"; + ExecuteNativeSegment(ctx, helper); + return; + } else if (cache_res->profiles_.GetNumProfiles() == 0) { + // Create profiles out of collected shapes during profile generation. 
+ cache_res->profiles_.initProfiles(); } } StatusOr> status = diff --git a/tensorflow/python/compiler/tensorrt/trt_convert.py b/tensorflow/python/compiler/tensorrt/trt_convert.py index 2ea22ebba49..c8c9b125bc6 100644 --- a/tensorflow/python/compiler/tensorrt/trt_convert.py +++ b/tensorflow/python/compiler/tensorrt/trt_convert.py @@ -22,6 +22,7 @@ import collections import os import platform import tempfile +from functools import partial import six as _six @@ -972,6 +973,8 @@ class TrtGraphConverterV2(object): self._input_saved_model_signature_key = ( input_saved_model_signature_key or signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY) + self._rewriter_config = get_tensorrt_rewriter_config( + conversion_params=self._conversion_params, is_v2=True) self._need_calibration = ( conversion_params.precision_mode == TrtPrecisionMode.INT8 and @@ -980,6 +983,15 @@ class TrtGraphConverterV2(object): raise ValueError("INT8 precision mode with calibration is not supported " "with static TensorRT ops. Set is_dynamic_op to True.") + # rewriter_config is already validated + self._need_trt_profiles = None + for optimizer in self._rewriter_config.custom_optimizers: + if optimizer.name == "TensorRTOptimizer": + self._need_trt_profiles = not optimizer.parameter_map[ + "use_implicit_batch"].b \ + if "use_implicit_batch" in optimizer.parameter_map else False + assert self._need_trt_profiles != None + self._converted = False self._build_called_once = False @@ -992,11 +1004,9 @@ class TrtGraphConverterV2(object): Returns: The optimized GraphDef. 
""" - rewriter_config = get_tensorrt_rewriter_config( - conversion_params=self._conversion_params, is_v2=True) grappler_session_config = config_pb2.ConfigProto() grappler_session_config.graph_options.rewrite_options.CopyFrom( - rewriter_config) + self._rewriter_config) return tf_optimizer.OptimizeGraph( grappler_session_config, meta_graph_def, graph_id=b"tf_graph") @@ -1112,8 +1122,50 @@ class TrtGraphConverterV2(object): raise RuntimeError("input_fn is None. Method build() needs input_fn " "to be specified in order to build TensorRT engines") + def _rebuild_func(): + # Rebuild function from graph_def. + reset_converted_func = wrap_function.function_from_graph_def( + self._converted_graph_def, + [tensor.name for tensor in self._converted_func.inputs], + [tensor.name for tensor in self._converted_func.outputs]) + reset_converted_func.graph.structured_outputs = nest.pack_sequence_as( + self._converted_func.graph.structured_outputs, + reset_converted_func.graph.structured_outputs) + self._converted_func = reset_converted_func + + def _set_profile_generation_mode(value, node): + node.attr["_profile_generation_mode"].b = value + + if self._need_trt_profiles: + # Enable profile generation. + self._for_each_trt_node(self._converted_graph_def, + partial(_set_profile_generation_mode, True)) + # Profile generation is enabled using the _profile_generation_mode + # attribute of the TRTEngineOps. We need to rebuild the function to + # change this attribute. + _rebuild_func() + + # Use the first input in explicit batch mode to build TensorRT engines + # after generating all the profiles. The first input is used but any of + # the inputs can be used because the shape of this input does not + # determine the engine and instead the shapes collected in profiles + # determine the engine. + first_input = None + # Run inference: + # Builds TRT engines if self._need_trt_profiles is False. + # Builds TRT optimization profiles if self._need_trt_profiles is True. 
for inp in input_fn(): + if not first_input: + first_input = inp self._converted_func(*map(ops.convert_to_tensor, inp)) + if self._need_trt_profiles: + # Disable profile generation. + self._for_each_trt_node(self._converted_graph_def, + partial(_set_profile_generation_mode, False)) + _rebuild_func() + # Run inference to build TensorRT engines out of generated optimization + # profiles. + self._converted_func(*map(ops.convert_to_tensor, first_input)) self._build_called_once = True @@ -1125,6 +1177,12 @@ class TrtGraphConverterV2(object): """ assert self._converted + if self._need_trt_profiles and not self._build_called_once: + raise NotImplementedError( + "build() is not called . Explicit batch mode " + "(use_implicit_batch=False) requires generating TensorRT optimization" + " profiles which is done by calling build().") + # Serialize the TRT engines in the cache if any, and create trackable # resource to track them. engine_asset_dir = tempfile.mkdtemp() From 7f7fcf750897d9dcf4189d4031f3824388a40ba9 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Thu, 13 Feb 2020 14:20:58 +0100 Subject: [PATCH 005/253] Run profile generation mode if needed --- .../test/tf_trt_integration_test_base.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py b/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py index 9c9f604ae04..7d3cc8ea564 100644 --- a/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py +++ b/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py @@ -468,6 +468,19 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): converter.save(trt_saved_model_dir) return trt_saved_model_dir + def _NeedToBuild(self, conversion_params): + """ Whether we need to call converter.build(). 
+ Currently we need to build if we have explicit batch (dynamic shapes) + """ + config = conversion_params.rewriter_config_template + if config is None: + return False + for optimizer in config.custom_optimizers: + if optimizer.name == 'TensorRTOptimizer': + if "use_implicit_batch" in optimizer.parameter_map: + return not optimizer.parameter_map["use_implicit_batch"].b + return False + def _GetInferGraph(self, run_params, saved_model_dir): """Return trt converted graphdef.""" conversion_params = self.GetConversionParams(run_params) @@ -479,6 +492,15 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): converter = self._CreateConverter(run_params, saved_model_dir, session_config, conversion_params) converter.convert() + + if self._NeedToBuild(conversion_params): + logging.info("Need to start build mode") + def _BuildInputFn(): + for shapes in self._GetParamsCached().input_dims: + yield [np.zeros(x).astype(np.float32) for x in shapes] + + converter.build(input_fn=_BuildInputFn) + trt_saved_model_dir = self._GetSavedModelDir(run_params, GraphState.INFERENCE) converter.save(trt_saved_model_dir) From c7ce71f168bb2be59ec7a22117cecb8466872960 Mon Sep 17 00:00:00 2001 From: Frederic Bastien Date: Fri, 14 Feb 2020 07:56:25 -0800 Subject: [PATCH 006/253] [XLA] Fix a latent bug. Currently I think the code is fine, but the function doesn't do what its name says. 
--- tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc index c5353256e27..ec921385487 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc @@ -506,7 +506,7 @@ llvm::Value* IsBlock0Thread0(llvm::IRBuilder<>* b) { EmitCallToTargetIntrinsic(TargetIntrinsicID::kThreadIdx, {}, {}, b)), b->CreateICmpEQ( b->getInt32(0), - EmitCallToTargetIntrinsic(TargetIntrinsicID::kThreadIdx, {}, {}, b))); + EmitCallToTargetIntrinsic(TargetIntrinsicID::kBlockIdx, {}, {}, b))); } bool AreFusedReductionOutputsConsistent( From 4643aa057de812175473a4be53fa9d2cd173d3b7 Mon Sep 17 00:00:00 2001 From: Xiaoquan Kong Date: Mon, 17 Feb 2020 00:43:09 +0800 Subject: [PATCH 007/253] docfix: fix docstring error in KerasHistory --- tensorflow/python/keras/engine/base_layer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py index 24d3432fb8e..58d31693689 100644 --- a/tensorflow/python/keras/engine/base_layer.py +++ b/tensorflow/python/keras/engine/base_layer.py @@ -2758,7 +2758,7 @@ class KerasHistory( layer: The Layer that produced the Tensor. node_index: The specific call to the Layer that produced this Tensor. Layers can be called multiple times in order to share weights. A new node is - created every time a Tensor is called. + created every time a Layer is called. tensor_index: The output index for this Tensor. Always zero if the Layer that produced this Tensor only has one output. Nested structures of Tensors are deterministically assigned an index via `nest.flatten`. 
From b192692e47c44be57ed5bbc40dfcc2d05dcd2501 Mon Sep 17 00:00:00 2001 From: lyonguyen8697 Date: Mon, 17 Feb 2020 13:45:48 +0700 Subject: [PATCH 008/253] Add sorted builtin support for autograph --- .../python/autograph/operators/py_builtins.py | 39 ++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/autograph/operators/py_builtins.py b/tensorflow/python/autograph/operators/py_builtins.py index c6ae65ff412..c8cef6bbe8f 100644 --- a/tensorflow/python/autograph/operators/py_builtins.py +++ b/tensorflow/python/autograph/operators/py_builtins.py @@ -38,6 +38,9 @@ from tensorflow.python.ops import gen_parsing_ops from tensorflow.python.ops import gen_string_ops from tensorflow.python.ops import list_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import sort_ops +from tensorflow.python.ops.parallel_for import control_flow_ops as parallel_ops +from tensorflow.python.ops import check_ops from tensorflow.python.util import lazy_loader from tensorflow.python.util import nest @@ -461,8 +464,41 @@ def _py_all(iterable): return all(iterable) +def sorted_(iterable, key=UNSPECIFIED, reverse=UNSPECIFIED): + if tensor_util.is_tensor(iterable): + return _tf_sorted(iterable, key, reverse) + return _py_sorted(iterable, key, reverse) + + +def _tf_sorted(iterable, key, reverse): + """Overload of sorted_ for Tensor iterable.""" + if reverse is UNSPECIFIED: + direction = 'ASCENDING' + else: + direction = 'DESCENDING' + if key is not UNSPECIFIED: + mapped = parallel_ops.vectorized_map(key, iterable) + with ops.control_dependencies( + [check_ops.assert_rank_v2(mapped, 1, 'only support 1-D tensor')]): + order = sort_ops.argsort(mapped, direction=direction) + return array_ops.gather_v2(iterable, order) + with ops.control_dependencies( + [check_ops.assert_rank_v2(iterable, 1, 'only support 1-D tensor')]): + return sort_ops.sort(iterable, direction=direction) + + +def _py_sorted(iterable, key, reverse): + if key is not 
UNSPECIFIED and reverse is UNSPECIFIED: + return sorted(iterable, key=key) + if key is UNSPECIFIED and reverse is not UNSPECIFIED: + return sorted(iterable, reverse=reverse) + if key is not UNSPECIFIED and reverse is not UNSPECIFIED: + return sorted(iterable, key=key, reverse=reverse) + return sorted(iterable) + + SUPPORTED_BUILTINS = (abs, float, int, len, print, range, enumerate, zip, map, - filter, any, all) + filter, any, all, sorted) if six.PY2: SUPPORTED_BUILTINS += (xrange,) @@ -482,4 +518,5 @@ BUILTIN_FUNCTIONS_MAP = { 'filter': filter_, 'any': any_, 'all': all_, + 'sorted': sorted_, } From 3e5a96f7ed3ca77d4cb813a8d1fcf35eed18de5c Mon Sep 17 00:00:00 2001 From: lyonguyen8697 Date: Mon, 17 Feb 2020 13:46:08 +0700 Subject: [PATCH 009/253] Add test case for sorted builtin support in autograph --- .../autograph/operators/py_builtins_test.py | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tensorflow/python/autograph/operators/py_builtins_test.py b/tensorflow/python/autograph/operators/py_builtins_test.py index 10f6d2f9392..e0434a2c3c4 100644 --- a/tensorflow/python/autograph/operators/py_builtins_test.py +++ b/tensorflow/python/autograph/operators/py_builtins_test.py @@ -33,6 +33,7 @@ from tensorflow.python.framework import errors_impl from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import tensor_array_ops +from tensorflow.python.ops import math_ops from tensorflow.python.platform import test @@ -373,6 +374,40 @@ class PyBuiltinsTest(test.TestCase): with self.assertRaises(ValueError): py_builtins.all_(dataset_mixed) + def test_sorted(self): + self.assertListEqual(py_builtins.sorted_([2, 3, 1]), [1, 2, 3]) + self.assertListEqual( + py_builtins.sorted_([2, 3, 1], key=lambda x: -x), [3, 2, 1]) + self.assertListEqual( + py_builtins.sorted_([2, 3, 1], reverse=True), [3, 2, 1]) + self.assertListEqual( + py_builtins.sorted_([2, 3, 1], key=lambda x: -x, reverse=True), + [1, 
2, 3]) + self.assertListEqual( + py_builtins.sorted_([[4, 3], [2, 1]], key=lambda x: sum(x)), + [[2, 1], [4, 3]]) + + def test_sorted_tensor(self): + iterable_1 = constant_op.constant([2, 3, 1]) + self.assertListEqual(list(self.evaluate( + py_builtins.sorted_(iterable_1))), [1, 2, 3]) + self.assertListEqual(list(self.evaluate( + py_builtins.sorted_(iterable_1, key=lambda x: -x))), [3, 2, 1]) + self.assertListEqual(list(self.evaluate( + py_builtins.sorted_(iterable_1, reverse=True))), [3, 2, 1]) + self.assertListEqual(list(self.evaluate( + py_builtins.sorted_(iterable_1, key=lambda x: -x, reverse=True))), + [1, 2, 3]) + + iterable_2 = constant_op.constant([[4, 3], [2, 1]]) + with self.assertRaises(ValueError): + py_builtins.sorted_(iterable_2) + with self.assertRaises(ValueError): + py_builtins.sorted_(iterable_2, key=lambda x: -x) + self.assertListEqual(list(self.evaluate( + py_builtins.sorted_(iterable_2, key=lambda x: math_ops.reduce_sum(x)))), + [[2, 1], [4, 3]]) + if __name__ == '__main__': test.main() From 87ea1c309d1c4506fd891c9b4bea4aeaf7900231 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Mon, 17 Feb 2020 15:00:14 +0100 Subject: [PATCH 010/253] Add extra inputs and outputs for BuildParamsWithMask --- .../tensorrt/test/tf_trt_integration_test_base.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py b/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py index 7d3cc8ea564..0f892e3d3ff 100644 --- a/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py +++ b/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py @@ -195,7 +195,8 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): output_shapes, input_mask, output_mask) def BuildParamsWithMask(self, graph_fn, dtype, input_shapes, output_shapes, - input_mask, output_mask): + input_mask, output_mask, extra_inputs=[], + extra_outputs=[]): 
"""Build test parameters with static or dynamic input shapes. To define dynamic shapes give a boolean mask that describes which @@ -214,6 +215,8 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): output_shapes: The output shapes. input_mask: The input shape masks. output_mask: the output shape masks. + extra_inputs: list of additional input shapes + extra_outputs: list of additional outputs shapes Returns: The test parameters. @@ -229,6 +232,9 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): assert len(input_mask) == len(input_shapes) assert len(output_mask) == len(output_shapes) + for extra_in_shape, extra_out_shape in zip(extra_inputs, extra_outputs): + assert len(input_shapes) == len(extra_in_shape) + assert len(output_shapes) == len(extra_out_shape) return TfTrtIntegrationTestParams( graph_fn=graph_fn, @@ -240,8 +246,8 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): self._GetTensorSpec(shape, mask, dtype, "output_%d" % i) for i, (shape, mask) in enumerate(zip(output_shapes, output_mask)) ], - input_dims=[input_shapes], - expected_output_dims=[output_shapes]) + input_dims=[input_shapes] + extra_inputs, + expected_output_dims=[output_shapes] + extra_outputs) def GetParams(self): """Return a TfTrtIntegrationTestParams for test, implemented by subclass.""" From f6e3aeb82e08349d6a84f3dd05a3737d7c45f2be Mon Sep 17 00:00:00 2001 From: lyonguyen8697 Date: Tue, 18 Feb 2020 13:50:07 +0700 Subject: [PATCH 011/253] add an extra check in case the rank is static --- tensorflow/python/autograph/operators/py_builtins.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/autograph/operators/py_builtins.py b/tensorflow/python/autograph/operators/py_builtins.py index c8cef6bbe8f..b0db94cb1e2 100644 --- a/tensorflow/python/autograph/operators/py_builtins.py +++ b/tensorflow/python/autograph/operators/py_builtins.py @@ -478,12 +478,16 @@ def _tf_sorted(iterable, key, reverse): direction = 
'DESCENDING' if key is not UNSPECIFIED: mapped = parallel_ops.vectorized_map(key, iterable) + if mapped.shape.rank is not None and mapped.shape.rank != 1: + raise ValueError('sort only supports only 1D tensors') with ops.control_dependencies( - [check_ops.assert_rank_v2(mapped, 1, 'only support 1-D tensor')]): + [check_ops.assert_rank_v2(mapped, 1, 'sort only supports only 1D tensors')]): order = sort_ops.argsort(mapped, direction=direction) return array_ops.gather_v2(iterable, order) + if iterable.shape.rank is not None and iterable.shape.rank != 1: + raise ValueError('sort only supports only 1D tensors') with ops.control_dependencies( - [check_ops.assert_rank_v2(iterable, 1, 'only support 1-D tensor')]): + [check_ops.assert_rank_v2(iterable, 1, 'sort only supports only 1D tensors')]): return sort_ops.sort(iterable, direction=direction) From 87ec5d0ae000722ca19330d5c00a201c4a6d5014 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Tue, 18 Feb 2020 20:40:53 +0100 Subject: [PATCH 012/253] Fix errors due to function renaming --- tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc index 737c064fa04..a82e5b61fda 100644 --- a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc @@ -316,7 +316,7 @@ TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) } #endif status = - context->GetAttr("_profile_generation_mode", profile_generation_mode_); + context->GetAttr("_profile_generation_mode", &profile_generation_mode_); if (status.code() == tensorflow::error::NOT_FOUND) { VLOG(2) << "Not found _profile_generation_mode in " << context->device()->name() @@ -561,13 +561,13 @@ void TRTEngineOp::ComputeAsync(OpKernelContext* ctx, "profiles are already created.")); // Just collect the input shape info and return. 
The shapes are used to // generate optimization profiles during engine creation. - cache_res->profiles_.addShape(input_concrete_shapes); + cache_res->profiles_.AddShape(input_concrete_shapes); VLOG(1) << "Native segment is used during collecting shapes for profiles"; ExecuteNativeSegment(ctx, helper); return; } else if (cache_res->profiles_.GetNumProfiles() == 0) { // Create profiles out of collected shapes during profile generation. - cache_res->profiles_.initProfiles(); + cache_res->profiles_.InitProfiles(); } } StatusOr> status = From 590a8be726b2cf4831f714881ad9667655e8a3ac Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Tue, 18 Feb 2020 20:41:27 +0100 Subject: [PATCH 013/253] Test optimization profiles in dynamic shape mode --- tensorflow/python/compiler/tensorrt/test/trt_mode_test.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/compiler/tensorrt/test/trt_mode_test.py b/tensorflow/python/compiler/tensorrt/test/trt_mode_test.py index 9a823ab56d4..8397d843889 100644 --- a/tensorflow/python/compiler/tensorrt/test/trt_mode_test.py +++ b/tensorflow/python/compiler/tensorrt/test/trt_mode_test.py @@ -137,10 +137,14 @@ class DynamicShapesTest(TrtModeTestBase): """ def GetParams(self): - """We specify input/output mask with dynamic (unknown) shapes.""" + """We specify input/output mask with dynamic (unknown) shapes. 
A single + engine with three optimization profiles can handle the three different + input shapes.""" return self.BuildParamsWithMask( self.GraphFn, dtypes.float32, [[1, 12, 5]], [[12, 5]], + extra_inputs=[[[1, 2, 3]], [[1, 4, 6]]], + extra_outputs=[[[2, 3]], [[4, 6]]], input_mask=[[False, False, False]], output_mask=[[False, False]]) From 703211fbf1b131a253a0b45bdf227e87edec723c Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Mon, 17 Feb 2020 21:08:06 +0100 Subject: [PATCH 014/253] Fix bad_function_call --- tensorflow/core/kernels/ops_testutil.cc | 6 +++++- tensorflow/core/kernels/ops_testutil.h | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/ops_testutil.cc b/tensorflow/core/kernels/ops_testutil.cc index 3dab8bf2f50..614e184b0b2 100644 --- a/tensorflow/core/kernels/ops_testutil.cc +++ b/tensorflow/core/kernels/ops_testutil.cc @@ -71,6 +71,9 @@ OpsTestBase::OpsTestBase() : device_type_(DEVICE_CPU) { auto device = DeviceFactory::NewDevice("CPU", {}, "/job:a/replica:0/task:0"); CHECK(device) << "Could not create CPU device"; + thread_pool_ = absl::make_unique( + Env::Default(), /*name=*/"default", /*num_threads=*/1); + device_ = device.get(); device_mgr_ = absl::make_unique(std::move(device)); @@ -104,7 +107,8 @@ void OpsTestBase::SetDevice(const DeviceType& device_type, device_mgr_ = absl::make_unique(std::move(device)); pflr_ = absl::make_unique( device_mgr_.get(), Env::Default(), /*config=*/nullptr, - TF_GRAPH_DEF_VERSION, flib_def_.get(), OptimizerOptions()); + TF_GRAPH_DEF_VERSION, flib_def_.get(), OptimizerOptions(), + thread_pool_.get()); device_type_ = device_type; #ifdef GOOGLE_CUDA diff --git a/tensorflow/core/kernels/ops_testutil.h b/tensorflow/core/kernels/ops_testutil.h index ab7b994d9d2..f6821e3c49c 100644 --- a/tensorflow/core/kernels/ops_testutil.h +++ b/tensorflow/core/kernels/ops_testutil.h @@ -26,6 +26,7 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/process_function_library_runtime.h" +#include "tensorflow/core/platform/threadpool.h" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/device_base.h" #include "tensorflow/core/framework/function.h" @@ -183,6 +184,7 @@ class OpsTestBase : public ::testing::Test { std::unique_ptr flib_def_; std::unique_ptr pflr_; + std::unique_ptr thread_pool_; private: TF_DISALLOW_COPY_AND_ASSIGN(OpsTestBase); From 1c978d506559b9e1aee78a83ee3499b3b925beab Mon Sep 17 00:00:00 2001 From: lyonguyen8697 Date: Wed, 19 Feb 2020 17:58:56 +0700 Subject: [PATCH 015/253] fix build failures --- tensorflow/python/autograph/operators/py_builtins.py | 6 ++++-- tensorflow/python/autograph/operators/py_builtins_test.py | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/autograph/operators/py_builtins.py b/tensorflow/python/autograph/operators/py_builtins.py index b0db94cb1e2..5ce0a3fdc59 100644 --- a/tensorflow/python/autograph/operators/py_builtins.py +++ b/tensorflow/python/autograph/operators/py_builtins.py @@ -481,13 +481,15 @@ def _tf_sorted(iterable, key, reverse): if mapped.shape.rank is not None and mapped.shape.rank != 1: raise ValueError('sort only supports only 1D tensors') with ops.control_dependencies( - [check_ops.assert_rank_v2(mapped, 1, 'sort only supports only 1D tensors')]): + [check_ops.assert_rank_v2( + mapped, 1, 'sort only supports only 1D tensors')]): order = sort_ops.argsort(mapped, direction=direction) return array_ops.gather_v2(iterable, order) if iterable.shape.rank is not None and iterable.shape.rank != 1: raise ValueError('sort only supports only 1D tensors') with ops.control_dependencies( - [check_ops.assert_rank_v2(iterable, 1, 'sort only supports only 1D tensors')]): + [check_ops.assert_rank_v2( + iterable, 1, 'sort only supports only 1D 
tensors')]): return sort_ops.sort(iterable, direction=direction) diff --git a/tensorflow/python/autograph/operators/py_builtins_test.py b/tensorflow/python/autograph/operators/py_builtins_test.py index e0434a2c3c4..21a154867a7 100644 --- a/tensorflow/python/autograph/operators/py_builtins_test.py +++ b/tensorflow/python/autograph/operators/py_builtins_test.py @@ -397,7 +397,7 @@ class PyBuiltinsTest(test.TestCase): py_builtins.sorted_(iterable_1, reverse=True))), [3, 2, 1]) self.assertListEqual(list(self.evaluate( py_builtins.sorted_(iterable_1, key=lambda x: -x, reverse=True))), - [1, 2, 3]) + [1, 2, 3]) iterable_2 = constant_op.constant([[4, 3], [2, 1]]) with self.assertRaises(ValueError): @@ -406,7 +406,7 @@ class PyBuiltinsTest(test.TestCase): py_builtins.sorted_(iterable_2, key=lambda x: -x) self.assertListEqual(list(self.evaluate( py_builtins.sorted_(iterable_2, key=lambda x: math_ops.reduce_sum(x)))), - [[2, 1], [4, 3]]) + [[2, 1], [4, 3]]) if __name__ == '__main__': From c99b5f13310cf319fb8f8addb8840506f85d226e Mon Sep 17 00:00:00 2001 From: lyonguyen8697 Date: Thu, 20 Feb 2020 11:07:09 +0700 Subject: [PATCH 016/253] resolve missing dependency build failures --- tensorflow/python/autograph/operators/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/autograph/operators/BUILD b/tensorflow/python/autograph/operators/BUILD index 0969606670a..fe15cc9fd7f 100644 --- a/tensorflow/python/autograph/operators/BUILD +++ b/tensorflow/python/autograph/operators/BUILD @@ -48,6 +48,7 @@ py_library( "//tensorflow/python:variables", "//tensorflow/python/autograph/utils", "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/ops/parallel_for:control_flow_ops", "//third_party/py/numpy", ], ) From b611fba6edd18b2c1efd96d13ee971abe2641a27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5ns=20Nilsson?= Date: Thu, 20 Feb 2020 13:00:00 +0100 Subject: [PATCH 017/253] TFLu: Update filtered tests for stm32f4 Removing depthwise_conv_test and 
conv_test from filter list as they are now working with Renode. --- tensorflow/lite/micro/tools/make/targets/stm32f4_makefile.inc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tensorflow/lite/micro/tools/make/targets/stm32f4_makefile.inc b/tensorflow/lite/micro/tools/make/targets/stm32f4_makefile.inc index f9451cc6db3..539f4528d06 100644 --- a/tensorflow/lite/micro/tools/make/targets/stm32f4_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/stm32f4_makefile.inc @@ -56,12 +56,11 @@ ifeq ($(TARGET), stm32f4) $(MAKEFILE_DIR)/downloads/stm32_bare_lib/source/debug_log.c MICROLITE_CC_SRCS := $(filter-out $(EXCLUDED_SRCS), $(MICROLITE_CC_SRCS)) TEST_SCRIPT := tensorflow/lite/micro/testing/test_stm32f4_binary.sh - # TODO, non working tests.. the micro_speech example and conv_test.cc/depthwise_conv_test.cc partly works + # TODO, non working tests.. the micro_speech example partly works EXCLUDED_TESTS := \ tensorflow/lite/micro/micro_interpreter_test.cc \ tensorflow/lite/micro/micro_allocator_test.cc \ tensorflow/lite/micro/memory_helpers_test.cc \ - tensorflow/lite/micro/kernels/depthwise_conv_test.cc \ tensorflow/lite/micro/kernels/logistic_test.cc \ tensorflow/lite/micro/kernels/logical_test.cc \ tensorflow/lite/micro/kernels/maximum_minimum_test.cc \ @@ -77,7 +76,6 @@ ifeq ($(TARGET), stm32f4) tensorflow/lite/micro/kernels/dequantize_test.cc \ tensorflow/lite/micro/kernels/unpack_test.cc \ tensorflow/lite/micro/kernels/split_test.cc \ - tensorflow/lite/micro/kernels/conv_test.cc \ tensorflow/lite/micro/simple_tensor_allocator_test.cc MICROLITE_TEST_SRCS := $(filter-out $(EXCLUDED_TESTS), $(MICROLITE_TEST_SRCS)) EXCLUDED_EXAMPLE_TESTS := \ From d3b4f570ed8225edf21dd1749f5725fedeb7d9ae Mon Sep 17 00:00:00 2001 From: amoitra Date: Thu, 20 Feb 2020 16:13:26 -0800 Subject: [PATCH 018/253] mode change --- tensorflow/compiler/xla/service/BUILD | 0 tensorflow/compiler/xla/service/gpu/BUILD | 0 tensorflow/compiler/xla/service/hlo_instruction.cc | 
0 tensorflow/compiler/xla/service/hlo_instructions.h | 0 4 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 tensorflow/compiler/xla/service/BUILD mode change 100755 => 100644 tensorflow/compiler/xla/service/gpu/BUILD mode change 100755 => 100644 tensorflow/compiler/xla/service/hlo_instruction.cc mode change 100755 => 100644 tensorflow/compiler/xla/service/hlo_instructions.h diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD old mode 100755 new mode 100644 diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD old mode 100755 new mode 100644 diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc old mode 100755 new mode 100644 diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h old mode 100755 new mode 100644 From 28eeb04faba3e18d4030af13492298b074b39ee0 Mon Sep 17 00:00:00 2001 From: Rahul Huilgol Date: Wed, 16 Oct 2019 00:50:03 -0700 Subject: [PATCH 019/253] Improve and fix AWS SDK Logging --- tensorflow/core/platform/s3/aws_logging.cc | 87 ++++++++++++---------- 1 file changed, 48 insertions(+), 39 deletions(-) diff --git a/tensorflow/core/platform/s3/aws_logging.cc b/tensorflow/core/platform/s3/aws_logging.cc index e0ec94a269f..b73d0e4d5f9 100644 --- a/tensorflow/core/platform/s3/aws_logging.cc +++ b/tensorflow/core/platform/s3/aws_logging.cc @@ -26,6 +26,23 @@ limitations under the License. 
namespace tensorflow { +static const map log_levels_string_to_aws = { + {"off", Aws::Utils::Logging::LogLevel::Off}, + {"fatal", Aws::Utils::Logging::LogLevel::Fatal}, + {"error", Aws::Utils::Logging::LogLevel::Error}, + {"warn", Aws::Utils::Logging::LogLevel::Warn}, + {"info", Aws::Utils::Logging::LogLevel::Info}, + {"debug", Aws::Utils::Logging::LogLevel::Debug}, + {"trace", Aws::Utils::Logging::LogLevel::Trace} +} + +static const map log_levels_tf_to_aws = { + {INFO, Aws::Utils::Logging::LogLevel::Info}, + {WARNING, Aws::Utils::Logging::LogLevel::Warn}, + {ERROR, Aws::Utils::Logging::LogLevel::Error}, + {FATAL, Aws::Utils::Logging::LogLevel::Fatal} +} + AWSLogSystem::AWSLogSystem(Aws::Utils::Logging::LogLevel log_level) : log_level_(log_level) {} @@ -64,7 +81,7 @@ void AWSLogSystem::LogMessage(Aws::Utils::Logging::LogLevel log_level, LOG(FATAL) << message; break; default: - LOG(ERROR) << message; + LOG(INFO) << message; break; } } @@ -73,50 +90,42 @@ void AWSLogSystem::Flush() { return; } namespace { -// Taken from tensorflow/core/platform/default/logging.cc -int ParseInteger(const char* str, size_t size) { - string integer_str(str, size); - std::istringstream ss(integer_str); - int level = 0; - ss >> level; - return level; -} - -// Taken from tensorflow/core/platform/default/logging.cc -int64 LogLevelStrToInt(const char* tf_env_var_val) { - if (tf_env_var_val == nullptr) { - return 0; +Aws::Utils::Logging::LogLevel TfLogLevelToAwsLogLevel(int aws_log_level) { + if (log_levels_tf_to_aws.find(level) != log_levels_tf_to_aws.end()) { + return log_levels_tf_to_aws.at(level); + } else { + // default to fatal + return Aws::Utils::Logging::LogLevel::Fatal; } - return ParseInteger(tf_env_var_val, strlen(tf_env_var_val)); } static const char* kAWSLoggingTag = "AWSLogging"; -Aws::Utils::Logging::LogLevel ParseLogLevelFromEnv() { - Aws::Utils::Logging::LogLevel log_level = Aws::Utils::Logging::LogLevel::Info; +Aws::Utils::Logging::LogLevel ParseAwsLogLevelFromEnv() { + 
Aws::Utils::Logging::LogLevel log_level = Aws::Utils::Logging::LogLevel::Fatal; - const int64_t level = getenv("AWS_LOG_LEVEL") - ? LogLevelStrToInt(getenv("AWS_LOG_LEVEL")) - : tensorflow::internal::MinLogLevelFromEnv(); - - switch (level) { - case INFO: - log_level = Aws::Utils::Logging::LogLevel::Info; - break; - case WARNING: - log_level = Aws::Utils::Logging::LogLevel::Warn; - break; - case ERROR: - log_level = Aws::Utils::Logging::LogLevel::Error; - break; - case FATAL: - log_level = Aws::Utils::Logging::LogLevel::Fatal; - break; - default: - log_level = Aws::Utils::Logging::LogLevel::Info; - break; + const char* aws_env_var_val = getenv("AWS_LOG_LEVEL"); + if (aws_env_var_val != nullptr) { + string maybe_integer_str(aws_env_var_val, strlen(aws_env_var_val)); + std::istringstream ss(maybe_integer_str); + int level; + ss >> level; + if (ss.fail()) { + // wasn't a number + // expecting a string here + string level_str = maybe_integer_str; + if (log_levels_string_to_aws.find(level_str) != log_levels_string_to_aws.end()) { + log_level = log_levels_string_to_aws.at(level_str); + } + } else { + // backwards compatibility + // valid number, but this number follows the standard tensorflow log levels + // need to convert this to aws sdk logging level number + log_level = TfLogLevelToAwsLogLevel(level); + } + } else { + log_level = TfLogLevelToAwsLogLevel(tensorflow::internal::MinLogLevelFromEnv()); } - return log_level; } } // namespace @@ -127,7 +136,7 @@ void AWSLogSystem::InitializeAWSLogging() { std::lock_guard s3_logging_lock(s3_logging_mutex); if (!initialized) { Aws::Utils::Logging::InitializeAWSLogging( - Aws::MakeShared(kAWSLoggingTag, ParseLogLevelFromEnv())); + Aws::MakeShared(kAWSLoggingTag, ParseAwsLogLevelFromEnv())); initialized = true; return; } From d7f0b8e8d4203cb9b5d5dbb5ea56e801fcbbc5f3 Mon Sep 17 00:00:00 2001 From: Rahul Huilgol Date: Wed, 11 Dec 2019 18:54:13 -0800 Subject: [PATCH 020/253] Make default level as fatal --- 
tensorflow/core/platform/s3/aws_logging.cc | 25 +++++++++++++--------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/platform/s3/aws_logging.cc b/tensorflow/core/platform/s3/aws_logging.cc index b73d0e4d5f9..2e6ee8de7f1 100644 --- a/tensorflow/core/platform/s3/aws_logging.cc +++ b/tensorflow/core/platform/s3/aws_logging.cc @@ -26,7 +26,7 @@ limitations under the License. namespace tensorflow { -static const map log_levels_string_to_aws = { +static const std::map log_levels_string_to_aws = { {"off", Aws::Utils::Logging::LogLevel::Off}, {"fatal", Aws::Utils::Logging::LogLevel::Fatal}, {"error", Aws::Utils::Logging::LogLevel::Error}, @@ -34,14 +34,14 @@ static const map log_levels_string_t {"info", Aws::Utils::Logging::LogLevel::Info}, {"debug", Aws::Utils::Logging::LogLevel::Debug}, {"trace", Aws::Utils::Logging::LogLevel::Trace} -} +}; -static const map log_levels_tf_to_aws = { +static const std::map log_levels_tf_to_aws = { {INFO, Aws::Utils::Logging::LogLevel::Info}, {WARNING, Aws::Utils::Logging::LogLevel::Warn}, {ERROR, Aws::Utils::Logging::LogLevel::Error}, {FATAL, Aws::Utils::Logging::LogLevel::Fatal} -} +}; AWSLogSystem::AWSLogSystem(Aws::Utils::Logging::LogLevel log_level) : log_level_(log_level) {} @@ -81,6 +81,7 @@ void AWSLogSystem::LogMessage(Aws::Utils::Logging::LogLevel log_level, LOG(FATAL) << message; break; default: + // this will match for DEBUG, TRACE LOG(INFO) << message; break; } @@ -90,7 +91,8 @@ void AWSLogSystem::Flush() { return; } namespace { -Aws::Utils::Logging::LogLevel TfLogLevelToAwsLogLevel(int aws_log_level) { +Aws::Utils::Logging::LogLevel TfLogLevelToAwsLogLevel(int level) { + // Converts TF Log Levels INFO, WARNING, ERROR and FATAL to the AWS enum values for the levels if (log_levels_tf_to_aws.find(level) != log_levels_tf_to_aws.end()) { return log_levels_tf_to_aws.at(level); } else { @@ -102,6 +104,11 @@ Aws::Utils::Logging::LogLevel TfLogLevelToAwsLogLevel(int aws_log_level) { static const char* 
kAWSLoggingTag = "AWSLogging"; Aws::Utils::Logging::LogLevel ParseAwsLogLevelFromEnv() { + // defaults to FATAL log level for the AWS SDK + // this is because many normal tensorflow operations are logged as errors in the AWS SDK + // such as checking if a file exists can log an error in AWS SDK if the file does not actually exist + // another such case is when reading a file till the end, TensorFlow expects to see an InvalidRange exception at the end, + // but this would be an error in the AWS SDK. This confuses users, hence the default setting. Aws::Utils::Logging::LogLevel log_level = Aws::Utils::Logging::LogLevel::Fatal; const char* aws_env_var_val = getenv("AWS_LOG_LEVEL"); @@ -112,19 +119,17 @@ Aws::Utils::Logging::LogLevel ParseAwsLogLevelFromEnv() { ss >> level; if (ss.fail()) { // wasn't a number - // expecting a string here + // expecting a string string level_str = maybe_integer_str; if (log_levels_string_to_aws.find(level_str) != log_levels_string_to_aws.end()) { log_level = log_levels_string_to_aws.at(level_str); } } else { // backwards compatibility - // valid number, but this number follows the standard tensorflow log levels - // need to convert this to aws sdk logging level number + // valid number, but this number follows the standard TensorFlow log levels + // need to convert this to AWS SDK logging level number log_level = TfLogLevelToAwsLogLevel(level); } - } else { - log_level = TfLogLevelToAwsLogLevel(tensorflow::internal::MinLogLevelFromEnv()); } return log_level; } From c70b209fc5f3223387aff449526579c2211d04ea Mon Sep 17 00:00:00 2001 From: Rahul Huilgol Date: Wed, 11 Dec 2019 19:09:50 -0800 Subject: [PATCH 021/253] Add VLOG statements for each main function call --- tensorflow/core/platform/s3/s3_file_system.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/core/platform/s3/s3_file_system.cc b/tensorflow/core/platform/s3/s3_file_system.cc index 2c4c1f695b6..db29881476d 100644 --- 
a/tensorflow/core/platform/s3/s3_file_system.cc +++ b/tensorflow/core/platform/s3/s3_file_system.cc @@ -185,6 +185,7 @@ class S3RandomAccessFile : public RandomAccessFile { Status Read(uint64 offset, size_t n, StringPiece* result, char* scratch) const override { + VLOG(1) << "ReadFilefromS3 s3://" << bucket_ << "/" << object_ << " from " << offset << " for n:" << n; Aws::S3::Model::GetObjectRequest getObjectRequest; getObjectRequest.WithBucket(bucket_.c_str()).WithKey(object_.c_str()); string bytes = strings::StrCat("bytes=", offset, "-", offset + n - 1); @@ -260,6 +261,7 @@ class S3WritableFile : public WritableFile { if (!sync_needed_) { return Status::OK(); } + VLOG(1) << "WriteFileToS3: s3://" << bucket_ << "/" << object_; Aws::S3::Model::PutObjectRequest putObjectRequest; putObjectRequest.WithBucket(bucket_.c_str()).WithKey(object_.c_str()); long offset = outfile_->tellp(); @@ -405,6 +407,7 @@ Status S3FileSystem::FileExists(const string& fname) { Status S3FileSystem::GetChildren(const string& dir, std::vector* result) { + VLOG(1) << "GetChildren for path: " << dir; string bucket, prefix; TF_RETURN_IF_ERROR(ParseS3Path(dir, true, &bucket, &prefix)); @@ -452,6 +455,7 @@ Status S3FileSystem::GetChildren(const string& dir, } Status S3FileSystem::Stat(const string& fname, FileStatistics* stats) { + VLOG(1) << "Stat on path: " << fname; string bucket, object; TF_RETURN_IF_ERROR(ParseS3Path(fname, true, &bucket, &object)); @@ -513,6 +517,7 @@ Status S3FileSystem::GetMatchingPaths(const string& pattern, } Status S3FileSystem::DeleteFile(const string& fname) { + VLOG(1) << "DeleteFile: " << fname; string bucket, object; TF_RETURN_IF_ERROR(ParseS3Path(fname, false, &bucket, &object)); @@ -529,6 +534,7 @@ Status S3FileSystem::DeleteFile(const string& fname) { } Status S3FileSystem::CreateDir(const string& dirname) { + VLOG(1) << "CreateDir: " << dirname; string bucket, object; TF_RETURN_IF_ERROR(ParseS3Path(dirname, true, &bucket, &object)); @@ -552,6 +558,7 @@ Status 
S3FileSystem::CreateDir(const string& dirname) { } Status S3FileSystem::DeleteDir(const string& dirname) { + VLOG(1) << "DeleteDir: " << dirname; string bucket, object; TF_RETURN_IF_ERROR(ParseS3Path(dirname, false, &bucket, &object)); @@ -592,6 +599,7 @@ Status S3FileSystem::GetFileSize(const string& fname, uint64* file_size) { } Status S3FileSystem::RenameFile(const string& src, const string& target) { + VLOG(1) << "RenameFile from: " << src << " to: " << target; string src_bucket, src_object, target_bucket, target_object; TF_RETURN_IF_ERROR(ParseS3Path(src, false, &src_bucket, &src_object)); TF_RETURN_IF_ERROR( From 05bb06c13f412d4dc3f319f9038eac4adce73fff Mon Sep 17 00:00:00 2001 From: Rahul Huilgol Date: Thu, 30 Jan 2020 17:25:47 -0800 Subject: [PATCH 022/253] Remove the changes to set CA Path to a different PR --- tensorflow/core/platform/s3/s3_file_system.cc | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tensorflow/core/platform/s3/s3_file_system.cc b/tensorflow/core/platform/s3/s3_file_system.cc index db29881476d..d873639166e 100644 --- a/tensorflow/core/platform/s3/s3_file_system.cc +++ b/tensorflow/core/platform/s3/s3_file_system.cc @@ -124,14 +124,6 @@ Aws::Client::ClientConfiguration& GetDefaultClientConfig() { cfg.requestTimeoutMs = timeout; } } - const char* ca_file = getenv("S3_CA_FILE"); - if (ca_file) { - cfg.caFile = Aws::String(ca_file); - } - const char* ca_path = getenv("S3_CA_PATH"); - if (ca_path) { - cfg.caPath = Aws::String(ca_path); - } init = true; } From de9cecdf3df96e7df736b68cbc1f8980053a7835 Mon Sep 17 00:00:00 2001 From: Rahul Huilgol Date: Fri, 21 Feb 2020 01:32:20 +0000 Subject: [PATCH 023/253] Bring back accidentally removed CA options --- tensorflow/core/platform/s3/s3_file_system.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tensorflow/core/platform/s3/s3_file_system.cc b/tensorflow/core/platform/s3/s3_file_system.cc index d873639166e..6ae87e280ba 100644 --- 
a/tensorflow/core/platform/s3/s3_file_system.cc +++ b/tensorflow/core/platform/s3/s3_file_system.cc @@ -125,6 +125,15 @@ Aws::Client::ClientConfiguration& GetDefaultClientConfig() { } } + const char* ca_file = getenv("S3_CA_FILE"); + if (ca_file) { + cfg.caFile = Aws::String(ca_file); + } + const char* ca_path = getenv("S3_CA_PATH"); + if (ca_path) { + cfg.caPath = Aws::String(ca_path); + } + init = true; } From 7692ed9b3c10e7d2e9029d94a4afa418e13ded0b Mon Sep 17 00:00:00 2001 From: lyonguyen8697 Date: Mon, 17 Feb 2020 13:45:48 +0700 Subject: [PATCH 024/253] Add sorted builtin support for autograph --- .../python/autograph/operators/py_builtins.py | 39 ++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/autograph/operators/py_builtins.py b/tensorflow/python/autograph/operators/py_builtins.py index c6ae65ff412..c8cef6bbe8f 100644 --- a/tensorflow/python/autograph/operators/py_builtins.py +++ b/tensorflow/python/autograph/operators/py_builtins.py @@ -38,6 +38,9 @@ from tensorflow.python.ops import gen_parsing_ops from tensorflow.python.ops import gen_string_ops from tensorflow.python.ops import list_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import sort_ops +from tensorflow.python.ops.parallel_for import control_flow_ops as parallel_ops +from tensorflow.python.ops import check_ops from tensorflow.python.util import lazy_loader from tensorflow.python.util import nest @@ -461,8 +464,41 @@ def _py_all(iterable): return all(iterable) +def sorted_(iterable, key=UNSPECIFIED, reverse=UNSPECIFIED): + if tensor_util.is_tensor(iterable): + return _tf_sorted(iterable, key, reverse) + return _py_sorted(iterable, key, reverse) + + +def _tf_sorted(iterable, key, reverse): + """Overload of sorted_ for Tensor iterable.""" + if reverse is UNSPECIFIED: + direction = 'ASCENDING' + else: + direction = 'DESCENDING' + if key is not UNSPECIFIED: + mapped = parallel_ops.vectorized_map(key, iterable) + with 
ops.control_dependencies( + [check_ops.assert_rank_v2(mapped, 1, 'only support 1-D tensor')]): + order = sort_ops.argsort(mapped, direction=direction) + return array_ops.gather_v2(iterable, order) + with ops.control_dependencies( + [check_ops.assert_rank_v2(iterable, 1, 'only support 1-D tensor')]): + return sort_ops.sort(iterable, direction=direction) + + +def _py_sorted(iterable, key, reverse): + if key is not UNSPECIFIED and reverse is UNSPECIFIED: + return sorted(iterable, key=key) + if key is UNSPECIFIED and reverse is not UNSPECIFIED: + return sorted(iterable, reverse=reverse) + if key is not UNSPECIFIED and reverse is not UNSPECIFIED: + return sorted(iterable, key=key, reverse=reverse) + return sorted(iterable) + + SUPPORTED_BUILTINS = (abs, float, int, len, print, range, enumerate, zip, map, - filter, any, all) + filter, any, all, sorted) if six.PY2: SUPPORTED_BUILTINS += (xrange,) @@ -482,4 +518,5 @@ BUILTIN_FUNCTIONS_MAP = { 'filter': filter_, 'any': any_, 'all': all_, + 'sorted': sorted_, } From 6482d7d84155c611860a78d064ceff7968de40f6 Mon Sep 17 00:00:00 2001 From: lyonguyen8697 Date: Mon, 17 Feb 2020 13:46:08 +0700 Subject: [PATCH 025/253] Add test case for sorted builtin support in autograph --- .../autograph/operators/py_builtins_test.py | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tensorflow/python/autograph/operators/py_builtins_test.py b/tensorflow/python/autograph/operators/py_builtins_test.py index 10f6d2f9392..e0434a2c3c4 100644 --- a/tensorflow/python/autograph/operators/py_builtins_test.py +++ b/tensorflow/python/autograph/operators/py_builtins_test.py @@ -33,6 +33,7 @@ from tensorflow.python.framework import errors_impl from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import tensor_array_ops +from tensorflow.python.ops import math_ops from tensorflow.python.platform import test @@ -373,6 +374,40 @@ class PyBuiltinsTest(test.TestCase): with 
self.assertRaises(ValueError): py_builtins.all_(dataset_mixed) + def test_sorted(self): + self.assertListEqual(py_builtins.sorted_([2, 3, 1]), [1, 2, 3]) + self.assertListEqual( + py_builtins.sorted_([2, 3, 1], key=lambda x: -x), [3, 2, 1]) + self.assertListEqual( + py_builtins.sorted_([2, 3, 1], reverse=True), [3, 2, 1]) + self.assertListEqual( + py_builtins.sorted_([2, 3, 1], key=lambda x: -x, reverse=True), + [1, 2, 3]) + self.assertListEqual( + py_builtins.sorted_([[4, 3], [2, 1]], key=lambda x: sum(x)), + [[2, 1], [4, 3]]) + + def test_sorted_tensor(self): + iterable_1 = constant_op.constant([2, 3, 1]) + self.assertListEqual(list(self.evaluate( + py_builtins.sorted_(iterable_1))), [1, 2, 3]) + self.assertListEqual(list(self.evaluate( + py_builtins.sorted_(iterable_1, key=lambda x: -x))), [3, 2, 1]) + self.assertListEqual(list(self.evaluate( + py_builtins.sorted_(iterable_1, reverse=True))), [3, 2, 1]) + self.assertListEqual(list(self.evaluate( + py_builtins.sorted_(iterable_1, key=lambda x: -x, reverse=True))), + [1, 2, 3]) + + iterable_2 = constant_op.constant([[4, 3], [2, 1]]) + with self.assertRaises(ValueError): + py_builtins.sorted_(iterable_2) + with self.assertRaises(ValueError): + py_builtins.sorted_(iterable_2, key=lambda x: -x) + self.assertListEqual(list(self.evaluate( + py_builtins.sorted_(iterable_2, key=lambda x: math_ops.reduce_sum(x)))), + [[2, 1], [4, 3]]) + if __name__ == '__main__': test.main() From bf0b5b619d633fcff14cc11243297537e83d77d2 Mon Sep 17 00:00:00 2001 From: lyonguyen8697 Date: Tue, 18 Feb 2020 13:50:07 +0700 Subject: [PATCH 026/253] add an extra check in case the rank is static --- tensorflow/python/autograph/operators/py_builtins.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/autograph/operators/py_builtins.py b/tensorflow/python/autograph/operators/py_builtins.py index c8cef6bbe8f..b0db94cb1e2 100644 --- a/tensorflow/python/autograph/operators/py_builtins.py +++ 
b/tensorflow/python/autograph/operators/py_builtins.py @@ -478,12 +478,16 @@ def _tf_sorted(iterable, key, reverse): direction = 'DESCENDING' if key is not UNSPECIFIED: mapped = parallel_ops.vectorized_map(key, iterable) + if mapped.shape.rank is not None and mapped.shape.rank != 1: + raise ValueError('sort only supports only 1D tensors') with ops.control_dependencies( - [check_ops.assert_rank_v2(mapped, 1, 'only support 1-D tensor')]): + [check_ops.assert_rank_v2(mapped, 1, 'sort only supports only 1D tensors')]): order = sort_ops.argsort(mapped, direction=direction) return array_ops.gather_v2(iterable, order) + if iterable.shape.rank is not None and iterable.shape.rank != 1: + raise ValueError('sort only supports only 1D tensors') with ops.control_dependencies( - [check_ops.assert_rank_v2(iterable, 1, 'only support 1-D tensor')]): + [check_ops.assert_rank_v2(iterable, 1, 'sort only supports only 1D tensors')]): return sort_ops.sort(iterable, direction=direction) From 346cd079735de432391800382eed9fe9e9111340 Mon Sep 17 00:00:00 2001 From: lyonguyen8697 Date: Wed, 19 Feb 2020 17:58:56 +0700 Subject: [PATCH 027/253] fix build failures --- tensorflow/python/autograph/operators/py_builtins.py | 6 ++++-- tensorflow/python/autograph/operators/py_builtins_test.py | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/autograph/operators/py_builtins.py b/tensorflow/python/autograph/operators/py_builtins.py index b0db94cb1e2..5ce0a3fdc59 100644 --- a/tensorflow/python/autograph/operators/py_builtins.py +++ b/tensorflow/python/autograph/operators/py_builtins.py @@ -481,13 +481,15 @@ def _tf_sorted(iterable, key, reverse): if mapped.shape.rank is not None and mapped.shape.rank != 1: raise ValueError('sort only supports only 1D tensors') with ops.control_dependencies( - [check_ops.assert_rank_v2(mapped, 1, 'sort only supports only 1D tensors')]): + [check_ops.assert_rank_v2( + mapped, 1, 'sort only supports only 1D tensors')]): order = 
sort_ops.argsort(mapped, direction=direction) return array_ops.gather_v2(iterable, order) if iterable.shape.rank is not None and iterable.shape.rank != 1: raise ValueError('sort only supports only 1D tensors') with ops.control_dependencies( - [check_ops.assert_rank_v2(iterable, 1, 'sort only supports only 1D tensors')]): + [check_ops.assert_rank_v2( + iterable, 1, 'sort only supports only 1D tensors')]): return sort_ops.sort(iterable, direction=direction) diff --git a/tensorflow/python/autograph/operators/py_builtins_test.py b/tensorflow/python/autograph/operators/py_builtins_test.py index e0434a2c3c4..21a154867a7 100644 --- a/tensorflow/python/autograph/operators/py_builtins_test.py +++ b/tensorflow/python/autograph/operators/py_builtins_test.py @@ -397,7 +397,7 @@ class PyBuiltinsTest(test.TestCase): py_builtins.sorted_(iterable_1, reverse=True))), [3, 2, 1]) self.assertListEqual(list(self.evaluate( py_builtins.sorted_(iterable_1, key=lambda x: -x, reverse=True))), - [1, 2, 3]) + [1, 2, 3]) iterable_2 = constant_op.constant([[4, 3], [2, 1]]) with self.assertRaises(ValueError): @@ -406,7 +406,7 @@ class PyBuiltinsTest(test.TestCase): py_builtins.sorted_(iterable_2, key=lambda x: -x) self.assertListEqual(list(self.evaluate( py_builtins.sorted_(iterable_2, key=lambda x: math_ops.reduce_sum(x)))), - [[2, 1], [4, 3]]) + [[2, 1], [4, 3]]) if __name__ == '__main__': From 459b5389f5423cc423e8647f49bc651bd3c66db5 Mon Sep 17 00:00:00 2001 From: lyonguyen8697 Date: Thu, 20 Feb 2020 11:07:09 +0700 Subject: [PATCH 028/253] resolve missing dependency build failures --- tensorflow/python/autograph/operators/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/autograph/operators/BUILD b/tensorflow/python/autograph/operators/BUILD index 0969606670a..fe15cc9fd7f 100644 --- a/tensorflow/python/autograph/operators/BUILD +++ b/tensorflow/python/autograph/operators/BUILD @@ -48,6 +48,7 @@ py_library( "//tensorflow/python:variables", 
"//tensorflow/python/autograph/utils", "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/ops/parallel_for:control_flow_ops", "//third_party/py/numpy", ], ) From a1aa469f0f3d34465da65e8df9f7d6cc2942f6d3 Mon Sep 17 00:00:00 2001 From: Ben Barsdell Date: Sat, 22 Feb 2020 16:16:04 +1100 Subject: [PATCH 029/253] Safely handle all list ops in auto_mixed_precision - Rewrites the handling of Tensor List ops in the auto_mixed_precision grappler pass, fixing several issues. - Now supports all types of Tensor List ops as well as special cases such as AddN nodes that operate on two Tensor Lists. - Now handles unsafe situations where Tensor List handles are passed through untraversable edges such as between sub-graphs. - Now properly detects writer -> reader node dependencies. - Note that fp16 conversion remains sub-optimal when sub-graphs are involved. Future work should consider optimizing over all graphs simultaneously or minimizing the cases in which sub-graphs are not inlined. --- .../optimizers/auto_mixed_precision.cc | 531 +++++++++--------- .../optimizers/auto_mixed_precision_lists.h | 2 + 2 files changed, 281 insertions(+), 252 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc b/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc index 3aa6beab645..c0e50df42ea 100644 --- a/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc +++ b/tensorflow/core/grappler/optimizers/auto_mixed_precision.cc @@ -398,12 +398,11 @@ class GraphTypeTopologyView { // computing graph topology. Example: Tensorflow runtime allows concurrent // execution of dequeue/enqueue ops from the same queue resource, but we might // want to enforce ordering between them for the purpose of graph analysis. 
- Status InitializeFromGraph(const GraphDef& graph, - const NodeTypeAttrMap& node_type_map, - absl::Span ephemeral_edges); Status InitializeFromGraph(const GraphDef& graph, const NodeTypeAttrMap& node_type_map); + Status AddEphemeralEdges(absl::Span ephemeral_edges); + bool is_initialized() const { return graph_ != nullptr; } int num_nodes() const { return num_nodes_; } const GraphDef* graph() const { return graph_; } @@ -474,8 +473,7 @@ inline void SortAndRemoveDuplicates(T* v) { } Status GraphTypeTopologyView::InitializeFromGraph( - const GraphDef& graph, const NodeTypeAttrMap& node_type_map, - absl::Span ephemeral_edges) { + const GraphDef& graph, const NodeTypeAttrMap& node_type_map) { if (graph_ != nullptr) { return errors::InvalidArgument( "GraphTypeTopologyView is already initialized."); @@ -503,46 +501,7 @@ Status GraphTypeTopologyView::InitializeFromGraph( fanins_.resize(num_nodes_); fanouts_.resize(num_nodes_); - // 1. Add ephemeral edges to the adjacency lists. - for (const NodeTypeIdEdge& edge : ephemeral_edges) { - const auto src = node_name_to_index_.find(edge.src.node->name()); - const bool valid_src = src != node_name_to_index_.end(); - - if (!valid_src) { - const string error_message = - absl::StrCat("Non-existent src node: ", edge.src.node->name()); - if (skip_invalid_edges_) { - VLOG(0) << "Skip error: " << error_message; - } else { - return errors::InvalidArgument(error_message); - } - } - - const auto dst = node_name_to_index_.find(edge.dst.node->name()); - const bool valid_dst = dst != node_name_to_index_.end(); - - if (!valid_dst) { - const string error_message = - absl::StrCat("Non-existent dst node: ", edge.dst.node->name()); - if (skip_invalid_edges_) { - VLOG(0) << "Skip error: " << error_message; - } else { - return errors::InvalidArgument(error_message); - } - } - - if (valid_dst && valid_src) { - // TODO(benbarsdell): Check for failure. 
- int src_node_type_idx = node_type_name_to_index_.at( - NodeTypeKey(edge.src.node->name(), edge.src.type_attr)); - int dst_node_type_idx = node_type_name_to_index_.at( - NodeTypeKey(edge.dst.node->name(), edge.dst.type_attr)); - fanins_[dst_node_type_idx].push_back(src_node_type_idx); - fanouts_[src_node_type_idx].push_back(dst_node_type_idx); - } - } - - // 2. Add graph edges to the adjacency lists. + // Add graph edges to the adjacency lists. for (int node_type_idx = 0; node_type_idx < num_nodes_; ++node_type_idx) { const NodeTypeId& node_type = node_type_attrs_.at(node_type_idx); auto input_ports = @@ -597,10 +556,54 @@ Status GraphTypeTopologyView::InitializeFromGraph( return Status::OK(); } -Status GraphTypeTopologyView::InitializeFromGraph( - const GraphDef& graph, const NodeTypeAttrMap& node_type_map) { - return InitializeFromGraph(graph, node_type_map, - absl::Span()); +Status GraphTypeTopologyView::AddEphemeralEdges( + absl::Span ephemeral_edges) { + // Add ephemeral edges to the adjacency lists. + for (const NodeTypeIdEdge& edge : ephemeral_edges) { + const auto src = node_name_to_index_.find(edge.src.node->name()); + const bool valid_src = src != node_name_to_index_.end(); + + if (!valid_src) { + const string error_message = + absl::StrCat("Non-existent src node: ", edge.src.node->name()); + if (skip_invalid_edges_) { + VLOG(0) << "Skip error: " << error_message; + } else { + return errors::InvalidArgument(error_message); + } + } + + const auto dst = node_name_to_index_.find(edge.dst.node->name()); + const bool valid_dst = dst != node_name_to_index_.end(); + + if (!valid_dst) { + const string error_message = + absl::StrCat("Non-existent dst node: ", edge.dst.node->name()); + if (skip_invalid_edges_) { + VLOG(0) << "Skip error: " << error_message; + } else { + return errors::InvalidArgument(error_message); + } + } + + if (valid_dst && valid_src) { + // TODO(benbarsdell): Check for failure. 
+ int src_node_type_idx = node_type_name_to_index_.at( + NodeTypeKey(edge.src.node->name(), edge.src.type_attr)); + int dst_node_type_idx = node_type_name_to_index_.at( + NodeTypeKey(edge.dst.node->name(), edge.dst.type_attr)); + fanins_[dst_node_type_idx].push_back(src_node_type_idx); + fanouts_[src_node_type_idx].push_back(dst_node_type_idx); + } + } + + // Dedup inputs and outputs for all the graph nodes. + for (int node_type_idx = 0; node_type_idx < num_nodes_; ++node_type_idx) { + SortAndRemoveDuplicates(&fanins_[node_type_idx]); + SortAndRemoveDuplicates(&fanouts_[node_type_idx]); + } + + return Status::OK(); } bool GraphTypeTopologyView::HasNode(absl::string_view node_name, @@ -938,6 +941,7 @@ class AutoMixedPrecisionImpl { : virtual_placer_(cluster->GetDevices()), nodes_to_preserve_(nodes_to_preserve), graph_(graph), + function_library_(OpRegistry::Global(), graph->library()), id_(id), graph_view_(graph), cuda_version_(GetCudaVersion(*cluster)), @@ -947,11 +951,6 @@ class AutoMixedPrecisionImpl { private: typedef absl::flat_hash_set NodeTypeIdSet; - // Maps data structure object ops (e.g., StackV2) to the sets of nodes that - // write (e.g., StackPushV2) and read (e.g., StackPopV2) from them. 
- typedef absl::flat_hash_map> - DataStructureOpsMap; Status PrintDebugLogs(bool preop, size_t timestamp); void LogSkippedNode(const NodeDef& node) const; @@ -963,19 +962,19 @@ class AutoMixedPrecisionImpl { bool NodeImplicitlyReadsNonResourceVariable(const NodeDef& node) const; void ConvertBatchNormOpsToV2(); bool SupportsFloat16(const NodeTypeId& node_type) const; - const NodeDef* GetTailOfChain( - const NodeDef& node, const absl::flat_hash_set& match_ops) const; - Status AddDataStructureOpsToMap( - const absl::flat_hash_set& data_structure_ops, - TypeAttrId data_structure_type_attr, - const absl::flat_hash_map& write_ops, - const absl::flat_hash_map& read_ops, - DataStructureOpsMap* object_clients_map) const; + const NodeTypeId* GetTensorListFloat32NodeTypeId(const NodeDef& node) const; + bool IsSourceOrSinkOp(const string& op) const; + void FindFloat32TensorListOpClustersAndBlacklistUnsafe( + std::vector>* clusters, + absl::flat_hash_set* black_set) const; + void FindTensorListImplicitFloat32Edges( + const absl::flat_hash_set& tensor_list_nodes, + std::vector* implicit_data_edges) const; void AddWhitelistOps(absl::flat_hash_set* white_set) const; void PropagateBlackFwdThroughClearAndGray( absl::flat_hash_set* black_set) const; - void ForceColorMatchBetweenDataStructureOps( - const DataStructureOpsMap& object_clients_map, + void ForceColorMatchBetweenTensorListOps( + const absl::flat_hash_set& tensor_list_nodes, absl::flat_hash_set* white_set, absl::flat_hash_set* black_set) const; void AddClearAndGrayToWhiteIfBetweenWhite( @@ -992,6 +991,7 @@ class AutoMixedPrecisionImpl { VirtualPlacer virtual_placer_; std::unordered_set nodes_to_preserve_; GraphDef* graph_; + FunctionLibraryDefinition function_library_; string id_; MutableGraphView graph_view_; int cuda_version_; @@ -1140,6 +1140,26 @@ bool IsFloat32(const NodeTypeId& node_type) { DataType::DT_FLOAT; } +bool IsTensorListOp(const string& op) { + return op.find("TensorList") != string::npos; +} + +bool 
IsTensorListReaderOp(const string& op) { + const gtl::FlatSet tensor_list_reader_ops = { + "TensorListConcat", "TensorListConcatV2", "TensorListGather", + "TensorListGetItem", "TensorListPopBack", "TensorListStack"}; + return tensor_list_reader_ops.count(op); +} + +bool IsTensorListWriterOp(const string& op) { + const gtl::FlatSet tensor_list_writer_ops = { + "TensorListFromTensor", "TensorListPushBack", + "TensorListPushBackBatch", "TensorListScatter", + "TensorListScatterV2", "TensorListScatterIntoExistingList", + "TensorListSetItem", "TensorListSplit"}; + return tensor_list_writer_ops.count(op); +} + bool AutoMixedPrecisionImpl::SupportsFloat16( const NodeTypeId& node_type) const { const OpDef* op_def; @@ -1223,97 +1243,24 @@ Status AutoMixedPrecisionImpl::Optimize() { VLOG(2) << "Building node type map for graph"; TF_RETURN_IF_ERROR(node_type_map_.Init(*graph_)); - // Note: If an op is added to this list that has a data type attribute, it - // should also be added to the AddDataStructureOpsToMap call below (and to the - // clearlist if it involves data flow). - // TODO(benbarsdell): Add support for TensorListPushBackBatch and - // TensorListConcatLists. They require special handling because they connect - // multiple list objects together. Currently if they appear in the graph then - // we have no choice but to disallow changing any tensor list ops, as - // otherwise we risk breaking the graph if some are changed and some are not - // (within a connected cluster of tensor list nodes). 
- const gtl::FlatSet supported_list_ops = { - "EmptyTensorList", - "TensorListSplit", - "TensorListFromTensor", - "TensorListReserve", - "TensorListScatter", - "TensorListScatterV2", - "TensorListPushBack", - "TensorListSetItem", - "TensorListScatterIntoExistingList", - "TensorListPopBack", - "TensorListStack", - "TensorListConcat", - "TensorListConcatV2", - "TensorListGetItem", - "TensorListGather", - "TensorListLength", - "TensorListElementShape", - "TensorListResize"}; - - bool can_change_tensor_list_ops = true; - for (const NodeDef& node : graph_->node()) { - if (absl::StartsWith(node.op(), "TensorList") && - !supported_list_ops.count(node.op())) { - LOG(WARNING) << "Unsupported " << node.op() << " node found in graph (" - << node.name() - << "), tensor list ops will not be converted."; - can_change_tensor_list_ops = false; - break; - } - } - - DataStructureOpsMap object_clients_map; - if (can_change_tensor_list_ops) { - VLOG(2) << "Identifying TensorList* nodes"; - TF_RETURN_IF_ERROR(AddDataStructureOpsToMap( - {"EmptyTensorList", "TensorListSplit", "TensorListFromTensor", - "TensorListReserve", "TensorListScatter", "TensorListScatterV2"}, - TypeAttrId("element_dtype"), - {{"TensorListPushBack", TypeAttrId("element_dtype")}, - {"TensorListSetItem", TypeAttrId("element_dtype")}, - {"TensorListScatterIntoExistingList", TypeAttrId("element_dtype")}}, - {{"TensorListPopBack", TypeAttrId("element_dtype")}, - {"TensorListStack", TypeAttrId("element_dtype")}, - {"TensorListConcat", TypeAttrId("element_dtype")}, - {"TensorListConcatV2", TypeAttrId("element_dtype")}, - {"TensorListGetItem", TypeAttrId("element_dtype")}, - {"TensorListGather", TypeAttrId("element_dtype")}}, - &object_clients_map)); - } else { - for (const string& list_op : supported_list_ops) { - fp16_whitelist_.erase(list_op); - fp16_graylist_.erase(list_op); - fp16_clearlist_.erase(list_op); - } - } - - // Create ephemeral edges between writers and readers of data structure ops. 
- std::vector ephemeral_edges; - for (const auto& object_clients : object_clients_map) { - const auto& client_nodes = object_clients.second; - for (const NodeTypeId& write_node_type : client_nodes.first) { - for (const NodeTypeId& read_node_type : client_nodes.second) { - ephemeral_edges.emplace_back(write_node_type, read_node_type); - } - } - const NodeTypeId& object_node_type = object_clients.first; - // These object types also act as writers because they initialize the object - // from an input tensor. - if (object_node_type.node->op() == "TensorListSplit" || - object_node_type.node->op() == "TensorListFromTensor" || - object_node_type.node->op() == "TensorListScatter" || - object_node_type.node->op() == "TensorListScatterV2") { - for (const NodeTypeId& read_node_type : client_nodes.second) { - ephemeral_edges.emplace_back(object_node_type, read_node_type); - } - } - } - VLOG(2) << "Constructing graph type attribute topology view"; - TF_RETURN_IF_ERROR(graph_type_view_.InitializeFromGraph( - *graph_, node_type_map_, ephemeral_edges)); + TF_RETURN_IF_ERROR( + graph_type_view_.InitializeFromGraph(*graph_, node_type_map_)); + + absl::flat_hash_set black_set; + + std::vector> tensor_list_clusters; + FindFloat32TensorListOpClustersAndBlacklistUnsafe(&tensor_list_clusters, + &black_set); + std::vector ephemeral_edges; + for (const auto& cluster : tensor_list_clusters) { + VLOG(1) << "Found safe Tensor List cluster of size " << cluster.size(); + for (const NodeDef* node : cluster) { + VLOG(2) << "Cluster member: " << node->op() << " node " << node->name(); + } + FindTensorListImplicitFloat32Edges(cluster, &ephemeral_edges); + } + TF_RETURN_IF_ERROR(graph_type_view_.AddEphemeralEdges(ephemeral_edges)); // The goal here is to change performance-critical ops to fp16, and to do so // with the minimal number of casts, subject to the constraint that the @@ -1352,15 +1299,15 @@ Status AutoMixedPrecisionImpl::Optimize() { return Status::OK(); } - absl::flat_hash_set black_set; 
VLOG(2) << "Beginning pass 2 to propagate black forwards from blacklist ops " "through clear/graylist ops"; PropagateBlackFwdThroughClearAndGray(&black_set); VLOG(2) << "Finished pass 2"; VLOG(2) << "Forcing color match between data structure ops"; - ForceColorMatchBetweenDataStructureOps(object_clients_map, &white_set, - &black_set); + for (const auto& cluster : tensor_list_clusters) { + ForceColorMatchBetweenTensorListOps(cluster, &white_set, &black_set); + } VLOG(2) << "Beginning pass 3 to set clear and gray nodes to white if they " "are between white ops"; @@ -1373,8 +1320,9 @@ Status AutoMixedPrecisionImpl::Optimize() { VLOG(2) << "Finished pass 4"; VLOG(2) << "Forcing color match between data structure ops"; - ForceColorMatchBetweenDataStructureOps(object_clients_map, &white_set, - &black_set); + for (const auto& cluster : tensor_list_clusters) { + ForceColorMatchBetweenTensorListOps(cluster, &white_set, &black_set); + } VLOG(2) << "Forcing color match on loop edges"; TF_RETURN_IF_ERROR(ForceColorMatchOnRecurrentEdges(&white_set)); @@ -1392,35 +1340,144 @@ Status AutoMixedPrecisionImpl::Optimize() { return Status::OK(); } -// Finds data structure object ops (e.g., StackV2) and the sets of nodes that -// write (e.g., StackPushV2) and read (e.g., StackPopV2) from them. 
-Status AutoMixedPrecisionImpl::AddDataStructureOpsToMap( - const absl::flat_hash_set& data_structure_ops, - TypeAttrId data_structure_type_attr, - const absl::flat_hash_map& write_ops, - const absl::flat_hash_map& read_ops, - DataStructureOpsMap* object_clients_map) const { - for (const NodeDef& node : graph_->node()) { - const auto write_iter = write_ops.find(node.op()); - const auto read_iter = read_ops.find(node.op()); - bool is_writer = write_iter != write_ops.end(); - bool is_reader = read_iter != read_ops.end(); - if (is_writer || is_reader) { - const NodeDef* object_node = GetTailOfChain(node, data_structure_ops); - if (!object_node) { - return errors::FailedPrecondition( - "No data structure op found upstream of ", node.op(), " node ", - node.name()); - } - NodeTypeId object_node_type(object_node, data_structure_type_attr); - TypeAttrId type_attr = is_writer ? write_iter->second : read_iter->second; - NodeTypeId node_type(&node, type_attr); - auto* value = &(*object_clients_map)[object_node_type]; - auto* node_set = is_writer ? &value->first : &value->second; - node_set->insert(node_type); +// If node is a Tensor List op with a float32 data type attribute then this +// returns a pointer to the NodeTypeId representing that type attribute. In +// all other cases this returns nullptr. 
+const NodeTypeId* AutoMixedPrecisionImpl::GetTensorListFloat32NodeTypeId( + const NodeDef& node) const { + if (!IsTensorListOp(node.op())) return nullptr; + for (const TypeAttrId& type_attr : node_type_map_.GetTypeAttrs(node)) { + const NodeTypeId* node_type = + graph_type_view_.GetNode(node.name(), type_attr); + if (node_type && node_type->type_attr.fixed_type == DT_INVALID && + node_type->type_attr.type_index == TypeAttrId::kSingleType && + IsFloat32(*node_type)) { + return node_type; } } - return Status::OK(); + return nullptr; +} + +bool AutoMixedPrecisionImpl::IsSourceOrSinkOp(const string& op) const { + const gtl::FlatSet source_and_sink_ops = { + "_Arg", + "_Retval", + "OptionalFromValue", + "OptionalGetValue", + "PartitionedCall", + "Placeholder", + "StatefulPartitionedCall", + }; + return source_and_sink_ops.count(op) || function_library_.Find(op); +} + +// Finds all clusters of float32 Tensor List nodes that are connected via their +// handle edges. Unsafe clusters (those with edges that cross untraversable +// boundaries via _Arg, _Ret, PartitionedCall etc. nodes) are added to black_set +// and not returned. +void AutoMixedPrecisionImpl::FindFloat32TensorListOpClustersAndBlacklistUnsafe( + std::vector>* tensor_list_clusters, + absl::flat_hash_set* black_set) const { + absl::flat_hash_set tensor_list_prop_set; + for (int root_idx = 0; root_idx < graph_type_view_.num_nodes(); ++root_idx) { + const NodeTypeId& root = *graph_type_view_.GetNode(root_idx); + // First add any non-processable Tensor List nodes to the black set to avoid + // them getting forced to white at the end of optimization. 
+ if (!ShouldProcess(*root.node) && + GetTensorListFloat32NodeTypeId(*root.node) == &root) { + black_set->insert(root_idx); + continue; + } + if (!ShouldProcess(*root.node) || + root.type_attr.fixed_type != DataType::DT_VARIANT || + !GetTensorListFloat32NodeTypeId(*root.node) || + tensor_list_prop_set.count(root.node)) { + continue; + } + // Traverse Tensor List handle edges (DT_VARIANT) to find cluster of all + // connected Tensor List nodes. + absl::flat_hash_set cluster({root.node}); + bool cluster_is_safe = true; + DfsTypeTraversal(graph_type_view_, {&root}, + TypeTraversalDirection::kFollowInputsAndOutputs, + DfsTypePredicates::Enter([&](int idx) -> bool { + const NodeTypeId& item = *graph_type_view_.GetNode(idx); + return !tensor_list_prop_set.count(item.node); + }), + DfsTypeCallbacks::PreOrder([&](int idx) { + const NodeDef* node = + graph_type_view_.GetNode(idx)->node; + tensor_list_prop_set.insert(node); + if (GetTensorListFloat32NodeTypeId(*node)) { + cluster.insert(node); + } else if (IsSourceOrSinkOp(node->op())) { + // The cluster crosses an untraversable boundary, so + // mark as unsafe. + cluster_is_safe = false; + } + })); + if (cluster_is_safe) { + tensor_list_clusters->push_back(cluster); + } else { + // Paint the entire cluster black if it's unsafe. 
+ VLOG(1) << "Painting Tensor List cluster of size " << cluster.size() + << " BLACK because it crosses graph boundaries"; + for (const NodeDef* node : cluster) { + const NodeTypeId* node_type = GetTensorListFloat32NodeTypeId(*node); + /*D*/ CHECK(node_type) << "No float32 type attribute found for " + << node->op() << " node " << node->name(); + const absl::optional maybe_node_type_idx = + graph_type_view_.GetNodeIndex(*node_type); + DCHECK(maybe_node_type_idx.has_value()) + << "Type attribute " << node_type->type_attr.DebugString() + << " of node " << node->name() << " not found in graph view"; + int node_type_idx = maybe_node_type_idx.value(); + VLOG(2) << "Painting type " << node_type->type_attr.DebugString() + << " of " << node->op() << " node " << node->name() + << " BLACK because its handle crosses graph boundaries, " + "making it unsafe to change"; + black_set->insert(node_type_idx); + } + } + } +} + +// Finds all writer -> reader pairs in the given set that are connected via +// their handles, and adds corresponding float32 edges to *implicit_fp32_edges. +void AutoMixedPrecisionImpl::FindTensorListImplicitFloat32Edges( + const absl::flat_hash_set& tensor_list_nodes, + std::vector* implicit_fp32_edges) const { + for (const NodeDef* root_node : tensor_list_nodes) { + if (!IsTensorListReaderOp(root_node->op())) continue; + NodeTypeId root(root_node, TypeAttrId(DataType::DT_VARIANT)); + const NodeTypeId* root_fp32 = GetTensorListFloat32NodeTypeId(*root.node); + /*D*/ CHECK(root_fp32) << "No float32 type attribute found for " + << root.node->op() << " node " << root.node->name(); + // Search backwards through handle edges (DT_VARIANT) for all writer ops, + // adding direct implicit edges between them and the reader. 
+ DfsTypeTraversal( + graph_type_view_, {&root}, TypeTraversalDirection::kFollowInputs, + DfsTypePredicates::Enter([&](int idx) -> bool { + const NodeTypeId& item = *graph_type_view_.GetNode(idx); + return ShouldProcess(*item.node); + }), + DfsTypeCallbacks::PreOrder([&](int idx) { + const NodeTypeId& item = *graph_type_view_.GetNode(idx); + if (IsTensorListWriterOp(item.node->op())) { + const NodeTypeId* item_fp32 = + GetTensorListFloat32NodeTypeId(*item.node); + /*D*/ CHECK(item_fp32) + << "No float32 type attribute found for " << item.node->op() + << " node " << item.node->name(); + VLOG(2) << "Adding ephemeral float32 edge from " + << item_fp32->node->op() << " node " + << item_fp32->node->name() << " to " + << root_fp32->node->op() << " node " + << root_fp32->node->name(); + implicit_fp32_edges->emplace_back(*item_fp32, *root_fp32); + } + })); + } } void AutoMixedPrecisionImpl::AddWhitelistOps( @@ -1654,75 +1711,45 @@ Status AutoMixedPrecisionImpl::ForceColorMatchOnRecurrentEdges( return Status::OK(); } -// Returns the last node in the simple chain starting at node and traversing -// backwards through the input(0) edge from each node until one with a matching -// op is found, or nullptr if no matching node is found. -const NodeDef* AutoMixedPrecisionImpl::GetTailOfChain( - const NodeDef& node, const absl::flat_hash_set& match_ops) const { - const NodeDef* node_ptr = &node; - do { - GraphView::InputPort node_input(node_ptr, 0); - MutableGraphView::OutputPort prev_output = - graph_view_.GetRegularFanin(node_input); - node_ptr = prev_output.node; - } while (node_ptr && !match_ops.count(node_ptr->op())); - return node_ptr; -} - -// Ensures that data structure nodes (e.g., StackV2) and all of their associated -// client nodes (e.g., StackPushV2 and StackPopV2) are in the same color set. 
-void AutoMixedPrecisionImpl::ForceColorMatchBetweenDataStructureOps( - const DataStructureOpsMap& object_clients_map, +// Forces all of the given Tensor List nodes into the same color set. +void AutoMixedPrecisionImpl::ForceColorMatchBetweenTensorListOps( + const absl::flat_hash_set& tensor_list_nodes, absl::flat_hash_set* white_set, absl::flat_hash_set* black_set) const { - for (const auto& object_clients : object_clients_map) { - const NodeTypeId& object_node_type = object_clients.first; - const auto& client_nodes = object_clients.second; - NodeTypeIdSet all_client_nodes = client_nodes.first; - all_client_nodes.insert(client_nodes.second.begin(), - client_nodes.second.end()); - // The object node may be considered a client too (e.g., - // TensorListFromTensor). - all_client_nodes.insert(object_node_type); - bool any_black = false; - bool any_white = false; - for (const NodeTypeId& node_type : all_client_nodes) { - const absl::optional maybe_node_idx = - graph_type_view_.GetNodeIndex(node_type); - DCHECK(maybe_node_idx.has_value()) - << "Type attribute " << node_type.type_attr.DebugString() - << " of node " << node_type.node->name() - << " not found in graph view"; - int node_idx = maybe_node_idx.value(); - if (black_set->count(node_idx)) { - any_black = true; - break; - } else if (white_set->count(node_idx)) { - any_white = true; - } + bool any_black = false; + bool any_white = false; + std::vector node_type_idxs; + node_type_idxs.reserve(tensor_list_nodes.size()); + for (const NodeDef* node : tensor_list_nodes) { + const NodeTypeId& node_type = *GetTensorListFloat32NodeTypeId(*node); + const absl::optional maybe_node_type_idx = + graph_type_view_.GetNodeIndex(node_type); + DCHECK(maybe_node_type_idx.has_value()) + << "Type attribute " << node_type.type_attr.DebugString() << " of node " + << node->name() << " not found in graph view"; + node_type_idxs.push_back(maybe_node_type_idx.value()); + } + for (int node_type_idx : node_type_idxs) { + if 
(black_set->count(node_type_idx)) { + any_black = true; + break; + } else if (white_set->count(node_type_idx)) { + any_white = true; } - if (any_black || any_white) { - for (const NodeTypeId& node_type : all_client_nodes) { - VLOG(2) << "Painting type " << node_type.type_attr.DebugString() - << " of " << node_type.node->op() << " node " - << node_type.node->name() << " " - << (any_black ? "BLACK" : "WHITE") - << " because at least one of its siblings is " - << (any_black ? "BLACK" : "WHITE"); - const absl::optional maybe_node_idx = - graph_type_view_.GetNodeIndex(node_type); - DCHECK(maybe_node_idx.has_value()) - << "Type attribute " << node_type.type_attr.DebugString() - << " of node " << node_type.node->name() - << " not found in graph view"; - int node_idx = maybe_node_idx.value(); - if (any_black) { - white_set->erase(node_idx); - black_set->insert(node_idx); - } else { - white_set->insert(node_idx); - } - } + } + if (!any_black && !any_white) return; + for (int node_type_idx : node_type_idxs) { + const NodeTypeId& node_type = *graph_type_view_.GetNode(node_type_idx); + VLOG(2) << "Painting type " << node_type.type_attr.DebugString() << " of " + << node_type.node->op() << " node " << node_type.node->name() << " " + << (any_black ? "BLACK" : "WHITE") + << " because at least one of its siblings is " + << (any_black ? 
"BLACK" : "WHITE"); + if (any_black) { + white_set->erase(node_type_idx); + black_set->insert(node_type_idx); + } else { + white_set->insert(node_type_idx); } } } diff --git a/tensorflow/core/grappler/optimizers/auto_mixed_precision_lists.h b/tensorflow/core/grappler/optimizers/auto_mixed_precision_lists.h index dd6b1c16449..33cd747da25 100644 --- a/tensorflow/core/grappler/optimizers/auto_mixed_precision_lists.h +++ b/tensorflow/core/grappler/optimizers/auto_mixed_precision_lists.h @@ -294,11 +294,13 @@ class AutoMixedPrecisionLists { "StridedSliceGrad", "Switch", "TensorListConcat", + "TensorListConcatLists", "TensorListConcatV2", "TensorListGather", "TensorListGetItem", "TensorListPopBack", "TensorListPushBack", + "TensorListPushBackBatch", "TensorListFromTensor", "TensorListScatter", "TensorListScatterV2", From f4eb898be10f67629d6d0ccc84b89bb7de423392 Mon Sep 17 00:00:00 2001 From: Ben Barsdell Date: Sat, 22 Feb 2020 16:16:19 +1100 Subject: [PATCH 030/253] Add new Tensor List tests for auto_mixed_precision - Updates TensorListPushBackBatchAndConcatLists to check that all nodes are now converted to fp16. - Adds TensorListThroughFunction test to check that Tensor Lists that pass through sub-graphs are safely handled. --- .../optimizers/auto_mixed_precision_test.cc | 92 ++++++++++++++++++- 1 file changed, 89 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/auto_mixed_precision_test.cc b/tensorflow/core/grappler/optimizers/auto_mixed_precision_test.cc index 0f48ae97c82..2d1d44093c9 100644 --- a/tensorflow/core/grappler/optimizers/auto_mixed_precision_test.cc +++ b/tensorflow/core/grappler/optimizers/auto_mixed_precision_test.cc @@ -28,6 +28,7 @@ limitations under the License. 
#include "tensorflow/cc/ops/list_ops.h" #include "tensorflow/cc/ops/math_ops.h" #include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/framework/function_testlib.h" #include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/grappler/clusters/single_machine.h" @@ -905,11 +906,96 @@ TEST_F(AutoMixedPrecisionTest, TensorListPushBackBatchAndConcatLists) { VLOG(1) << output.DebugString(); GraphView output_view(&output); - // TODO(benbarsdell): Add checks for data type conversion here once support - // for TensorListPushBackBatch and TensorListConcatLists is added in the - // auto_mixed_precision pass. + EXPECT_EQ(output.node_size(), item.graph.node_size() + 2); + const char* type_key = "element_dtype"; EXPECT_EQ(output_view.GetNode("wht1")->attr().at("T").type(), DT_HALF); EXPECT_EQ(output_view.GetNode("wht2")->attr().at("T").type(), DT_HALF); + EXPECT_EQ(output_view.GetNode("gry1")->attr().at("T").type(), DT_HALF); + EXPECT_EQ(output_view.GetNode("tl1")->attr().at(type_key).type(), DT_HALF); + EXPECT_EQ(output_view.GetNode("tl2")->attr().at(type_key).type(), DT_HALF); + EXPECT_EQ(output_view.GetNode("tl3")->attr().at(type_key).type(), DT_HALF); + EXPECT_EQ(output_view.GetNode("tl3r1")->attr().at(type_key).type(), DT_HALF); + + auto tensors = EvaluateNodes(output, item.fetch); + EXPECT_EQ(tensors.size(), tensors_expected.size()); + EXPECT_EQ(tensors.size(), item.fetch.size()); + for (int i = 0; i < item.fetch.size(); ++i) { + test::ExpectClose(tensors_expected[i], tensors[i], -1, 5e-4); + } +} + +TEST_F(AutoMixedPrecisionTest, TensorListThroughFunction) { + // This test passes a tensor list handle through a function with its own + // Tensor List ops inside to test that the types are not changed to a + // conflicting state. + // A separate Tensor List cluster is added to test that it is still changed to + // DT_HALF. 
+ FunctionDefLibrary function_lib; + const Tensor kShape = test::AsTensor({32, 32}); + FunctionDef func1 = FunctionDefHelper::Define( + "Func1", {"ihandle: variant", "x: float"}, + {"ohandle: variant", "y: float"}, {}, + { + {{"tl1w1_handle"}, + "TensorListPushBack", + {"ihandle", "x"}, + {{"element_dtype", DT_FLOAT}}}, + {{"shape"}, "Const", {}, {{"value", kShape}, {"dtype", DT_INT32}}}, + {{"tl1r1_handle", "tl1r1_data"}, + "TensorListPopBack", + {"tl1w1_handle", "shape"}, + {{"element_dtype", DT_FLOAT}}}, + {{"ohandle"}, "Identity", {"tl1r1_handle"}, {{"T", DT_VARIANT}}}, + {{"y"}, "Identity", {"tl1r1_data"}, {{"T", DT_FLOAT}}}, + }); + function_lib.add_function()->Swap(&func1); + + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + TF_CHECK_OK(s.graph()->AddFunctionLibrary(function_lib)); + tensorflow::Input shape = {32, 32}; + Output input = ops::Const(s.WithOpName("input"), 1.f / 32, {32, 32}); + Output wht1 = ops::MatMul(s.WithOpName("wht1"), input, input); + Output gry1 = ops::Tanh(s.WithOpName("gry1"), wht1); + auto tl1 = ops::EmptyTensorList(s.WithOpName("tl1"), {32, 32}, 8, DT_FLOAT); + auto tl1w1 = ops::TensorListPushBack(s.WithOpName("tl1w1"), tl1.handle, gry1); + auto _gry1 = tensorflow::ops::AsNodeOut(s, gry1); + auto _tl1w1_handle = tensorflow::ops::AsNodeOut(s, tl1w1.output_handle); + auto builder = + tensorflow::NodeBuilder("Func1", "Func1", s.graph()->op_registry()); + tensorflow::Node* func1_op; + TF_CHECK_OK( + builder.Input(_tl1w1_handle).Input(_gry1).Finalize(s.graph(), &func1_op)); + Output func1_handle(func1_op, 0); + Output tl1r1 = ops::TensorListPopBack(s.WithOpName("tl1r1"), func1_handle, + shape, DT_FLOAT) + .tensor; + auto tl2 = ops::EmptyTensorList(s.WithOpName("tl2"), {32, 32}, 8, DT_FLOAT); + auto tl2w1 = ops::TensorListPushBack(s.WithOpName("tl2w1"), tl2.handle, gry1); + Output tl2r1 = ops::TensorListPopBack(s.WithOpName("tl2r1"), + tl2w1.output_handle, shape, DT_FLOAT) + .tensor; + Output wht2 = 
ops::MatMul(s.WithOpName("wht2"), tl1r1, tl2r1); + Output fetch1 = ops::Identity(s.WithOpName("fetch1"), wht2); + + GrapplerItem item; + item.fetch = {"fetch1"}; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + auto tensors_expected = EvaluateNodes(item.graph, item.fetch); + + AutoMixedPrecision optimizer; + GraphDef output; + TF_ASSERT_OK(optimizer.Optimize(virtual_cluster_.get(), item, &output)); + + VLOG(1) << output.DebugString(); + + GraphView output_view(&output); + const char* type_key = "element_dtype"; + EXPECT_EQ(output_view.GetNode("wht1")->attr().at("T").type(), DT_HALF); + EXPECT_EQ(output_view.GetNode("wht2")->attr().at("T").type(), DT_HALF); + EXPECT_EQ(output_view.GetNode("gry1")->attr().at("T").type(), DT_HALF); + EXPECT_EQ(output_view.GetNode("tl2")->attr().at(type_key).type(), DT_HALF); + EXPECT_EQ(output_view.GetNode("tl2w1")->attr().at(type_key).type(), DT_HALF); + EXPECT_EQ(output_view.GetNode("tl2r1")->attr().at(type_key).type(), DT_HALF); auto tensors = EvaluateNodes(output, item.fetch); EXPECT_EQ(tensors.size(), tensors_expected.size()); From e6c5080c5e83899808f2fffa359df976a878f331 Mon Sep 17 00:00:00 2001 From: dothinking <13885442+dothinking@users.noreply.github.com> Date: Sat, 22 Feb 2020 17:18:04 +0800 Subject: [PATCH 031/253] update API doc page: tf.data.Dataset - update markdown list for `tf.data.Dataset.list_files` example - change `NOTE:` to `Note:` to keep a consistent style in API doc --- tensorflow/python/data/ops/dataset_ops.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 3e104793ca3..90e01cb2334 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -687,7 +687,7 @@ class DatasetV2(tracking_base.Trackable, composite_tensor.CompositeTensor): >>> list(dataset.take(3).as_numpy_iterator()) [(1, array([1])), (2, array([1, 1])), (3, array([1, 1, 1]))] - 
NOTE: The current implementation of `Dataset.from_generator()` uses + Note: The current implementation of `Dataset.from_generator()` uses `tf.numpy_function` and inherits the same constraints. In particular, it requires the `Dataset`- and `Iterator`-related operations to be placed on a device in the same process as the Python program that called @@ -695,7 +695,7 @@ class DatasetV2(tracking_base.Trackable, composite_tensor.CompositeTensor): serialized in a `GraphDef`, and you should not use this method if you need to serialize your model and restore it in a different environment. - NOTE: If `generator` depends on mutable global variables or other external + Note: If `generator` depends on mutable global variables or other external state, be aware that the runtime may invoke `generator` multiple times (in order to support repeating the `Dataset`) and at any time between the call to `Dataset.from_generator()` and the production of the @@ -1013,17 +1013,20 @@ class DatasetV2(tracking_base.Trackable, composite_tensor.CompositeTensor): filename with `list_files` may result in poor performance with remote storage systems. - NOTE: The default behavior of this method is to return filenames in + Note: The default behavior of this method is to return filenames in a non-deterministic random shuffled order. Pass a `seed` or `shuffle=False` to get results in a deterministic order. Example: If we had the following files on our filesystem: + - /path/to/dir/a.txt - /path/to/dir/b.py - /path/to/dir/c.py + If we pass "/path/to/dir/*.py" as the directory, the dataset would produce: + - /path/to/dir/b.py - /path/to/dir/c.py @@ -1077,7 +1080,7 @@ class DatasetV2(tracking_base.Trackable, composite_tensor.CompositeTensor): >>> list(dataset.as_numpy_iterator()) [1, 2, 3, 1, 2, 3, 1, 2, 3] - NOTE: If this dataset is a function of global state (e.g. a random number + Note: If this dataset is a function of global state (e.g. 
a random number generator), then different repetitions may produce different elements. Args: @@ -1331,6 +1334,7 @@ class DatasetV2(tracking_base.Trackable, composite_tensor.CompositeTensor): Raises: InvalidArgumentError: if `num_shards` or `index` are illegal values. + Note: error checking is done on a best-effort basis, and errors aren't guaranteed to be caught upon dataset creation. (e.g. providing in a placeholder tensor bypasses the early checking, and will instead result @@ -1688,7 +1692,7 @@ name=None)) 5, 5, 5, 5, 5, 5] - NOTE: The order of elements yielded by this transformation is + Note: The order of elements yielded by this transformation is deterministic, as long as `map_func` is a pure function and `deterministic=True`. If `map_func` contains any stateful operations, the order in which that state is accessed is undefined. @@ -2352,7 +2356,7 @@ class DatasetV1(DatasetV2): deterministic=None): """Maps `map_func` across the elements of this dataset. - NOTE: This is an escape hatch for existing uses of `map` that do not work + Note: This is an escape hatch for existing uses of `map` that do not work with V2 functions. New uses are strongly discouraged and existing uses should migrate to `map` as this method will be removed in V2. @@ -2415,7 +2419,7 @@ class DatasetV1(DatasetV2): def filter_with_legacy_function(self, predicate): """Filters this dataset according to `predicate`. - NOTE: This is an escape hatch for existing uses of `filter` that do not work + Note: This is an escape hatch for existing uses of `filter` that do not work with V2 functions. New uses are strongly discouraged and existing uses should migrate to `filter` as this method will be removed in V2. 
From a98ab0be06a43aa7199022077ab15af465d0d86b Mon Sep 17 00:00:00 2001 From: Ashutosh Hathidara Date: Sun, 23 Feb 2020 18:04:54 +0530 Subject: [PATCH 032/253] Added example of from_tensors and from_tensor_slices --- tensorflow/python/data/ops/dataset_ops.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 3e104793ca3..b16624792af 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -534,6 +534,13 @@ class DatasetV2(tracking_base.Trackable, composite_tensor.CompositeTensor): >>> list(dataset.as_numpy_iterator()) [(array([1, 2, 3], dtype=int32), b'A')] + >>> # `from_tensors` creates 3D tensor in below example + >>> # unlike `from_tensor_slices` which merges the input tensor. + >>> dataset = tf.data.Dataset.from_tensors([tf.random_uniform([2, 3]), + tf.random_uniform([2, 3])]) + >>> list(dataset.as_numpy_iterator())[0].shape + (2, 2, 3) + Note that if `tensors` contains a NumPy array, and eager execution is not enabled, the values will be embedded in the graph as one or more `tf.constant` operations. For large datasets (> 1 GB), this can waste @@ -611,6 +618,13 @@ class DatasetV2(tracking_base.Trackable, composite_tensor.CompositeTensor): [3, 2]], dtype=int32), array([[b'A'], [b'B']], dtype=object)) + >>> # `from_tensor_slices` merges the input tensor + >>> # unlike `from_tensors` which will create 3D tensor in below example. + >>> dataset = tf.data.Dataset.from_tensor_slices([tf.random.uniform([2, 3]), + tf.random.uniform([2, 3])]) + >>> list(dataset.as_numpy_iterator())[0].shape + (2, 3) + Note that if `tensors` contains a NumPy array, and eager execution is not enabled, the values will be embedded in the graph as one or more `tf.constant` operations. 
For large datasets (> 1 GB), this can waste From 119b461a54430b660e68c85c0dead2584a9e7952 Mon Sep 17 00:00:00 2001 From: Ashutosh Hathidara Date: Sun, 23 Feb 2020 19:33:48 +0530 Subject: [PATCH 033/253] Added doc in DenseNet for decode_prediction() and preprocess_input() --- .../python/keras/applications/densenet.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tensorflow/python/keras/applications/densenet.py b/tensorflow/python/keras/applications/densenet.py index 9a7be9a3b7a..bc161c71f82 100644 --- a/tensorflow/python/keras/applications/densenet.py +++ b/tensorflow/python/keras/applications/densenet.py @@ -351,12 +351,34 @@ def DenseNet201(include_top=True, @keras_export('keras.applications.densenet.preprocess_input') def preprocess_input(x, data_format=None): + """Preprocesses a numpy array encoding a batch of images. + + Arguments + x: A 4D numpy array consists of RGB values within [0, 255]. + + Returns + Preprocessed array. + """ return imagenet_utils.preprocess_input( x, data_format=data_format, mode='torch') @keras_export('keras.applications.densenet.decode_predictions') def decode_predictions(preds, top=5): + """Decodes the prediction result from the model. + + Arguments + preds: Numpy tensor encoding a batch of predictions. + top: Integer, how many top-guesses to return. + + Returns + A list of lists of top class prediction tuples + `(class_name, class_description, score)`. + One list of tuples per sample in batch input. + + Raises + ValueError: In case of invalid shape of the `preds` array (must be 2D). 
+ """ return imagenet_utils.decode_predictions(preds, top=top) From 7a30b5a8623ec88b12347619dcc332a724d0a167 Mon Sep 17 00:00:00 2001 From: Ashutosh Hathidara Date: Sun, 23 Feb 2020 19:37:12 +0530 Subject: [PATCH 034/253] Added doc in DenseNet for decode_prediction() and preprocess_input() --- tensorflow/python/keras/applications/densenet.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/python/keras/applications/densenet.py b/tensorflow/python/keras/applications/densenet.py index bc161c71f82..a793c862da6 100644 --- a/tensorflow/python/keras/applications/densenet.py +++ b/tensorflow/python/keras/applications/densenet.py @@ -358,6 +358,9 @@ def preprocess_input(x, data_format=None): Returns Preprocessed array. + + Raises + ValueError: In case of unknown `data_format` argument. """ return imagenet_utils.preprocess_input( x, data_format=data_format, mode='torch') From da9cf4bc24f84c0b06247bff463fdf30e337f3f4 Mon Sep 17 00:00:00 2001 From: Ashutosh Hathidara Date: Sun, 23 Feb 2020 19:55:39 +0530 Subject: [PATCH 035/253] Added doc for all application models for decode_prediction() and preprocess_input() --- .../python/keras/applications/efficientnet.py | 14 ++++++++++ .../python/keras/applications/inception_v3.py | 25 +++++++++++++++++ .../python/keras/applications/nasnet.py | 25 +++++++++++++++++ .../python/keras/applications/resnet.py | 25 +++++++++++++++++ .../python/keras/applications/resnet_v2.py | 25 +++++++++++++++++ tensorflow/python/keras/applications/vgg16.py | 27 +++++++++++++++++-- tensorflow/python/keras/applications/vgg19.py | 27 +++++++++++++++++-- .../python/keras/applications/xception.py | 25 +++++++++++++++++ 8 files changed, 189 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/keras/applications/efficientnet.py b/tensorflow/python/keras/applications/efficientnet.py index 11ba3a98b7e..dacfeb055be 100644 --- a/tensorflow/python/keras/applications/efficientnet.py +++ b/tensorflow/python/keras/applications/efficientnet.py @@ 
-660,4 +660,18 @@ def preprocess_input(x, data_format=None): # pylint: disable=unused-argument @keras_export('keras.applications.efficientnet.decode_predictions') def decode_predictions(preds, top=5): + """Decodes the prediction result from the model. + + Arguments + preds: Numpy tensor encoding a batch of predictions. + top: Integer, how many top-guesses to return. + + Returns + A list of lists of top class prediction tuples + `(class_name, class_description, score)`. + One list of tuples per sample in batch input. + + Raises + ValueError: In case of invalid shape of the `preds` array (must be 2D). + """ return imagenet_utils.decode_predictions(preds, top=top) diff --git a/tensorflow/python/keras/applications/inception_v3.py b/tensorflow/python/keras/applications/inception_v3.py index f8a56e62234..d9dbedb1a80 100644 --- a/tensorflow/python/keras/applications/inception_v3.py +++ b/tensorflow/python/keras/applications/inception_v3.py @@ -403,9 +403,34 @@ def conv2d_bn(x, @keras_export('keras.applications.inception_v3.preprocess_input') def preprocess_input(x, data_format=None): + """Preprocesses a numpy array encoding a batch of images. + + Arguments + x: A 4D numpy array consists of RGB values within [0, 255]. + + Returns + Preprocessed array. + + Raises + ValueError: In case of unknown `data_format` argument. + """ return imagenet_utils.preprocess_input(x, data_format=data_format, mode='tf') @keras_export('keras.applications.inception_v3.decode_predictions') def decode_predictions(preds, top=5): + """Decodes the prediction result from the model. + + Arguments + preds: Numpy tensor encoding a batch of predictions. + top: Integer, how many top-guesses to return. + + Returns + A list of lists of top class prediction tuples + `(class_name, class_description, score)`. + One list of tuples per sample in batch input. + + Raises + ValueError: In case of invalid shape of the `preds` array (must be 2D). 
+ """ return imagenet_utils.decode_predictions(preds, top=top) diff --git a/tensorflow/python/keras/applications/nasnet.py b/tensorflow/python/keras/applications/nasnet.py index a29d5f4c380..59303dab7af 100644 --- a/tensorflow/python/keras/applications/nasnet.py +++ b/tensorflow/python/keras/applications/nasnet.py @@ -778,9 +778,34 @@ def _reduction_a_cell(ip, p, filters, block_id=None): @keras_export('keras.applications.nasnet.preprocess_input') def preprocess_input(x, data_format=None): + """Preprocesses a numpy array encoding a batch of images. + + Arguments + x: A 4D numpy array consists of RGB values within [0, 255]. + + Returns + Preprocessed array. + + Raises + ValueError: In case of unknown `data_format` argument. + """ return imagenet_utils.preprocess_input(x, data_format=data_format, mode='tf') @keras_export('keras.applications.nasnet.decode_predictions') def decode_predictions(preds, top=5): + """Decodes the prediction result from the model. + + Arguments + preds: Numpy tensor encoding a batch of predictions. + top: Integer, how many top-guesses to return. + + Returns + A list of lists of top class prediction tuples + `(class_name, class_description, score)`. + One list of tuples per sample in batch input. + + Raises + ValueError: In case of invalid shape of the `preds` array (must be 2D). + """ return imagenet_utils.decode_predictions(preds, top=top) diff --git a/tensorflow/python/keras/applications/resnet.py b/tensorflow/python/keras/applications/resnet.py index 86d26695373..87eb6e87df8 100644 --- a/tensorflow/python/keras/applications/resnet.py +++ b/tensorflow/python/keras/applications/resnet.py @@ -505,6 +505,17 @@ def ResNet152(include_top=True, @keras_export('keras.applications.resnet50.preprocess_input', 'keras.applications.resnet.preprocess_input') def preprocess_input(x, data_format=None): + """Preprocesses a numpy array encoding a batch of images. + + Arguments + x: A 4D numpy array consists of RGB values within [0, 255]. 
+ + Returns + Preprocessed array. + + Raises + ValueError: In case of unknown `data_format` argument. + """ return imagenet_utils.preprocess_input( x, data_format=data_format, mode='caffe') @@ -512,6 +523,20 @@ def preprocess_input(x, data_format=None): @keras_export('keras.applications.resnet50.decode_predictions', 'keras.applications.resnet.decode_predictions') def decode_predictions(preds, top=5): + """Decodes the prediction result from the model. + + Arguments + preds: Numpy tensor encoding a batch of predictions. + top: Integer, how many top-guesses to return. + + Returns + A list of lists of top class prediction tuples + `(class_name, class_description, score)`. + One list of tuples per sample in batch input. + + Raises + ValueError: In case of invalid shape of the `preds` array (must be 2D). + """ return imagenet_utils.decode_predictions(preds, top=top) diff --git a/tensorflow/python/keras/applications/resnet_v2.py b/tensorflow/python/keras/applications/resnet_v2.py index 2e31017dfa9..543b534e9fa 100644 --- a/tensorflow/python/keras/applications/resnet_v2.py +++ b/tensorflow/python/keras/applications/resnet_v2.py @@ -124,12 +124,37 @@ def ResNet152V2( @keras_export('keras.applications.resnet_v2.preprocess_input') def preprocess_input(x, data_format=None): + """Preprocesses a numpy array encoding a batch of images. + + Arguments + x: A 4D numpy array consists of RGB values within [0, 255]. + + Returns + Preprocessed array. + + Raises + ValueError: In case of unknown `data_format` argument. + """ return imagenet_utils.preprocess_input( x, data_format=data_format, mode='tf') @keras_export('keras.applications.resnet_v2.decode_predictions') def decode_predictions(preds, top=5): + """Decodes the prediction result from the model. + + Arguments + preds: Numpy tensor encoding a batch of predictions. + top: Integer, how many top-guesses to return. + + Returns + A list of lists of top class prediction tuples + `(class_name, class_description, score)`. 
+ One list of tuples per sample in batch input. + + Raises + ValueError: In case of invalid shape of the `preds` array (must be 2D). + """ return imagenet_utils.decode_predictions(preds, top=top) diff --git a/tensorflow/python/keras/applications/vgg16.py b/tensorflow/python/keras/applications/vgg16.py index e268a592833..128ab6e6c34 100644 --- a/tensorflow/python/keras/applications/vgg16.py +++ b/tensorflow/python/keras/applications/vgg16.py @@ -215,12 +215,35 @@ def VGG16( @keras_export('keras.applications.vgg16.preprocess_input') def preprocess_input(x, data_format=None): - """Preprocesses the input (encoding a batch of images) to the VGG16 model.""" + """Preprocesses a numpy array encoding a batch of images. + + Arguments + x: A 4D numpy array consists of RGB values within [0, 255]. + + Returns + Preprocessed array. + + Raises + ValueError: In case of unknown `data_format` argument. + """ return imagenet_utils.preprocess_input( x, data_format=data_format, mode='caffe') @keras_export('keras.applications.vgg16.decode_predictions') def decode_predictions(preds, top=5): - """Decodes the prediction result from the VGG16 model.""" + """Decodes the prediction result from the model. + + Arguments + preds: Numpy tensor encoding a batch of predictions. + top: Integer, how many top-guesses to return. + + Returns + A list of lists of top class prediction tuples + `(class_name, class_description, score)`. + One list of tuples per sample in batch input. + + Raises + ValueError: In case of invalid shape of the `preds` array (must be 2D). 
+ """ return imagenet_utils.decode_predictions(preds, top=top) diff --git a/tensorflow/python/keras/applications/vgg19.py b/tensorflow/python/keras/applications/vgg19.py index 8d25dc0e42f..aee19f27423 100644 --- a/tensorflow/python/keras/applications/vgg19.py +++ b/tensorflow/python/keras/applications/vgg19.py @@ -225,12 +225,35 @@ def VGG19( @keras_export('keras.applications.vgg19.preprocess_input') def preprocess_input(x, data_format=None): - """Preprocesses the input (encoding a batch of images) to the VGG19 model.""" + """Preprocesses a numpy array encoding a batch of images. + + Arguments + x: A 4D numpy array consists of RGB values within [0, 255]. + + Returns + Preprocessed array. + + Raises + ValueError: In case of unknown `data_format` argument. + """ return imagenet_utils.preprocess_input( x, data_format=data_format, mode='caffe') @keras_export('keras.applications.vgg19.decode_predictions') def decode_predictions(preds, top=5): - """Decodes the prediction result from the VGG19 model.""" + """Decodes the prediction result from the model. + + Arguments + preds: Numpy tensor encoding a batch of predictions. + top: Integer, how many top-guesses to return. + + Returns + A list of lists of top class prediction tuples + `(class_name, class_description, score)`. + One list of tuples per sample in batch input. + + Raises + ValueError: In case of invalid shape of the `preds` array (must be 2D). + """ return imagenet_utils.decode_predictions(preds, top=top) diff --git a/tensorflow/python/keras/applications/xception.py b/tensorflow/python/keras/applications/xception.py index 7f6602b90d1..fe665045212 100644 --- a/tensorflow/python/keras/applications/xception.py +++ b/tensorflow/python/keras/applications/xception.py @@ -309,9 +309,34 @@ def Xception( @keras_export('keras.applications.xception.preprocess_input') def preprocess_input(x, data_format=None): + """Preprocesses a numpy array encoding a batch of images. 
+ + Arguments + x: A 4D numpy array consists of RGB values within [0, 255]. + + Returns + Preprocessed array. + + Raises + ValueError: In case of unknown `data_format` argument. + """ return imagenet_utils.preprocess_input(x, data_format=data_format, mode='tf') @keras_export('keras.applications.xception.decode_predictions') def decode_predictions(preds, top=5): + """Decodes the prediction result from the model. + + Arguments + preds: Numpy tensor encoding a batch of predictions. + top: Integer, how many top-guesses to return. + + Returns + A list of lists of top class prediction tuples + `(class_name, class_description, score)`. + One list of tuples per sample in batch input. + + Raises + ValueError: In case of invalid shape of the `preds` array (must be 2D). + """ return imagenet_utils.decode_predictions(preds, top=top) From ed85a1efe3ac435a7490de0f670e3ed6078f1afe Mon Sep 17 00:00:00 2001 From: Xiaoquan Kong Date: Mon, 24 Feb 2020 14:37:22 +0800 Subject: [PATCH 036/253] fix(keras): unable serialize custom train config --- tensorflow/python/util/serialization.py | 6 ++- tensorflow/python/util/serialization_test.py | 45 +++++++++++++++++++- 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/util/serialization.py b/tensorflow/python/util/serialization.py index 1e5de4cb280..9abc1257de1 100644 --- a/tensorflow/python/util/serialization.py +++ b/tensorflow/python/util/serialization.py @@ -23,6 +23,7 @@ import wrapt from tensorflow.python.framework import tensor_shape from tensorflow.python.util.compat import collections_abc +from tensorflow.python.keras.utils import generic_utils def get_json_type(obj): @@ -40,7 +41,10 @@ def get_json_type(obj): # if obj is a serializable Keras class instance # e.g. 
optimizer, layer if hasattr(obj, 'get_config'): - return {'class_name': obj.__class__.__name__, 'config': obj.get_config()} + return { + 'class_name': generic_utils._get_name_or_custom_name(obj.__class__), + 'config': obj.get_config() + } # if obj is any numpy type if type(obj).__module__ == np.__name__: diff --git a/tensorflow/python/util/serialization_test.py b/tensorflow/python/util/serialization_test.py index 6df7533831b..b3581a82bed 100644 --- a/tensorflow/python/util/serialization_test.py +++ b/tensorflow/python/util/serialization_test.py @@ -23,10 +23,12 @@ import json from tensorflow.python.framework import constant_op from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_util +from tensorflow.python.keras import losses from tensorflow.python.keras.engine import input_layer from tensorflow.python.keras.engine import sequential from tensorflow.python.keras.engine import training from tensorflow.python.keras.layers import core +from tensorflow.python.keras.utils import losses_utils from tensorflow.python.platform import test from tensorflow.python.util import serialization @@ -69,5 +71,46 @@ class SerializationTests(test.TestCase): self.assertEqual( 10, model_round_trip["config"]["layers"][1]["config"]["units"]) + @test_util.run_in_graph_and_eager_modes + def test_serialize_custom_model_compile(self): + @register_keras_serializable(package='dummy-package') + class DummySparseCategoricalCrossentropyLoss(losses.LossFunctionWrapper): + # This loss is identical equal to tf.keras.losses.SparseCategoricalCrossentropy + def __init__( + self, + from_logits=False, + reduction=losses_utils.ReductionV2.AUTO, + name="dummy_sparse_categorical_crossentropy_loss", + ): + super(DummySparseCategoricalCrossentropyLoss, self).__init__( + losses.sparse_categorical_crossentropy, + name=name, + reduction=reduction, + from_logits=from_logits, + ) + + x = input_layer.Input(shape=[3]) + y = core.Dense(10)(x) + model = training.Model(x, y) + 
model.compile( + loss=DummySparseCategoricalCrossentropyLoss(from_logits=True) + ) + model_round_trip = json.loads( + json.dumps(model.loss, default=serialization.get_json_type) + ) + + # check if class name with package scope + self.assertEqual( + "dummy-package>DummySparseCategoricalCrossentropyLoss", + model_round_trip["class_name"] + ) + + # check if configure is correctly + self.assertEqual( + True, + model_round_trip["config"]["from_logits"] + ) + + if __name__ == "__main__": - test.main() + test.main() From dc048acf2ca5f3f0dcc91439d9cc69e70d19bd95 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Mon, 24 Feb 2020 11:59:01 +0100 Subject: [PATCH 037/253] Remove duplicate include line --- tensorflow/core/kernels/ops_testutil.h | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/core/kernels/ops_testutil.h b/tensorflow/core/kernels/ops_testutil.h index c7c1e42d2c0..ea79a4b416b 100644 --- a/tensorflow/core/kernels/ops_testutil.h +++ b/tensorflow/core/kernels/ops_testutil.h @@ -26,7 +26,6 @@ limitations under the License.
#include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/process_function_library_runtime.h" -#include "tensorflow/core/platform/threadpool.h" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/device_base.h" #include "tensorflow/core/framework/function.h" From 6da8ce0d1ff366c47defd85c1a94dca6920f829e Mon Sep 17 00:00:00 2001 From: Xiaoquan Kong Date: Mon, 24 Feb 2020 20:54:07 +0800 Subject: [PATCH 038/253] fix: TF API changes --- tensorflow/python/util/serialization.py | 2 +- tensorflow/python/util/serialization_test.py | 71 ++++++++++---------- 2 files changed, 37 insertions(+), 36 deletions(-) diff --git a/tensorflow/python/util/serialization.py b/tensorflow/python/util/serialization.py index 9abc1257de1..c34383c5f2e 100644 --- a/tensorflow/python/util/serialization.py +++ b/tensorflow/python/util/serialization.py @@ -42,7 +42,7 @@ def get_json_type(obj): # e.g. 
optimizer, layer if hasattr(obj, 'get_config'): return { - 'class_name': generic_utils._get_name_or_custom_name(obj.__class__), + 'class_name': generic_utils.get_registered_name(obj.__class__), 'config': obj.get_config() } diff --git a/tensorflow/python/util/serialization_test.py b/tensorflow/python/util/serialization_test.py index b3581a82bed..501dcda30f1 100644 --- a/tensorflow/python/util/serialization_test.py +++ b/tensorflow/python/util/serialization_test.py @@ -28,7 +28,7 @@ from tensorflow.python.keras.engine import input_layer from tensorflow.python.keras.engine import sequential from tensorflow.python.keras.engine import training from tensorflow.python.keras.layers import core -from tensorflow.python.keras.utils import losses_utils +from tensorflow.python.keras.utils import losses_utils, generic_utils from tensorflow.python.platform import test from tensorflow.python.util import serialization @@ -73,43 +73,44 @@ class SerializationTests(test.TestCase): @test_util.run_in_graph_and_eager_modes def test_serialize_custom_model_compile(self): - @register_keras_serializable(package='dummy-package') - class DummySparseCategoricalCrossentropyLoss(losses.LossFunctionWrapper): - # This loss is identical equal to tf.keras.losses.SparseCategoricalCrossentropy - def __init__( - self, - from_logits=False, - reduction=losses_utils.ReductionV2.AUTO, - name="dummy_sparse_categorical_crossentropy_loss", - ): - super(DummySparseCategoricalCrossentropyLoss, self).__init__( - losses.sparse_categorical_crossentropy, - name=name, - reduction=reduction, - from_logits=from_logits, - ) + with generic_utils.custom_object_scope(): + @generic_utils.register_keras_serializable(package='dummy-package') + class DummySparseCategoricalCrossentropyLoss(losses.LossFunctionWrapper): + # This loss is identical equal to tf.keras.losses.SparseCategoricalCrossentropy + def __init__( + self, + from_logits=False, + reduction=losses_utils.ReductionV2.AUTO, + 
name="dummy_sparse_categorical_crossentropy_loss", + ): + super(DummySparseCategoricalCrossentropyLoss, self).__init__( + losses.sparse_categorical_crossentropy, + name=name, + reduction=reduction, + from_logits=from_logits, + ) - x = input_layer.Input(shape=[3]) - y = core.Dense(10)(x) - model = training.Model(x, y) - model.compile( - loss=DummySparseCategoricalCrossentropyLoss(from_logits=True) - ) - model_round_trip = json.loads( - json.dumps(model.loss, default=serialization.get_json_type) - ) + x = input_layer.Input(shape=[3]) + y = core.Dense(10)(x) + model = training.Model(x, y) + model.compile( + loss=DummySparseCategoricalCrossentropyLoss(from_logits=True) + ) + model_round_trip = json.loads( + json.dumps(model.loss, default=serialization.get_json_type) + ) - # check if class name with package scope - self.assertEqual( - "dummy-package>DummySparseCategoricalCrossentropyLoss", - model_round_trip["class_name"] - ) + # check if class name with package scope + self.assertEqual( + "dummy-package>DummySparseCategoricalCrossentropyLoss", + model_round_trip["class_name"] + ) - # check if configure is correctly - self.assertEqual( - True, - model_round_trip["config"]["from_logits"] - ) + # check if configure is correctly + self.assertEqual( + True, + model_round_trip["config"]["from_logits"] + ) if __name__ == "__main__": From e4ea3977e34cb8a65198b10aa8592ecb7c92911a Mon Sep 17 00:00:00 2001 From: Xiaoquan Kong Date: Mon, 24 Feb 2020 21:00:07 +0800 Subject: [PATCH 039/253] fix: code style --- tensorflow/python/util/serialization_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/util/serialization_test.py b/tensorflow/python/util/serialization_test.py index 501dcda30f1..97a3c4b3056 100644 --- a/tensorflow/python/util/serialization_test.py +++ b/tensorflow/python/util/serialization_test.py @@ -114,4 +114,4 @@ class SerializationTests(test.TestCase): if __name__ == "__main__": - test.main() + test.main() From 
cc694d73199d97cd76cc37f0b5d603b190c89a56 Mon Sep 17 00:00:00 2001 From: Ashutosh Hathidara Date: Tue, 25 Feb 2020 00:10:34 +0530 Subject: [PATCH 040/253] Comments changed --- tensorflow/python/data/ops/dataset_ops.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index b16624792af..581341a266f 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -534,8 +534,8 @@ class DatasetV2(tracking_base.Trackable, composite_tensor.CompositeTensor): >>> list(dataset.as_numpy_iterator()) [(array([1, 2, 3], dtype=int32), b'A')] - >>> # `from_tensors` creates 3D tensor in below example - >>> # unlike `from_tensor_slices` which merges the input tensor. + >>> # `from_tensors` adds one more dimension to the shape + >>> # use `from_tensor_slices` to merge the input tensor. >>> dataset = tf.data.Dataset.from_tensors([tf.random_uniform([2, 3]), tf.random_uniform([2, 3])]) >>> list(dataset.as_numpy_iterator())[0].shape @@ -618,8 +618,8 @@ class DatasetV2(tracking_base.Trackable, composite_tensor.CompositeTensor): [3, 2]], dtype=int32), array([[b'A'], [b'B']], dtype=object)) - >>> # `from_tensor_slices` merges the input tensor - >>> # unlike `from_tensors` which will create 3D tensor in below example. + >>> # `from_tensor_slices` merges the input tensor, unlike `from_tensors` + >>> # which increases dimensionality. 
>>> dataset = tf.data.Dataset.from_tensor_slices([tf.random.uniform([2, 3]), tf.random.uniform([2, 3])]) >>> list(dataset.as_numpy_iterator())[0].shape From f613b5c8982686d3ec0421217219ea6b4d54e318 Mon Sep 17 00:00:00 2001 From: Ashutosh Hathidara Date: Tue, 25 Feb 2020 00:56:18 +0530 Subject: [PATCH 041/253] Comments changed --- tensorflow/python/data/ops/dataset_ops.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 581341a266f..f015db9c03c 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -536,8 +536,9 @@ class DatasetV2(tracking_base.Trackable, composite_tensor.CompositeTensor): >>> # `from_tensors` adds one more dimension to the shape >>> # use `from_tensor_slices` to merge the input tensor. - >>> dataset = tf.data.Dataset.from_tensors([tf.random_uniform([2, 3]), - tf.random_uniform([2, 3])]) + >>> dataset = tf.data.Dataset.from_tensors( + [tf.random_uniform([2, 3]), tf.random_uniform([2, 3])] + ) >>> list(dataset.as_numpy_iterator())[0].shape (2, 2, 3) @@ -620,8 +621,9 @@ class DatasetV2(tracking_base.Trackable, composite_tensor.CompositeTensor): >>> # `from_tensor_slices` merges the input tensor, unlike `from_tensors` >>> # which increases dimensionality. 
- >>> dataset = tf.data.Dataset.from_tensor_slices([tf.random.uniform([2, 3]), - tf.random.uniform([2, 3])]) + >>> dataset = tf.data.Dataset.from_tensor_slices( + [tf.random.uniform([2, 3]), tf.random.uniform([2, 3])] + ) >>> list(dataset.as_numpy_iterator())[0].shape (2, 3) From a2661fb64011682451ce181126ab48bb59523f16 Mon Sep 17 00:00:00 2001 From: Ashutosh Hathidara Date: Tue, 25 Feb 2020 12:35:44 +0530 Subject: [PATCH 042/253] Changed example and comments --- tensorflow/python/data/ops/dataset_ops.py | 26 +++++++++++------------ 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index f015db9c03c..6c12a06859a 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -534,13 +534,12 @@ class DatasetV2(tracking_base.Trackable, composite_tensor.CompositeTensor): >>> list(dataset.as_numpy_iterator()) [(array([1, 2, 3], dtype=int32), b'A')] - >>> # `from_tensors` adds one more dimension to the shape - >>> # use `from_tensor_slices` to merge the input tensor. - >>> dataset = tf.data.Dataset.from_tensors( - [tf.random_uniform([2, 3]), tf.random_uniform([2, 3])] - ) - >>> list(dataset.as_numpy_iterator())[0].shape - (2, 2, 3) + >>> # `from_tensors` does not change the shape of input tensor. + >>> # use `from_tensor_slices` to slice the input tensor. + >>> example = tf.constant([1,2,3]) + >>> dataset = tf.data.Dataset.from_tensors(example).repeat(1000) + >>> np.array(list(dataset1.as_numpy_iterator())).shape + (1000, 3) Note that if `tensors` contains a NumPy array, and eager execution is not enabled, the values will be embedded in the graph as one or more @@ -619,13 +618,12 @@ class DatasetV2(tracking_base.Trackable, composite_tensor.CompositeTensor): [3, 2]], dtype=int32), array([[b'A'], [b'B']], dtype=object)) - >>> # `from_tensor_slices` merges the input tensor, unlike `from_tensors` - >>> # which increases dimensionality. 
- >>> dataset = tf.data.Dataset.from_tensor_slices( - [tf.random.uniform([2, 3]), tf.random.uniform([2, 3])] - ) - >>> list(dataset.as_numpy_iterator())[0].shape - (2, 3) + >>> # `from_tensor_slices` slices the input tensor, + >>> # unlike `from_tensors` which retains the shape of input tensor. + >>> example = tf.constant([1,2,3]) + >>> dataset = tf.data.Dataset.from_tensor_slices(example).repeat(1000) + >>> np.array(list(dataset1.as_numpy_iterator())).shape + (3000,) Note that if `tensors` contains a NumPy array, and eager execution is not enabled, the values will be embedded in the graph as one or more From 42068668663b774954be5e850407902d73044845 Mon Sep 17 00:00:00 2001 From: Xiaoquan Kong Date: Tue, 25 Feb 2020 16:16:38 +0800 Subject: [PATCH 043/253] A tiny typo fix in docstring in Keras Model:fit --- tensorflow/python/keras/engine/training.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index ba1b25a9423..e5730f42301 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -537,7 +537,7 @@ class Model(network.Network, version_utils.ModelVersionSelector): of either `(inputs, targets)` or `(inputs, targets, sample_weights)`. - A generator or `keras.utils.Sequence` returning `(inputs, targets)` - or `(inputs, targets, sample weights)`. + or `(inputs, targets, sample_weights)`. A more detailed description of unpacking behavior for iterator types (Dataset, generator, Sequence) is given below. y: Target data. 
Like the input data `x`, From 8aa4453ecfcf359ebb41ea2f1cdc99b3d78f8ec6 Mon Sep 17 00:00:00 2001 From: Rahul Huilgol Date: Tue, 25 Feb 2020 19:06:06 +0000 Subject: [PATCH 044/253] Remove trailing whitespace --- tensorflow/core/platform/s3/s3_file_system.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/platform/s3/s3_file_system.cc b/tensorflow/core/platform/s3/s3_file_system.cc index 4f30a154111..1ee43af97ec 100644 --- a/tensorflow/core/platform/s3/s3_file_system.cc +++ b/tensorflow/core/platform/s3/s3_file_system.cc @@ -144,7 +144,7 @@ Aws::Client::ClientConfiguration& GetDefaultClientConfig() { if (ca_path) { cfg.caPath = Aws::String(ca_path); } - + init = true; } From 4cce76cdeb8440995d343ca8920b2b1f17ac4939 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Tue, 25 Feb 2020 22:40:36 +0100 Subject: [PATCH 045/253] Use rebuild_func() for conversion too, edit doc --- .../python/compiler/tensorrt/trt_convert.py | 52 ++++++++----------- 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/tensorflow/python/compiler/tensorrt/trt_convert.py b/tensorflow/python/compiler/tensorrt/trt_convert.py index f172804b70b..8d2b09d3427 100644 --- a/tensorflow/python/compiler/tensorrt/trt_convert.py +++ b/tensorflow/python/compiler/tensorrt/trt_convert.py @@ -1006,6 +1006,7 @@ class TrtGraphConverterV2(object): # rewriter_config is already validated self._need_trt_profiles = None + for optimizer in self._rewriter_config.custom_optimizers: if optimizer.name == "TensorRTOptimizer": self._need_trt_profiles = not optimizer.parameter_map[ @@ -1041,6 +1042,17 @@ class TrtGraphConverterV2(object): if node.op == _TRT_ENGINE_OP_NAME: fn(node) + def _rebuild_func(self, func): + """ Rebuild function from graph_def. 
""" + rebuilt_func = wrap_function.function_from_graph_def( + self._converted_graph_def, + [tensor.name for tensor in func.inputs], + [tensor.name for tensor in func.outputs]) + rebuilt_func.graph.structured_outputs = nest.pack_sequence_as( + func.graph.structured_outputs, + rebuilt_func.graph.structured_outputs) + return rebuilt_func + # TODO(laigd): provide a utility function to optimize a ConcreteFunction and # use it here (b/124792963). def convert(self, calibration_input_fn=None): @@ -1105,14 +1117,7 @@ class TrtGraphConverterV2(object): _save_calibration_table) # Rebuild the function since calibration has changed the graph. - calibrated_func = wrap_function.function_from_graph_def( - self._converted_graph_def, - [tensor.name for tensor in self._converted_func.inputs], - [tensor.name for tensor in self._converted_func.outputs]) - calibrated_func.graph.structured_outputs = nest.pack_sequence_as( - self._converted_func.graph.structured_outputs, - calibrated_func.graph.structured_outputs) - self._converted_func = calibrated_func + self._converted_func = self._rebuild_func(self._converted_func) self._converted = True @@ -1143,17 +1148,6 @@ class TrtGraphConverterV2(object): raise RuntimeError("input_fn is None. Method build() needs input_fn " "to be specified in order to build TensorRT engines") - def _rebuild_func(): - # Rebuild function from graph_def. 
- reset_converted_func = wrap_function.function_from_graph_def( - self._converted_graph_def, - [tensor.name for tensor in self._converted_func.inputs], - [tensor.name for tensor in self._converted_func.outputs]) - reset_converted_func.graph.structured_outputs = nest.pack_sequence_as( - self._converted_func.graph.structured_outputs, - reset_converted_func.graph.structured_outputs) - self._converted_func = reset_converted_func - def _set_profile_generation_mode(value, node): node.attr["_profile_generation_mode"].b = value @@ -1164,13 +1158,10 @@ class TrtGraphConverterV2(object): # Profile generation is enabled using the _profile_generation_mode # attribute of the TRTEngineOps. We need to rebuild the function to # change this attribute. - _rebuild_func() + func = self._rebuild_func(self._converted_func) + else: + func = self._converted_func - # Use the first input in explicit batch mode to build TensorRT engines - # after generating all the profiles. The first input is used but any of - # the inputs can be used because the shape of this input does not - # determine the engine and instead the shapes collected in profiles - # determine the engine. first_input = None # Run inference: # Builds TRT engines if self._need_trt_profiles is False. @@ -1178,14 +1169,17 @@ class TrtGraphConverterV2(object): for inp in input_fn(): if not first_input: first_input = inp - self._converted_func(*map(ops.convert_to_tensor, inp)) + func(*map(ops.convert_to_tensor, inp)) + if self._need_trt_profiles: # Disable profile generation. self._for_each_trt_node(self._converted_graph_def, partial(_set_profile_generation_mode, False)) - _rebuild_func() - # Run inference to build TensorRT engines out of generated optimization - # profiles. + # Use the first input in explicit batch mode to build TensorRT engines + # after generating all the profiles. 
The first input is used but any of + # the inputs can be used because the shape of this input does not + # determine the engine and instead the shapes collected in profiles + # determine the engine. self._converted_func(*map(ops.convert_to_tensor, first_input)) self._build_called_once = True From 1195aaa9ec2d29b553f7a21c5ad4027db6a475c5 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Tue, 25 Feb 2020 23:11:52 +0100 Subject: [PATCH 046/253] Introduce IsExplicitBatchModeEnabled function in trt_convert --- .../test/tf_trt_integration_test_base.py | 18 +++------------- .../python/compiler/tensorrt/trt_convert.py | 21 +++++++++++-------- 2 files changed, 15 insertions(+), 24 deletions(-) diff --git a/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py b/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py index 0f892e3d3ff..b2bb57f5140 100644 --- a/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py +++ b/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py @@ -474,19 +474,6 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): converter.save(trt_saved_model_dir) return trt_saved_model_dir - def _NeedToBuild(self, conversion_params): - """ Whether we need to call converter.build(). 
- Currently we need to build if we have explicit batch (dynamic shapes) - """ - config = conversion_params.rewriter_config_template - if config is None: - return False - for optimizer in config.custom_optimizers: - if optimizer.name == 'TensorRTOptimizer': - if "use_implicit_batch" in optimizer.parameter_map: - return not optimizer.parameter_map["use_implicit_batch"].b - return False - def _GetInferGraph(self, run_params, saved_model_dir): """Return trt converted graphdef.""" conversion_params = self.GetConversionParams(run_params) @@ -499,8 +486,9 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): session_config, conversion_params) converter.convert() - if self._NeedToBuild(conversion_params): - logging.info("Need to start build mode") + if trt_convert.IsExplicitBatchModeEnabled( + conversion_params.rewriter_config_template): + logging.info("Using build mode") def _BuildInputFn(): for shapes in self._GetParamsCached().input_dims: yield [np.zeros(x).astype(np.float32) for x in shapes] diff --git a/tensorflow/python/compiler/tensorrt/trt_convert.py b/tensorflow/python/compiler/tensorrt/trt_convert.py index 8d2b09d3427..ec724b7c8db 100644 --- a/tensorflow/python/compiler/tensorrt/trt_convert.py +++ b/tensorflow/python/compiler/tensorrt/trt_convert.py @@ -384,6 +384,17 @@ def _get_canonical_engine_name(name): return name.split("/")[-1] +def IsExplicitBatchModeEnabled(rewriter_config): + """ Checks whether explicit batch is enabled by the rewriter config """ + if rewriter_config is None: + return False + for optimizer in rewriter_config.custom_optimizers: + if optimizer.name == 'TensorRTOptimizer': + if "use_implicit_batch" in optimizer.parameter_map: + return not optimizer.parameter_map["use_implicit_batch"].b + return False + + class TrtGraphConverter(object): """A converter for TF-TRT transformation for TF 1.x GraphDef/SavedModels. @@ -1005,15 +1016,7 @@ class TrtGraphConverterV2(object): "with static TensorRT ops. 
Set is_dynamic_op to True.") # rewriter_config is already validated - self._need_trt_profiles = None - - for optimizer in self._rewriter_config.custom_optimizers: - if optimizer.name == "TensorRTOptimizer": - self._need_trt_profiles = not optimizer.parameter_map[ - "use_implicit_batch"].b \ - if "use_implicit_batch" in optimizer.parameter_map else False - assert self._need_trt_profiles != None - + self._need_trt_profiles = IsExplicitBatchModeEnabled(self._rewriter_config) self._converted = False self._build_called_once = False From a7908e924af3105c3007988e219855174b26774f Mon Sep 17 00:00:00 2001 From: Gaurav Singh Date: Sun, 26 Jan 2020 01:36:39 -0500 Subject: [PATCH 047/253] [core] Added check for output buffer --- tensorflow/core/lib/jpeg/jpeg_mem.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorflow/core/lib/jpeg/jpeg_mem.cc b/tensorflow/core/lib/jpeg/jpeg_mem.cc index 03befabdefe..8a94b39606e 100644 --- a/tensorflow/core/lib/jpeg/jpeg_mem.cc +++ b/tensorflow/core/lib/jpeg/jpeg_mem.cc @@ -593,6 +593,11 @@ bool GetImageInfo(const void* srcdata, int datasize, int* width, int* height, namespace { bool CompressInternal(const uint8* srcdata, int width, int height, const CompressFlags& flags, tstring* output) { + if (output == nullptr) + LOG(ERROR) << "Output buffer is null: "; + return false; + } + output->clear(); const int components = (static_cast(flags.format) & 0xff); From 5456ca81095b87d4c4be8b3ae261b299c45e4b90 Mon Sep 17 00:00:00 2001 From: aaronhma Date: Tue, 25 Feb 2020 17:34:30 -0800 Subject: [PATCH 048/253] Update README to add new resources and fix grammar --- README.md | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index e95fea22c56..5b4dd28c446 100644 --- a/README.md +++ b/README.md @@ -130,18 +130,20 @@ Build Type | Status ## Resources * [TensorFlow.org](https://www.tensorflow.org) -* [TensorFlow tutorials](https://www.tensorflow.org/tutorials/) -* [TensorFlow official 
models](https://github.com/tensorflow/models/tree/master/official) -* [TensorFlow examples](https://github.com/tensorflow/examples) +* [TensorFlow Tutorials](https://www.tensorflow.org/tutorials/) +* [TensorFlow Official Models](https://github.com/tensorflow/models/tree/master/official) +* [TensorFlow Examples](https://github.com/tensorflow/examples) * [TensorFlow in Practice from Coursera](https://www.coursera.org/specializations/tensorflow-in-practice) +* [TensorFlow: Data and Deployment from Coursera](https://www.coursera.org/specializations/tensorflow-data-and-deployment) * [Intro to TensorFlow for Deep Learning from Udacity](https://www.udacity.com/course/intro-to-tensorflow-for-deep-learning--ud187) * [Introduction to TensorFlow Lite from Udacity](https://www.udacity.com/course/intro-to-tensorflow-lite--ud190) -* [TensorFlow blog](https://blog.tensorflow.org) +* [TensorFlow Blog](https://blog.tensorflow.org) +* [Learn ML with TensorFlow](https://www.tensorflow.org/resources/learn-ml) * [TensorFlow Twitter](https://twitter.com/tensorflow) * [TensorFlow YouTube](https://www.youtube.com/channel/UC0rqucBdTuFTjJiefW5t-IQ) -* [TensorFlow roadmap](https://www.tensorflow.org/community/roadmap) -* [TensorFlow white papers](https://www.tensorflow.org/about/bib) -* [TensorBoard visualization toolkit](https://github.com/tensorflow/tensorboard) +* [TensorFlow Roadmap](https://www.tensorflow.org/community/roadmap) +* [TensorFlow White Papers](https://www.tensorflow.org/about/bib) +* [TensorBoard Visualization Toolkit](https://github.com/tensorflow/tensorboard) Learn more about the [TensorFlow community](https://www.tensorflow.org/community) and how to From f8e2a7d20408793d85c8b33c46ab7bb230bb4592 Mon Sep 17 00:00:00 2001 From: Ashutosh Hathidara Date: Wed, 26 Feb 2020 13:57:09 +0530 Subject: [PATCH 049/253] Changed examples and comments to show difference by repeating elements --- tensorflow/python/data/ops/dataset_ops.py | 24 +++++++++++++---------- 1 file changed, 14 
insertions(+), 10 deletions(-) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 6c12a06859a..59cba9d2d61 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -527,6 +527,9 @@ class DatasetV2(tracking_base.Trackable, composite_tensor.CompositeTensor): def from_tensors(tensors): """Creates a `Dataset` with a single element, comprising the given tensors. + `from_tensors` produces a dataset containing only a single element. To slice + the input tensor into multiple elements, use `from_tensor_slices` instead. + >>> dataset = tf.data.Dataset.from_tensors([1, 2, 3]) >>> list(dataset.as_numpy_iterator()) [array([1, 2, 3], dtype=int32)] @@ -534,12 +537,12 @@ class DatasetV2(tracking_base.Trackable, composite_tensor.CompositeTensor): >>> list(dataset.as_numpy_iterator()) [(array([1, 2, 3], dtype=int32), b'A')] - >>> # `from_tensors` does not change the shape of input tensor. - >>> # use `from_tensor_slices` to slice the input tensor. + >>> # You can use `from_tensors` to produce a dataset which repeats + >>> # the same example many times. >>> example = tf.constant([1,2,3]) - >>> dataset = tf.data.Dataset.from_tensors(example).repeat(1000) - >>> np.array(list(dataset1.as_numpy_iterator())).shape - (1000, 3) + >>> dataset = tf.data.Dataset.from_tensors(example).repeat(2) + >>> list(dataset1.as_numpy_iterator()) + [array([1, 2, 3], dtype=int32), array([1, 2, 3], dtype=int32)] Note that if `tensors` contains a NumPy array, and eager execution is not enabled, the values will be embedded in the graph as one or more @@ -618,12 +621,13 @@ class DatasetV2(tracking_base.Trackable, composite_tensor.CompositeTensor): [3, 2]], dtype=int32), array([[b'A'], [b'B']], dtype=object)) - >>> # `from_tensor_slices` slices the input tensor, - >>> # unlike `from_tensors` which retains the shape of input tensor. 
+ >>> # `from_tensor_slices` can also be used to repeat the examples + >>> # in the data. But it doesn't repeat the whole tensor like `from_tensors`, + >>> # instead it repeats the individual elements (slices) from the example. >>> example = tf.constant([1,2,3]) - >>> dataset = tf.data.Dataset.from_tensor_slices(example).repeat(1000) - >>> np.array(list(dataset1.as_numpy_iterator())).shape - (3000,) + >>> dataset = tf.data.Dataset.from_tensor_slices(example).repeat(2) + >>> list(dataset1.as_numpy_iterator()) + [1, 2, 3, 1, 2, 3] Note that if `tensors` contains a NumPy array, and eager execution is not enabled, the values will be embedded in the graph as one or more From 32848ea8a6c12f2db70623f80c33227d69d5fa51 Mon Sep 17 00:00:00 2001 From: Jake Tae Date: Wed, 26 Feb 2020 20:15:07 +0900 Subject: [PATCH 050/253] Update docstring for consine_similarity The edited docstring correctly indicates the `y_true` and `y_pred` need not be normalized, and that passing a zero vector as one of the arguments will return 0 regardless of the proximity between true and predicted labels. --- tensorflow/python/keras/losses.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/losses.py b/tensorflow/python/keras/losses.py index 061e31140b7..15aed3f196e 100644 --- a/tensorflow/python/keras/losses.py +++ b/tensorflow/python/keras/losses.py @@ -1216,9 +1216,11 @@ def cosine_similarity(y_true, y_pred, axis=-1): Note that it is a negative quantity between -1 and 0, where 0 indicates orthogonality and values closer to -1 indicate greater similarity. This makes it usable as a loss function in a setting where you try to maximize the - proximity between predictions and targets. + proximity between predictions and targets. If either `y_true` or `y_pred` + is a zero vector, cosine similarity will be 0 regardless of the proximity + between predictions and targets. 
- `loss = -sum(y_true * y_pred)` + `loss = -sum(l2_norm(y_true) * l2_norm(y_pred))` Args: y_true: Tensor of true targets. From 640be01689cacc01cf9a05fa71c8d940f929a2b8 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Wed, 26 Feb 2020 14:56:45 +0100 Subject: [PATCH 051/253] Disable explicit batch test in trt_engine_op_test Currently we do not have a build() equivalent in the C++ API, therfore we cannot generate profiles. Without profiles we cannot test in explicit batch mode (which includes dynamic shapes mode too). --- .../tf2tensorrt/kernels/trt_engine_op_test.cc | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op_test.cc b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op_test.cc index da8bd6686a7..a06010de1c7 100644 --- a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op_test.cc +++ b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op_test.cc @@ -238,12 +238,16 @@ TEST_F(TRTEngineOpTestBase, ExplicitBatch) { device_->resource_manager()->Lookup("TF-TRT", "myop", &cache_resource)); core::ScopedUnref sc(cache_resource); - // The cache should contain only one EngineContext, with a valid cuda_engine. + // Due to the way the engine lookup is implemented, explicit batch mode + // requires profile generation. Currently profile generaton is not enabled in + // this test therfore engine creation fails. 
+ // + // TODO(Tamas) find a way to enable profile generation mode and test it auto cache = &cache_resource->cache_; - EXPECT_EQ(1, cache->size()); - ASSERT_EQ(1, cache->count({input_shape})); - EngineContext* ectx = cache->at({input_shape}).get(); - EXPECT_NE(ectx->cuda_engine, nullptr); + EXPECT_EQ(0, cache->size()); + // ASSERT_EQ(1, cache->count({input_shape})); + // EngineContext* ectx = cache->at({input_shape}).get(); + // EXPECT_NE(ectx->cuda_engine, nullptr); } TEST_F(TRTEngineOpTestBase, DynamicShapes) { @@ -267,12 +271,13 @@ TEST_F(TRTEngineOpTestBase, DynamicShapes) { device_->resource_manager()->Lookup("TF-TRT", "myop", &cache_resource)); core::ScopedUnref sc(cache_resource); - // The cache should contain only one EngineContext. + // We did not have profile generation mode therfore engine creation failed. + // TODO(Tamas) find a way to enable profile generation mode and test it auto cache = &cache_resource->cache_; - EXPECT_EQ(1, cache->size()); - ASSERT_EQ(1, cache->count({input_shape})); - EngineContext* ectx = cache->at({input_shape}).get(); - EXPECT_NE(ectx->cuda_engine, nullptr); + EXPECT_EQ(0, cache->size()); + // ASSERT_EQ(1, cache->count({input_shape})); + // EngineContext* ectx = cache->at({input_shape}).get(); + // EXPECT_NE(ectx->cuda_engine, nullptr); } template From d6e79390a5ab7ce86833becddfcc7009840118d3 Mon Sep 17 00:00:00 2001 From: Gaurav Singh Date: Wed, 26 Feb 2020 10:02:44 -0500 Subject: [PATCH 052/253] Fix compilation errors --- tensorflow/core/lib/jpeg/jpeg_mem.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/lib/jpeg/jpeg_mem.cc b/tensorflow/core/lib/jpeg/jpeg_mem.cc index 8a94b39606e..aa80576365e 100644 --- a/tensorflow/core/lib/jpeg/jpeg_mem.cc +++ b/tensorflow/core/lib/jpeg/jpeg_mem.cc @@ -593,7 +593,7 @@ bool GetImageInfo(const void* srcdata, int datasize, int* width, int* height, namespace { bool CompressInternal(const uint8* srcdata, int width, int height, const CompressFlags& flags, 
tstring* output) { - if (output == nullptr) + if (output == nullptr) { LOG(ERROR) << "Output buffer is null: "; return false; } From 7b26cc2780cbd6b4a3ed7efe449bde0e60304b8f Mon Sep 17 00:00:00 2001 From: Ashutosh Hathidara Date: Thu, 27 Feb 2020 00:07:33 +0530 Subject: [PATCH 053/253] Modified doc --- tensorflow/python/data/ops/dataset_ops.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 59cba9d2d61..2c6e850f8a4 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -621,14 +621,6 @@ class DatasetV2(tracking_base.Trackable, composite_tensor.CompositeTensor): [3, 2]], dtype=int32), array([[b'A'], [b'B']], dtype=object)) - >>> # `from_tensor_slices` can also be used to repeat the examples - >>> # in the data. But it doesn't repeat the whole tensor like `from_tensors`, - >>> # instead it repeats the individual elements (slices) from the example. - >>> example = tf.constant([1,2,3]) - >>> dataset = tf.data.Dataset.from_tensor_slices(example).repeat(2) - >>> list(dataset1.as_numpy_iterator()) - [1, 2, 3, 1, 2, 3] - Note that if `tensors` contains a NumPy array, and eager execution is not enabled, the values will be embedded in the graph as one or more `tf.constant` operations. For large datasets (> 1 GB), this can waste From e76e68928fe1c2a9a4c4e0b47c7478dbdf60b8bf Mon Sep 17 00:00:00 2001 From: Niranjan Hasabnis Date: Wed, 26 Feb 2020 15:16:10 -0800 Subject: [PATCH 054/253] Make DNNL1.2 default lib for MKL backend --- .bazelrc | 1 + 1 file changed, 1 insertion(+) diff --git a/.bazelrc b/.bazelrc index a1f323c142d..26254facc26 100644 --- a/.bazelrc +++ b/.bazelrc @@ -136,6 +136,7 @@ build --host_java_toolchain=//third_party/toolchains/java:tf_java_toolchain # environment variable "TF_MKL_ROOT" every time before build. 
build:mkl --define=build_with_mkl=true --define=enable_mkl=true build:mkl --define=tensorflow_mkldnn_contraction_kernel=0 +build:mkl --define=build_with_mkl_dnn_v1_only=true build:mkl -c opt # This config option is used to enable MKL-DNN open source library only, From 1043101a8be1254ea208399e0f812b1629616d16 Mon Sep 17 00:00:00 2001 From: Yasir Modak <42785357+ymodak@users.noreply.github.com> Date: Wed, 26 Feb 2020 15:35:05 -0800 Subject: [PATCH 055/253] updating dead web link fixes 37047 --- tensorflow/python/profiler/model_analyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/profiler/model_analyzer.py b/tensorflow/python/profiler/model_analyzer.py index a62930af9c7..12ef1078556 100644 --- a/tensorflow/python/profiler/model_analyzer.py +++ b/tensorflow/python/profiler/model_analyzer.py @@ -315,7 +315,7 @@ def profile(graph=None, """Profile model. Tutorials and examples can be found in: - https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler/README.md + https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/profiler/g3doc/python_api.md Args: graph: tf.Graph. 
If None and eager execution is not enabled, use From 6fb232e94fe6f59b3975343d2499c7f727f05c21 Mon Sep 17 00:00:00 2001 From: lyonguyen8697 Date: Thu, 27 Feb 2020 10:25:49 +0700 Subject: [PATCH 056/253] fix deps import --- tensorflow/python/autograph/operators/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/autograph/operators/BUILD b/tensorflow/python/autograph/operators/BUILD index fe15cc9fd7f..757666f5f09 100644 --- a/tensorflow/python/autograph/operators/BUILD +++ b/tensorflow/python/autograph/operators/BUILD @@ -48,7 +48,7 @@ py_library( "//tensorflow/python:variables", "//tensorflow/python/autograph/utils", "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/ops/parallel_for:control_flow_ops", + "//tensorflow/python/ops/parallel_for", "//third_party/py/numpy", ], ) From 2776113c10a36827b9328e3430ac5f80310ef4fb Mon Sep 17 00:00:00 2001 From: lyonguyen8697 Date: Thu, 27 Feb 2020 11:09:16 +0700 Subject: [PATCH 057/253] fix deps import --- tensorflow/python/autograph/operators/BUILD | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/python/autograph/operators/BUILD b/tensorflow/python/autograph/operators/BUILD index 757666f5f09..0969606670a 100644 --- a/tensorflow/python/autograph/operators/BUILD +++ b/tensorflow/python/autograph/operators/BUILD @@ -48,7 +48,6 @@ py_library( "//tensorflow/python:variables", "//tensorflow/python/autograph/utils", "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/ops/parallel_for", "//third_party/py/numpy", ], ) From 80c0ee8eaf1b20ab510271508d6d518a9bd3783a Mon Sep 17 00:00:00 2001 From: lyonguyen8697 Date: Thu, 27 Feb 2020 11:41:46 +0700 Subject: [PATCH 058/253] work around circular dependencies --- tensorflow/python/autograph/operators/py_builtins.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/autograph/operators/py_builtins.py b/tensorflow/python/autograph/operators/py_builtins.py index 
5ce0a3fdc59..1fa619516ab 100644 --- a/tensorflow/python/autograph/operators/py_builtins.py +++ b/tensorflow/python/autograph/operators/py_builtins.py @@ -39,7 +39,6 @@ from tensorflow.python.ops import gen_string_ops from tensorflow.python.ops import list_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import sort_ops -from tensorflow.python.ops.parallel_for import control_flow_ops as parallel_ops from tensorflow.python.ops import check_ops from tensorflow.python.util import lazy_loader from tensorflow.python.util import nest @@ -50,6 +49,10 @@ from tensorflow.python.util import nest input_lib = lazy_loader.LazyLoader( 'input_lib', globals(), 'tensorflow.python.distribute.input_lib') +parallel_ops = lazy_loader.LazyLoader( + 'parallel_ops', globals(), + 'tensorflow.python.ops.parallel_for.control_flow_ops' +) UNSPECIFIED = object() From 775a828aade1b87811875ea0ddfa0554c442654b Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Fri, 20 Dec 2019 06:46:43 +0900 Subject: [PATCH 059/253] minor spelling tweaks --- .../api_def/base_api/api_def_ApplyFtrl.pbtxt | 4 +-- .../base_api/api_def_ApplyFtrlV2.pbtxt | 4 +-- ...oostedTreesCalculateBestFeatureSplit.pbtxt | 2 +- ...tedTreesCalculateBestGainsPerFeature.pbtxt | 4 +-- ...TreesSparseCalculateBestFeatureSplit.pbtxt | 2 +- .../api_def_CudnnRNNCanonicalToParamsV2.pbtxt | 2 +- .../api_def_CudnnRNNParamsToCanonicalV2.pbtxt | 2 +- .../api_def_DebugNumericSummaryV2.pbtxt | 2 +- ...ef_IsBoostedTreesEnsembleInitialized.pbtxt | 2 +- .../base_api/api_def_ResourceApplyFtrl.pbtxt | 4 +-- .../api_def_ResourceApplyFtrlV2.pbtxt | 4 +-- .../api_def_ResourceSparseApplyFtrlV2.pbtxt | 2 +- .../base_api/api_def_SparseApplyFtrlV2.pbtxt | 2 +- .../base_api/api_def_TridiagonalMatMul.pbtxt | 4 +-- .../core/common_runtime/colocation_graph.cc | 2 +- .../common_runtime/constant_folding_test.cc | 2 +- .../core/common_runtime/dynamic_device_mgr.cc | 2 +- .../common_runtime/dynamic_device_mgr_test.cc | 2 +- 
.../core/common_runtime/eager/context.h | 2 +- .../common_runtime/eager/eager_executor.h | 2 +- .../eager/eager_op_rewrite_registry.h | 2 +- .../common_runtime/eager/kernel_and_device.cc | 2 +- .../core/common_runtime/executor_test.cc | 2 +- tensorflow/core/common_runtime/function.cc | 4 +-- .../core/common_runtime/function_test.cc | 4 +-- .../core/common_runtime/gpu/gpu_device.cc | 2 +- .../core/common_runtime/gpu/gpu_device.h | 2 +- tensorflow/core/common_runtime/gpu/gpu_id.h | 4 +-- .../common_runtime/graph_execution_state.cc | 2 +- tensorflow/core/common_runtime/metrics.h | 2 +- .../common_runtime/optimization_registry.h | 2 +- .../core/common_runtime/partitioning_utils.h | 2 +- tensorflow/core/common_runtime/placer_test.cc | 8 ++--- .../process_function_library_runtime.h | 2 +- .../core/debug/debug_graph_utils_test.cc | 2 +- .../eager/cluster_function_library_runtime.cc | 2 +- .../distributed_runtime/master_session.cc | 2 +- .../rpc/grpc_master_service.cc | 2 +- .../distributed_runtime/rpc/grpc_state.cc | 6 ++-- .../core/distributed_runtime/rpc/grpc_state.h | 8 ++--- .../core/distributed_runtime/test_utils.h | 2 +- .../distributed_runtime/worker_interface.h | 2 +- tensorflow/core/framework/common_shape_fns.h | 2 +- tensorflow/core/framework/dataset.h | 4 +-- .../framework/dataset_stateful_op_whitelist.h | 2 +- tensorflow/core/framework/function.h | 2 +- tensorflow/core/framework/model.cc | 2 +- tensorflow/core/framework/node_def_util.h | 2 +- tensorflow/core/framework/run_handler.cc | 6 ++-- tensorflow/core/framework/session_state.h | 2 +- tensorflow/core/framework/stats_aggregator.h | 6 ++-- .../core/framework/variant_op_registry.h | 2 +- tensorflow/core/graph/gradients.cc | 4 +-- tensorflow/core/graph/graph_constructor.cc | 2 +- tensorflow/core/graph/graph_constructor.h | 2 +- .../core/graph/graph_constructor_test.cc | 2 +- tensorflow/core/graph/mkl_graph_util.h | 2 +- tensorflow/core/graph/mkl_layout_pass.cc | 4 +-- 
.../grappler/clusters/single_machine_test.cc | 2 +- .../core/grappler/costs/graph_properties.cc | 6 ++-- .../grappler/costs/graph_properties_test.cc | 4 +-- .../grappler/costs/op_level_cost_estimator.cc | 2 +- .../grappler/costs/op_level_cost_estimator.h | 2 +- .../costs/op_level_cost_estimator_test.cc | 16 +++++----- .../core/grappler/costs/virtual_scheduler.cc | 10 +++--- .../grappler/costs/virtual_scheduler_test.cc | 6 ++-- .../core/grappler/graph_analyzer/sig_node.cc | 2 +- tensorflow/core/grappler/mutable_graph_view.h | 2 +- .../core/grappler/mutable_graph_view_test.cc | 2 +- .../optimizers/arithmetic_optimizer.cc | 6 ++-- .../grappler/optimizers/constant_folding.cc | 4 +-- .../optimizers/constant_folding_test.cc | 8 ++--- .../optimizers/data/latency_all_edges_test.cc | 2 +- .../grappler/optimizers/function_optimizer.cc | 2 +- .../grappler/optimizers/layout_optimizer.cc | 6 ++-- .../grappler/optimizers/memory_optimizer.cc | 2 +- .../optimizers/memory_optimizer_test.cc | 2 +- .../grappler/optimizers/meta_optimizer.cc | 2 +- .../core/grappler/optimizers/meta_optimizer.h | 2 +- .../core/grappler/optimizers/model_pruner.cc | 2 +- .../grappler/optimizers/model_pruner_test.cc | 4 +-- .../optimizers/pin_to_host_optimizer.cc | 2 +- .../optimizers/scoped_allocator_optimizer.cc | 2 +- .../scoped_allocator_optimizer_test.cc | 2 +- tensorflow/core/grappler/utils/graph_view.cc | 2 +- .../core/grappler/utils/grappler_test.h | 4 +-- tensorflow/core/kernels/adjust_contrast_op.cc | 6 ++-- .../adaptive_shared_batch_scheduler.h | 2 +- .../batching_util/shared_batch_scheduler.h | 2 +- .../kernels/boosted_trees/quantile_ops.cc | 2 +- .../quantiles/weighted_quantiles_stream.h | 4 +-- .../core/kernels/boosted_trees/resources.cc | 2 +- .../core/kernels/check_numerics_op_gpu.cu.cc | 2 +- .../kernels/collective_nccl_broadcaster.cc | 2 +- tensorflow/core/kernels/conv_ops_fused_impl.h | 2 +- tensorflow/core/kernels/cuda_sparse.h | 2 +- tensorflow/core/kernels/cwise_op_add_1.cc | 2 +- 
tensorflow/core/kernels/cwise_op_select.cc | 4 +-- .../core/kernels/cwise_ops_gpu_gradients.cu.h | 2 +- .../core/kernels/data/captured_function.cc | 2 +- .../experimental/parse_example_dataset_op.cc | 6 ++-- .../data/experimental/stats_aggregator_ops.cc | 8 ++--- .../core/kernels/data/flat_map_dataset_op.cc | 2 +- .../data/parallel_interleave_dataset_op.cc | 2 +- .../parallel_interleave_dataset_op_test.cc | 4 +-- tensorflow/core/kernels/debug_ops.h | 4 +-- tensorflow/core/kernels/dense_update_ops.cc | 2 +- tensorflow/core/kernels/depthwise_conv_op.cc | 2 +- tensorflow/core/kernels/depthwise_conv_op.h | 2 +- tensorflow/core/kernels/dynamic_stitch_op.cc | 2 +- .../core/kernels/eigen_contraction_kernel.h | 6 ++-- .../core/kernels/eigen_cuboid_convolution.h | 6 ++-- .../kernels/eigen_spatial_convolutions-inl.h | 6 ++-- tensorflow/core/kernels/eigen_volume_patch.h | 2 +- .../core/kernels/gather_functor_batched.h | 2 +- .../kernels/gather_functor_batched_gpu.cu.h | 2 +- .../core/kernels/gather_functor_gpu.cu.h | 2 +- tensorflow/core/kernels/gpu_utils.h | 2 +- .../hexagon/hexagon_ops_definitions.cc | 4 +-- tensorflow/core/kernels/hinge-loss.h | 2 +- tensorflow/core/kernels/logistic-loss.h | 2 +- tensorflow/core/kernels/loss_test.cc | 2 +- tensorflow/core/kernels/matmul_op.cc | 2 +- tensorflow/core/kernels/mkl_aggregate_ops.cc | 2 +- .../core/kernels/mkl_conv_grad_filter_ops.cc | 2 +- .../core/kernels/mkl_conv_grad_input_ops.cc | 4 +-- tensorflow/core/kernels/mkl_conv_ops.cc | 4 +-- tensorflow/core/kernels/mkl_conv_ops.h | 2 +- tensorflow/core/kernels/mkl_qmatmul_op.cc | 2 +- .../core/kernels/mkl_qmatmul_op_test.cc | 2 +- tensorflow/core/kernels/mkl_relu_op.cc | 6 +++- .../core/kernels/neon/depthwiseconv_float.h | 24 +++++++------- tensorflow/core/kernels/pooling_ops_common.h | 2 +- .../kernels/quantized_resize_bilinear_op.cc | 2 +- .../quantized_resize_bilinear_op_test.cc | 2 +- tensorflow/core/kernels/random_op_cpu.h | 2 +- tensorflow/core/kernels/record_yielder.h | 2 +- 
.../remote_fused_graph_execute_utils.cc | 2 +- tensorflow/core/kernels/resize_bicubic_op.cc | 16 +++++----- tensorflow/core/kernels/resize_bilinear_op.cc | 4 +-- .../core/kernels/resource_variable_ops.cc | 2 +- tensorflow/core/kernels/rnn/gru_ops.cc | 6 ++-- tensorflow/core/kernels/roll_op.cc | 32 +++++++++---------- tensorflow/core/kernels/sdca_internal.h | 4 +-- tensorflow/core/kernels/set_kernels.cc | 4 +-- tensorflow/core/kernels/smooth-hinge-loss.h | 2 +- .../core/kernels/sparse/sparse_cholesky_op.cc | 2 +- .../kernels/sparse_sparse_binary_op_shared.cc | 2 +- tensorflow/core/kernels/spectrogram.cc | 2 +- .../core/kernels/stateful_random_ops.cc | 2 +- tensorflow/core/kernels/string_split_op.cc | 2 +- tensorflow/core/kernels/tile_ops_gpu_impl.h | 2 +- .../core/kernels/tile_ops_gpu_impl_1.cu.cc | 2 +- .../core/kernels/tile_ops_gpu_impl_2.cu.cc | 2 +- .../core/kernels/tile_ops_gpu_impl_3.cu.cc | 2 +- .../core/kernels/tile_ops_gpu_impl_4.cu.cc | 2 +- .../core/kernels/tile_ops_gpu_impl_5.cu.cc | 2 +- .../core/kernels/tile_ops_gpu_impl_6.cu.cc | 2 +- .../core/kernels/tile_ops_gpu_impl_7.cu.cc | 2 +- .../core/kernels/tile_ops_gpu_impl_8.cu.cc | 2 +- .../core/kernels/unsorted_segment_join_op.cc | 2 +- tensorflow/core/lib/io/format.cc | 8 ++--- tensorflow/core/lib/io/format.h | 2 +- tensorflow/core/lib/io/table_test.cc | 4 +-- .../core/lib/random/random_distributions.h | 2 +- tensorflow/core/lib/wav/wav_io.h | 2 +- tensorflow/core/ops/array_ops.cc | 2 +- tensorflow/core/ops/array_ops_test.cc | 4 +-- tensorflow/core/ops/parsing_ops.cc | 2 +- tensorflow/core/ops/scoped_allocator_ops.cc | 2 +- tensorflow/core/ops/stateful_random_ops.cc | 2 +- tensorflow/core/ops/training_ops_test.cc | 4 +-- .../core/platform/cloud/curl_http_request.h | 2 +- .../core/platform/cloud/gcs_file_system.cc | 6 ++-- tensorflow/core/platform/denormal.cc | 4 +-- .../platform/hadoop/hadoop_file_system.cc | 2 +- tensorflow/core/platform/logger.h | 2 +- tensorflow/core/platform/macros.h | 2 +- 
tensorflow/core/platform/mutex.h | 2 +- .../core/platform/platform_strings_test.cc | 2 +- .../core/platform/profile_utils/cpu_utils.h | 2 +- .../core/platform/rocm_rocdl_path_test.cc | 2 +- tensorflow/core/platform/s3/s3_file_system.cc | 2 +- tensorflow/core/platform/tracing.h | 4 +-- tensorflow/core/platform/windows/port.cc | 2 +- tensorflow/core/profiler/g3doc/python_api.md | 2 +- .../profiler/internal/gpu/cupti_tracer.cc | 4 +-- .../core/profiler/internal/gpu/cupti_tracer.h | 2 +- .../core/profiler/internal/tfprof_code.cc | 2 +- .../core/profiler/internal/tfprof_node.cc | 2 +- .../core/profiler/internal/tfprof_utils.cc | 2 +- .../core/profiler/lib/profiler_session.h | 2 +- tensorflow/core/profiler/profiler.cc | 2 +- tensorflow/core/profiler/protobuf/BUILD | 2 +- .../core/protobuf/rewriter_config.proto | 2 +- .../core/protobuf/tpu/compile_metadata.proto | 2 +- tensorflow/core/util/bcast.h | 4 +-- .../core/util/command_line_flags_test.cc | 2 +- tensorflow/core/util/debug_events_writer.h | 2 +- .../core/util/debug_events_writer_test.cc | 2 +- .../core/util/example_proto_fast_parsing.cc | 6 ++-- .../core/util/example_proto_fast_parsing.h | 2 +- tensorflow/core/util/mkl_util.h | 10 +++--- tensorflow/core/util/stat_summarizer.cc | 4 +-- 204 files changed, 335 insertions(+), 331 deletions(-) diff --git a/tensorflow/core/api_def/base_api/api_def_ApplyFtrl.pbtxt b/tensorflow/core/api_def/base_api/api_def_ApplyFtrl.pbtxt index 77da9e4d510..0f49a18a114 100644 --- a/tensorflow/core/api_def/base_api/api_def_ApplyFtrl.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ApplyFtrl.pbtxt @@ -33,13 +33,13 @@ END in_arg { name: "l1" description: <