From 0315fa71402e08c0fb77f5d422554b51dfb8251c Mon Sep 17 00:00:00 2001 From: Mihai Maruseac <mihaimaruseac@google.com> Date: Fri, 18 Sep 2020 16:54:17 -0700 Subject: [PATCH] Prevent format string vulnerability in `tf.strings.as_string`. The `printf` format specifier only allows `#`, `0`, `-`, `+` and space as flag characters. Others are interpreted as width/precision/length modifier or conversion specifiers. If a character does not fit into any of these sets `printf` just displays it. Also add a test suite for `tf.strings.as_string`. Also fix the issue where the flag character was used only if width was specified. PiperOrigin-RevId: 332553548 Change-Id: Ie57cf2a7c14d1a36097642794c14329db669bbba --- tensorflow/core/kernels/BUILD | 18 ++ tensorflow/core/kernels/as_string_op.cc | 19 +- tensorflow/core/kernels/as_string_op_test.cc | 245 +++++++++++++++++++ 3 files changed, 281 insertions(+), 1 deletion(-) create mode 100644 tensorflow/core/kernels/as_string_op_test.cc diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 7da864a6027..14f7d99bf2e 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -6085,6 +6085,24 @@ tf_kernel_library( deps = STRING_DEPS, ) +tf_cc_test( + name = "as_string_op_test", + size = "small", + srcs = ["as_string_op_test.cc"], + deps = [ + ":as_string_op", + ":ops_testutil", + ":ops_util", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + tf_kernel_library( name = "unicode_ops", prefix = "unicode_ops", diff --git a/tensorflow/core/kernels/as_string_op.cc b/tensorflow/core/kernels/as_string_op.cc index 8341909fbc8..b9af976a654 100644 --- a/tensorflow/core/kernels/as_string_op.cc +++ b/tensorflow/core/kernels/as_string_op.cc @@ -65,9 +65,26 @@ class AsStringOp : public OpKernel { OP_REQUIRES(ctx, !(scientific && shortest), errors::InvalidArgument( "Cannot select both scientific and shortest notation")); + format_ = "%"; + if (!fill_string.empty()) { + switch (fill_string[0]) { + case ' ': + case '+': + case '-': + case '0': + case '#': + strings::Appendf(&format_, "%s", fill_string.c_str()); + break; + default: + bool fill_not_supported = true; + OP_REQUIRES(ctx, !fill_not_supported, + errors::InvalidArgument("Fill argument not supported: \"", + fill_string, "\"")); + } + } if (width > -1) { - strings::Appendf(&format_, "%s%d", fill_string.c_str(), width); + strings::Appendf(&format_, "%d", width); } if (precision > -1) { strings::Appendf(&format_, ".%d", precision); diff --git a/tensorflow/core/kernels/as_string_op_test.cc b/tensorflow/core/kernels/as_string_op_test.cc new file mode 100644 index 00000000000..dff78e25e72 --- /dev/null +++ b/tensorflow/core/kernels/as_string_op_test.cc @@ -0,0 +1,245 @@ +/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/lib/core/status_test_util.h" + +namespace tensorflow { +namespace { + +class AsStringGraphTest : public OpsTestBase { + protected: + Status Init(DataType input_type, const string& fill = "", int width = -1, + int precision = -1, bool scientific = false, + bool shortest = false) { + TF_CHECK_OK(NodeDefBuilder("op", "AsString") + .Input(FakeInput(input_type)) + .Attr("fill", fill) + .Attr("precision", precision) + .Attr("scientific", scientific) + .Attr("shortest", shortest) + .Attr("width", width) + .Finalize(node_def())); + return InitOp(); + } +}; + +TEST_F(AsStringGraphTest, Int8) { + TF_ASSERT_OK(Init(DT_INT8)); + + AddInputFromArray<int8>(TensorShape({3}), {-42, 0, 42}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({3})); + test::FillValues<tstring>(&expected, {"-42", "0", "42"}); + test::ExpectTensorEqual<tstring>(expected, *GetOutput(0)); +} + +TEST_F(AsStringGraphTest, Int64) { + TF_ASSERT_OK(Init(DT_INT64)); + + AddInputFromArray<int64>(TensorShape({3}), {-42, 0, 42}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({3})); + test::FillValues<tstring>(&expected, {"-42", "0", "42"}); + test::ExpectTensorEqual<tstring>(expected, *GetOutput(0)); +} + +TEST_F(AsStringGraphTest, FloatDefault) { + TF_ASSERT_OK(Init(DT_FLOAT)); + + AddInputFromArray<float>(TensorShape({4}), {-42, 0, 3.14159, 42}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({4})); + test::FillValues<tstring>( + &expected, {"-42.000000", "0.000000", "3.141590", "42.000000"}); + test::ExpectTensorEqual<tstring>(expected, *GetOutput(0)); +} + +TEST_F(AsStringGraphTest, FloatScientific) { + TF_ASSERT_OK(Init(DT_FLOAT, /*fill=*/"", /*width=*/-1, /*precision=*/-1, + /*scientific=*/true)); + + AddInputFromArray<float>(TensorShape({4}), {-42, 0, 3.14159, 42}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({4})); + test::FillValues<tstring>(&expected, {"-4.200000e+01", "0.000000e+00", + "3.141590e+00", "4.200000e+01"}); + test::ExpectTensorEqual<tstring>(expected, *GetOutput(0)); +} + +TEST_F(AsStringGraphTest, FloatShortest) { + TF_ASSERT_OK(Init(DT_FLOAT, /*fill=*/"", /*width=*/-1, /*precision=*/-1, + /*scientific=*/false, /*shortest=*/true)); + + AddInputFromArray<float>(TensorShape({4}), {-42, 0, 3.14159, 42}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({4})); + test::FillValues<tstring>(&expected, {"-42", "0", "3.14159", "42"}); + test::ExpectTensorEqual<tstring>(expected, *GetOutput(0)); +} + +TEST_F(AsStringGraphTest, FloatPrecisionOnly) { + TF_ASSERT_OK(Init(DT_FLOAT, /*fill=*/"", /*width=*/-1, /*precision=*/2)); + + AddInputFromArray<float>(TensorShape({4}), {-42, 0, 3.14159, 42}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({4})); + test::FillValues<tstring>(&expected, {"-42.00", "0.00", "3.14", "42.00"}); + test::ExpectTensorEqual<tstring>(expected, *GetOutput(0)); +} + +TEST_F(AsStringGraphTest, FloatWidthOnly) { + TF_ASSERT_OK(Init(DT_FLOAT, /*fill=*/"", /*width=*/5)); + + AddInputFromArray<float>(TensorShape({4}), {-42, 0, 3.14159, 42}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({4})); + test::FillValues<tstring>( + &expected, {"-42.000000", "0.000000", "3.141590", "42.000000"}); + test::ExpectTensorEqual<tstring>(expected, *GetOutput(0)); +} + +TEST_F(AsStringGraphTest, Float_5_2_Format) { + TF_ASSERT_OK(Init(DT_FLOAT, /*fill=*/"", /*width=*/5, /*precision=*/2)); + + AddInputFromArray<float>(TensorShape({4}), {-42, 0, 3.14159, 42}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({4})); + test::FillValues<tstring>(&expected, {"-42.00", " 0.00", " 3.14", "42.00"}); + test::ExpectTensorEqual<tstring>(expected, *GetOutput(0)); +} + +TEST_F(AsStringGraphTest, Complex) { + TF_ASSERT_OK(Init(DT_COMPLEX64, /*fill=*/"", /*width=*/5, /*precision=*/2)); + + AddInputFromArray<complex64>(TensorShape({3}), {{-4, 2}, {0}, {3.14159, -1}}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({3})); + test::FillValues<tstring>( + &expected, {"(-4.00, 2.00)", "( 0.00, 0.00)", "( 3.14,-1.00)"}); + test::ExpectTensorEqual<tstring>(expected, *GetOutput(0)); +} + +TEST_F(AsStringGraphTest, Bool) { + TF_ASSERT_OK(Init(DT_BOOL)); + + AddInputFromArray<bool>(TensorShape({2}), {true, false}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({2})); + test::FillValues<tstring>(&expected, {"true", "false"}); + test::ExpectTensorEqual<tstring>(expected, *GetOutput(0)); +} + +TEST_F(AsStringGraphTest, String) { + Status s = Init(DT_STRING); + ASSERT_EQ(error::INVALID_ARGUMENT, s.code()); + ASSERT_TRUE(absl::StrContains( + s.error_message(), + "Value for attr 'T' of string is not in the list of allowed values")); +} + +TEST_F(AsStringGraphTest, OnlyOneOfScientificAndShortest) { + Status s = Init(DT_FLOAT, /*fill=*/"", /*width=*/-1, /*precision=*/-1, + /*scientific=*/true, /*shortest=*/true); + ASSERT_EQ(error::INVALID_ARGUMENT, s.code()); + ASSERT_TRUE( + absl::StrContains(s.error_message(), + "Cannot select both scientific and shortest notation")); +} + +TEST_F(AsStringGraphTest, NoShortestForNonFloat) { + Status s = Init(DT_INT32, /*fill=*/"", /*width=*/-1, /*precision=*/-1, + /*scientific=*/false, /*shortest=*/true); + ASSERT_EQ(error::INVALID_ARGUMENT, s.code()); + ASSERT_TRUE(absl::StrContains( + s.error_message(), + "scientific and shortest format not supported for datatype")); +} + +TEST_F(AsStringGraphTest, NoScientificForNonFloat) { + Status s = Init(DT_INT32, /*fill=*/"", /*width=*/-1, /*precision=*/-1, + /*scientific=*/true); + ASSERT_EQ(error::INVALID_ARGUMENT, s.code()); + ASSERT_TRUE(absl::StrContains( + s.error_message(), + "scientific and shortest format not supported for datatype")); +} + +TEST_F(AsStringGraphTest, NoPrecisionForNonFloat) { + Status s = Init(DT_INT32, /*fill=*/"", /*width=*/-1, /*precision=*/5); + ASSERT_EQ(error::INVALID_ARGUMENT, s.code()); + ASSERT_TRUE(absl::StrContains(s.error_message(), + "precision not supported for datatype")); +} + +TEST_F(AsStringGraphTest, LongFill) { + Status s = Init(DT_INT32, /*fill=*/"asdf"); + ASSERT_EQ(error::INVALID_ARGUMENT, s.code()); + ASSERT_TRUE(absl::StrContains(s.error_message(), + "Fill string must be one or fewer characters")); +} + +TEST_F(AsStringGraphTest, FillWithZero) { + TF_ASSERT_OK(Init(DT_INT64, /*fill=*/"0", /*width=*/4)); + + AddInputFromArray<int64>(TensorShape({3}), {-42, 0, 42}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({3})); + test::FillValues<tstring>(&expected, {"-042", "0000", "0042"}); + test::ExpectTensorEqual<tstring>(expected, *GetOutput(0)); +} + +TEST_F(AsStringGraphTest, FillWithSpace) { + TF_ASSERT_OK(Init(DT_INT64, /*fill=*/" ", /*width=*/4)); + + AddInputFromArray<int64>(TensorShape({3}), {-42, 0, 42}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({3})); + test::FillValues<tstring>(&expected, {" -42", " 0", " 42"}); + test::ExpectTensorEqual<tstring>(expected, *GetOutput(0)); +} + +TEST_F(AsStringGraphTest, FillWithChar1) { + TF_ASSERT_OK(Init(DT_INT64, /*fill=*/"-", /*width=*/4)); + + AddInputFromArray<int64>(TensorShape({3}), {-42, 0, 42}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({3})); + test::FillValues<tstring>(&expected, {"-42 ", "0 ", "42 "}); + test::ExpectTensorEqual<tstring>(expected, *GetOutput(0)); +} + +TEST_F(AsStringGraphTest, FillWithChar3) { + Status s = Init(DT_INT32, /*fill=*/"s"); + ASSERT_EQ(error::INVALID_ARGUMENT, s.code()); + ASSERT_TRUE( + absl::StrContains(s.error_message(), "Fill argument not supported")); +} + +TEST_F(AsStringGraphTest, FillWithChar4) { + Status s = Init(DT_INT32, /*fill=*/"n"); + ASSERT_EQ(error::INVALID_ARGUMENT, s.code()); + ASSERT_TRUE( + absl::StrContains(s.error_message(), "Fill argument not supported")); +} + +} // end namespace +} // end namespace tensorflow