From 3e2b03121175aebd965174f2ee55efd5aa903039 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 23 Mar 2020 16:15:37 -0700 Subject: [PATCH] Cache the out-of-range status and avoid repeating reads in BufferedInputStream. PiperOrigin-RevId: 302541219 Change-Id: I0f624d6605ddb9aa21a24b809fff1d5de7c6d76e --- .../core/lib/io/buffered_inputstream.cc | 5 +- .../core/lib/io/buffered_inputstream_test.cc | 57 +++++++++++++++++++ 2 files changed, 59 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/lib/io/buffered_inputstream.cc b/tensorflow/core/lib/io/buffered_inputstream.cc index 94479a1149f..6f268de8cac 100644 --- a/tensorflow/core/lib/io/buffered_inputstream.cc +++ b/tensorflow/core/lib/io/buffered_inputstream.cc @@ -49,8 +49,7 @@ Status BufferedInputStream::FillBuffer() { Status s = input_stream_->ReadNBytes(size_, &buf_); pos_ = 0; limit_ = buf_.size(); - if (buf_.empty()) { - DCHECK(!s.ok()); + if (!s.ok()) { file_status_ = s; } return s; @@ -93,7 +92,7 @@ Status BufferedInputStream::ReadNBytes(int64 bytes_to_read, tstring* result) { bytes_to_read); } result->clear(); - if (!file_status_.ok() && bytes_to_read > 0) { + if (pos_ == limit_ && !file_status_.ok() && bytes_to_read > 0) { return file_status_; } result->reserve(bytes_to_read); diff --git a/tensorflow/core/lib/io/buffered_inputstream_test.cc b/tensorflow/core/lib/io/buffered_inputstream_test.cc index c4af1e707b4..d6c07344ba3 100644 --- a/tensorflow/core/lib/io/buffered_inputstream_test.cc +++ b/tensorflow/core/lib/io/buffered_inputstream_test.cc @@ -30,6 +30,37 @@ static std::vector BufferSizes() { 12, 13, 14, 15, 16, 17, 18, 19, 20, 65536}; } +// This class will only return OutOfRange error once to make sure that +// BufferedInputStream is able to cache the error. +class ReadOnceInputStream : public InputStreamInterface { + public: + ReadOnceInputStream() : start_(true) {} + + virtual Status ReadNBytes(int64 bytes_to_read, tstring* result) { + if (bytes_to_read < 11) { + return errors::InvalidArgument("Not reading all bytes: ", bytes_to_read); + } + if (start_) { + *result = "0123456789"; + start_ = false; + return errors::OutOfRange("Out of range."); + } + return errors::InvalidArgument( + "Redudant call to ReadNBytes after an OutOfRange error."); + } + + int64 Tell() const override { return start_ ? 0 : 10; } + + // Resets the stream to the beginning. + Status Reset() override { + start_ = true; + return Status::OK(); + } + + private: + bool start_; +}; + TEST(BufferedInputStream, ReadLine_Empty) { Env* env = Env::Default(); string fname; @@ -196,6 +227,32 @@ TEST(BufferedInputStream, ReadNBytes) { } } +TEST(BufferedInputStream, OutOfRangeCache) { + for (auto buf_size : BufferSizes()) { + if (buf_size < 11) { + continue; + } + ReadOnceInputStream input_stream; + tstring read; + BufferedInputStream in(&input_stream, buf_size); + EXPECT_EQ(0, in.Tell()); + TF_ASSERT_OK(in.ReadNBytes(3, &read)); + EXPECT_EQ(read, "012"); + EXPECT_EQ(3, in.Tell()); + TF_ASSERT_OK((in.ReadNBytes(7, &read))); + EXPECT_EQ(read, "3456789"); + EXPECT_EQ(10, in.Tell()); + Status s = in.ReadNBytes(5, &read); + // Make sure the read is failing with OUT_OF_RANGE error. If it is failing + // with other errors, it is not caching the OUT_OF_RANGE properly. + EXPECT_EQ(error::OUT_OF_RANGE, s.code()) << s; + EXPECT_EQ(read, ""); + // Empty read shouldn't cause an error even at the end of the file. + TF_ASSERT_OK(in.ReadNBytes(0, &read)); + EXPECT_EQ(read, ""); + } +} + TEST(BufferedInputStream, SkipNBytes) { Env* env = Env::Default(); string fname;