Change hash function for Compress.

((a*b)>>18) & mask has higher throughput than (a*b)>>shift, and produces the same results when the hash table size is 2**14. In other cases, the hash function is still good, although hash quality matters less there because the input is small anyway. This speeds up encoding, especially in cases where hashing is a significant part of the encoding critical path (small or incompressible files). PiperOrigin-RevId: 341498741 Change-Id: I359cbf5a38f680dd16fe5828fab20aae291915ef
2020-11-09 15:32:45 -08:00 · 2020-11-09 15:32:45 -08:00 · 47388e6e56
commit 47388e6e56
parent ffc145f4cc
1 changed files with 28 additions and 19 deletions
--- a/tensorflow/core/lib/io/snappy/snappy_test.cc
+++ b/tensorflow/core/lib/io/snappy/snappy_test.cc
@ -22,10 +22,13 @@ limitations under the License.
 namespace tensorflow {
-// The current implementation of snappy compresses the below block to 619 bytes.
+static void CheckPrefixSuffix(const string& str, const string& prefix,
-// We use this to validate the error messages. Please change this number if
+                              const string& suffix) {
-// a new snappy implementation compresses to a different size.
+  CHECK_GE(str.size(), prefix.size());
-const int COMPRESSED_RECORD_SIZE = 619;
+  CHECK_GE(str.size(), suffix.size());
  CHECK_EQ(str.substr(0, prefix.length()), prefix);
  CHECK_EQ(str.substr(str.length() - suffix.length()), suffix);
 }
 static string GetRecord() {
  static const string lorem_ipsum =
@ -315,10 +318,12 @@ TEST(SnappyBuffers, SmallUncompressInputBuffer) {
    fprintf(stderr, "skipping compression tests\n");
    return;
  }
-  CHECK_EQ(TestMultipleWrites(10000, 10000, 10, 10000, 2, true),
+  Status status = TestMultipleWrites(10000, 10000, 10, 10000, 2, true);
-           errors::ResourceExhausted("Input buffer(size: 10 bytes) too small. ",
+  CHECK_EQ(status.code(), error::Code::RESOURCE_EXHAUSTED);
-                                     "Should be larger than ",
+  CheckPrefixSuffix(
-                                     COMPRESSED_RECORD_SIZE, " bytes."));
+      status.error_message(),
      "Input buffer(size: 10 bytes) too small. Should be larger than ",
      " bytes.");
 }
 TEST(SnappyBuffers, SmallUncompressInputStream) {
@ -337,9 +342,11 @@ TEST(SnappyBuffers, CorruptBlock) {
    fprintf(stderr, "skipping compression tests\n");
    return;
  }
-  CHECK_EQ(TestMultipleWrites(10000, 10000, 700, 10000, 2, true, 1, true),
+  Status status =
-           errors::DataLoss("Failed to read ", COMPRESSED_RECORD_SIZE,
+      TestMultipleWrites(10000, 10000, 700, 10000, 2, true, 1, true);
-                            " bytes from file. ", "Possible data corruption."));
+  CHECK_EQ(status.code(), error::Code::DATA_LOSS);
  CheckPrefixSuffix(status.error_message(), "Failed to read ",
                    " bytes from file. Possible data corruption.");
 }
 TEST(SnappyBuffers, CorruptBlockInputStream) {
@ -347,10 +354,11 @@ TEST(SnappyBuffers, CorruptBlockInputStream) {
    fprintf(stderr, "skipping compression tests\n");
    return;
  }
-  CHECK_EQ(
+  Status status =
-      TestMultipleWritesInputStream(10000, 10000, 700, 10000, 2, true, 1, true),
+      TestMultipleWritesInputStream(10000, 10000, 700, 10000, 2, true, 1, true);
-      errors::DataLoss("Failed to read ", COMPRESSED_RECORD_SIZE,
+  CHECK_EQ(status.code(), error::Code::DATA_LOSS);
-                       " bytes from file. ", "Possible data corruption."));
+  CheckPrefixSuffix(status.error_message(), "Failed to read ",
                    " bytes from file. Possible data corruption.");
 }
 TEST(SnappyBuffers, CorruptBlockLargeInputBuffer) {
@ -367,10 +375,11 @@ TEST(SnappyBuffers, CorruptBlockLargeInputStream) {
    fprintf(stderr, "skipping compression tests\n");
    return;
  }
-  CHECK_EQ(TestMultipleWritesInputStream(10000, 10000, 2000, 10000, 2, true, 1,
+  Status status = TestMultipleWritesInputStream(10000, 10000, 2000, 10000, 2,
-                                         true),
+                                                true, 1, true);
-           errors::DataLoss("Failed to read ", COMPRESSED_RECORD_SIZE,
+  CHECK_EQ(status.code(), error::Code::DATA_LOSS);
-                            " bytes from file. Possible data corruption."));
+  CheckPrefixSuffix(status.error_message(), "Failed to read ",
                    " bytes from file. Possible data corruption.");
 }
 TEST(SnappyBuffers, Tell) {