Change hash function for Compress.

((a*b)>>18) & mask has higher throughput than (a*b)>>shift, and produces the
same results when the hash table size is 2**14. In other cases, the hash
function is still good, although its quality matters less there because the
input is small anyway. This speeds up encoding, especially in cases where
hashing is a significant part of the encoding critical path (small or
incompressible files).

PiperOrigin-RevId: 341498741
Change-Id: I359cbf5a38f680dd16fe5828fab20aae291915ef
This commit is contained in:
Luca Versari 2020-11-09 15:32:45 -08:00 committed by TensorFlower Gardener
parent ffc145f4cc
commit 47388e6e56

View File

@ -22,10 +22,13 @@ limitations under the License.
namespace tensorflow { namespace tensorflow {
// The current implementation of snappy compresses the below block to 619 bytes. static void CheckPrefixSuffix(const string& str, const string& prefix,
// We use this to validate the error messages. Please change this number if const string& suffix) {
// a new snappy implementation compresses to a different size. CHECK_GE(str.size(), prefix.size());
const int COMPRESSED_RECORD_SIZE = 619; CHECK_GE(str.size(), suffix.size());
CHECK_EQ(str.substr(0, prefix.length()), prefix);
CHECK_EQ(str.substr(str.length() - suffix.length()), suffix);
}
static string GetRecord() { static string GetRecord() {
static const string lorem_ipsum = static const string lorem_ipsum =
@ -315,10 +318,12 @@ TEST(SnappyBuffers, SmallUncompressInputBuffer) {
fprintf(stderr, "skipping compression tests\n"); fprintf(stderr, "skipping compression tests\n");
return; return;
} }
CHECK_EQ(TestMultipleWrites(10000, 10000, 10, 10000, 2, true), Status status = TestMultipleWrites(10000, 10000, 10, 10000, 2, true);
errors::ResourceExhausted("Input buffer(size: 10 bytes) too small. ", CHECK_EQ(status.code(), error::Code::RESOURCE_EXHAUSTED);
"Should be larger than ", CheckPrefixSuffix(
COMPRESSED_RECORD_SIZE, " bytes.")); status.error_message(),
"Input buffer(size: 10 bytes) too small. Should be larger than ",
" bytes.");
} }
TEST(SnappyBuffers, SmallUncompressInputStream) { TEST(SnappyBuffers, SmallUncompressInputStream) {
@ -337,9 +342,11 @@ TEST(SnappyBuffers, CorruptBlock) {
fprintf(stderr, "skipping compression tests\n"); fprintf(stderr, "skipping compression tests\n");
return; return;
} }
CHECK_EQ(TestMultipleWrites(10000, 10000, 700, 10000, 2, true, 1, true), Status status =
errors::DataLoss("Failed to read ", COMPRESSED_RECORD_SIZE, TestMultipleWrites(10000, 10000, 700, 10000, 2, true, 1, true);
" bytes from file. ", "Possible data corruption.")); CHECK_EQ(status.code(), error::Code::DATA_LOSS);
CheckPrefixSuffix(status.error_message(), "Failed to read ",
" bytes from file. Possible data corruption.");
} }
TEST(SnappyBuffers, CorruptBlockInputStream) { TEST(SnappyBuffers, CorruptBlockInputStream) {
@ -347,10 +354,11 @@ TEST(SnappyBuffers, CorruptBlockInputStream) {
fprintf(stderr, "skipping compression tests\n"); fprintf(stderr, "skipping compression tests\n");
return; return;
} }
CHECK_EQ( Status status =
TestMultipleWritesInputStream(10000, 10000, 700, 10000, 2, true, 1, true), TestMultipleWritesInputStream(10000, 10000, 700, 10000, 2, true, 1, true);
errors::DataLoss("Failed to read ", COMPRESSED_RECORD_SIZE, CHECK_EQ(status.code(), error::Code::DATA_LOSS);
" bytes from file. ", "Possible data corruption.")); CheckPrefixSuffix(status.error_message(), "Failed to read ",
" bytes from file. Possible data corruption.");
} }
TEST(SnappyBuffers, CorruptBlockLargeInputBuffer) { TEST(SnappyBuffers, CorruptBlockLargeInputBuffer) {
@ -367,10 +375,11 @@ TEST(SnappyBuffers, CorruptBlockLargeInputStream) {
fprintf(stderr, "skipping compression tests\n"); fprintf(stderr, "skipping compression tests\n");
return; return;
} }
CHECK_EQ(TestMultipleWritesInputStream(10000, 10000, 2000, 10000, 2, true, 1, Status status = TestMultipleWritesInputStream(10000, 10000, 2000, 10000, 2,
true), true, 1, true);
errors::DataLoss("Failed to read ", COMPRESSED_RECORD_SIZE, CHECK_EQ(status.code(), error::Code::DATA_LOSS);
" bytes from file. Possible data corruption.")); CheckPrefixSuffix(status.error_message(), "Failed to read ",
" bytes from file. Possible data corruption.");
} }
TEST(SnappyBuffers, Tell) { TEST(SnappyBuffers, Tell) {