Change hash function for Compress.
((a*b)>>18) & mask has higher throughput than (a*b)>>shift, and produces the same results when the hash table size is 2**14. In other cases, the hash function is still good, though that matters less there because the input is small anyway. This speeds up encoding, especially in cases where hashing is a significant part of the encoding critical path (small or incompressible files). PiperOrigin-RevId: 341498741 Change-Id: I359cbf5a38f680dd16fe5828fab20aae291915ef
This commit is contained in:
parent
ffc145f4cc
commit
47388e6e56
@ -22,10 +22,13 @@ limitations under the License.
|
|||||||
|
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
|
|
||||||
// The current implementation of snappy compresses the below block to 619 bytes.
|
static void CheckPrefixSuffix(const string& str, const string& prefix,
|
||||||
// We use this to validate the error messages. Please change this number if
|
const string& suffix) {
|
||||||
// a new snappy implementation compresses to a different size.
|
CHECK_GE(str.size(), prefix.size());
|
||||||
const int COMPRESSED_RECORD_SIZE = 619;
|
CHECK_GE(str.size(), suffix.size());
|
||||||
|
CHECK_EQ(str.substr(0, prefix.length()), prefix);
|
||||||
|
CHECK_EQ(str.substr(str.length() - suffix.length()), suffix);
|
||||||
|
}
|
||||||
|
|
||||||
static string GetRecord() {
|
static string GetRecord() {
|
||||||
static const string lorem_ipsum =
|
static const string lorem_ipsum =
|
||||||
@ -315,10 +318,12 @@ TEST(SnappyBuffers, SmallUncompressInputBuffer) {
|
|||||||
fprintf(stderr, "skipping compression tests\n");
|
fprintf(stderr, "skipping compression tests\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
CHECK_EQ(TestMultipleWrites(10000, 10000, 10, 10000, 2, true),
|
Status status = TestMultipleWrites(10000, 10000, 10, 10000, 2, true);
|
||||||
errors::ResourceExhausted("Input buffer(size: 10 bytes) too small. ",
|
CHECK_EQ(status.code(), error::Code::RESOURCE_EXHAUSTED);
|
||||||
"Should be larger than ",
|
CheckPrefixSuffix(
|
||||||
COMPRESSED_RECORD_SIZE, " bytes."));
|
status.error_message(),
|
||||||
|
"Input buffer(size: 10 bytes) too small. Should be larger than ",
|
||||||
|
" bytes.");
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(SnappyBuffers, SmallUncompressInputStream) {
|
TEST(SnappyBuffers, SmallUncompressInputStream) {
|
||||||
@ -337,9 +342,11 @@ TEST(SnappyBuffers, CorruptBlock) {
|
|||||||
fprintf(stderr, "skipping compression tests\n");
|
fprintf(stderr, "skipping compression tests\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
CHECK_EQ(TestMultipleWrites(10000, 10000, 700, 10000, 2, true, 1, true),
|
Status status =
|
||||||
errors::DataLoss("Failed to read ", COMPRESSED_RECORD_SIZE,
|
TestMultipleWrites(10000, 10000, 700, 10000, 2, true, 1, true);
|
||||||
" bytes from file. ", "Possible data corruption."));
|
CHECK_EQ(status.code(), error::Code::DATA_LOSS);
|
||||||
|
CheckPrefixSuffix(status.error_message(), "Failed to read ",
|
||||||
|
" bytes from file. Possible data corruption.");
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(SnappyBuffers, CorruptBlockInputStream) {
|
TEST(SnappyBuffers, CorruptBlockInputStream) {
|
||||||
@ -347,10 +354,11 @@ TEST(SnappyBuffers, CorruptBlockInputStream) {
|
|||||||
fprintf(stderr, "skipping compression tests\n");
|
fprintf(stderr, "skipping compression tests\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
CHECK_EQ(
|
Status status =
|
||||||
TestMultipleWritesInputStream(10000, 10000, 700, 10000, 2, true, 1, true),
|
TestMultipleWritesInputStream(10000, 10000, 700, 10000, 2, true, 1, true);
|
||||||
errors::DataLoss("Failed to read ", COMPRESSED_RECORD_SIZE,
|
CHECK_EQ(status.code(), error::Code::DATA_LOSS);
|
||||||
" bytes from file. ", "Possible data corruption."));
|
CheckPrefixSuffix(status.error_message(), "Failed to read ",
|
||||||
|
" bytes from file. Possible data corruption.");
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(SnappyBuffers, CorruptBlockLargeInputBuffer) {
|
TEST(SnappyBuffers, CorruptBlockLargeInputBuffer) {
|
||||||
@ -367,10 +375,11 @@ TEST(SnappyBuffers, CorruptBlockLargeInputStream) {
|
|||||||
fprintf(stderr, "skipping compression tests\n");
|
fprintf(stderr, "skipping compression tests\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
CHECK_EQ(TestMultipleWritesInputStream(10000, 10000, 2000, 10000, 2, true, 1,
|
Status status = TestMultipleWritesInputStream(10000, 10000, 2000, 10000, 2,
|
||||||
true),
|
true, 1, true);
|
||||||
errors::DataLoss("Failed to read ", COMPRESSED_RECORD_SIZE,
|
CHECK_EQ(status.code(), error::Code::DATA_LOSS);
|
||||||
" bytes from file. Possible data corruption."));
|
CheckPrefixSuffix(status.error_message(), "Failed to read ",
|
||||||
|
" bytes from file. Possible data corruption.");
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(SnappyBuffers, Tell) {
|
TEST(SnappyBuffers, Tell) {
|
||||||
|
Loading…
Reference in New Issue
Block a user