Fix issue with file sizes in MemmappedFileSystem.

File sizes are incorrectly computed from padded offsets. This can
interfere with proper reading of binary protos. By keeping track of file
sizes on creation, we have access to the right file sizes when reading
files.
This commit is contained in:
Haakan Younes 2019-04-02 21:41:17 -04:00
parent 0b86daa483
commit 55832255a8
4 changed files with 8 additions and 6 deletions

View File

@ -235,8 +235,7 @@ Status MemmappedFileSystem::InitializeFromFile(Env* env,
if (!directory_ if (!directory_
.insert(std::make_pair( .insert(std::make_pair(
element_iter->name(), element_iter->name(),
FileRegion(element_iter->offset(), FileRegion(element_iter->offset(), element_iter->length())))
prev_element_offset - element_iter->offset())))
.second) { .second) {
return errors::DataLoss("Corrupted memmapped model file: ", filename, return errors::DataLoss("Corrupted memmapped model file: ", filename,
" Duplicate name of internal component ", " Duplicate name of internal component ",

View File

@ -21,6 +21,7 @@ option cc_enable_arenas = true;
message MemmappedFileSystemDirectoryElement { message MemmappedFileSystemDirectoryElement {
uint64 offset = 1; uint64 offset = 1;
string name = 2; string name = 2;
uint64 length = 3;
} }
// A directory of regions in a memmapped file. // A directory of regions in a memmapped file.

View File

@ -47,7 +47,7 @@ Status MemmappedFileSystemWriter::SaveTensor(const Tensor& tensor,
} }
// Adds pad for correct alignment after memmapping. // Adds pad for correct alignment after memmapping.
TF_RETURN_IF_ERROR(AdjustAlignment(Allocator::kAllocatorAlignment)); TF_RETURN_IF_ERROR(AdjustAlignment(Allocator::kAllocatorAlignment));
AddToDirectoryElement(element_name); AddToDirectoryElement(element_name, tensor_data.size());
const auto result = output_file_->Append(tensor_data); const auto result = output_file_->Append(tensor_data);
if (result.ok()) { if (result.ok()) {
output_file_offset_ += tensor_data.size(); output_file_offset_ += tensor_data.size();
@ -69,8 +69,8 @@ Status MemmappedFileSystemWriter::SaveProtobuf(
MemmappedFileSystem::kMemmappedPackagePrefix, MemmappedFileSystem::kMemmappedPackagePrefix,
" and include [A-Za-z0-9_.]"); " and include [A-Za-z0-9_.]");
} }
AddToDirectoryElement(element_name);
const string encoded = message.SerializeAsString(); const string encoded = message.SerializeAsString();
AddToDirectoryElement(element_name, encoded.size());
const auto res = output_file_->Append(encoded); const auto res = output_file_->Append(encoded);
if (res.ok()) { if (res.ok()) {
output_file_offset_ += encoded.size(); output_file_offset_ += encoded.size();
@ -124,11 +124,13 @@ Status MemmappedFileSystemWriter::AdjustAlignment(uint64 alignment) {
return Status::OK(); return Status::OK();
} }
void MemmappedFileSystemWriter::AddToDirectoryElement(const string& name) { void MemmappedFileSystemWriter::AddToDirectoryElement(const string& name,
uint64 length) {
MemmappedFileSystemDirectoryElement* new_directory_element = MemmappedFileSystemDirectoryElement* new_directory_element =
directory_.add_element(); directory_.add_element();
new_directory_element->set_offset(output_file_offset_); new_directory_element->set_offset(output_file_offset_);
new_directory_element->set_name(name); new_directory_element->set_name(name);
new_directory_element->set_length(length);
} }
} // namespace tensorflow } // namespace tensorflow

View File

@ -40,7 +40,7 @@ class MemmappedFileSystemWriter {
private: private:
Status AdjustAlignment(uint64 alignment); Status AdjustAlignment(uint64 alignment);
void AddToDirectoryElement(const string& element_name); void AddToDirectoryElement(const string& element_name, uint64 length);
MemmappedFileSystemDirectory directory_; MemmappedFileSystemDirectory directory_;
// The current offset in the file, to support alignment. // The current offset in the file, to support alignment.
uint64 output_file_offset_ = 0; uint64 output_file_offset_ = 0;