From 55832255a8d9f2a9cfadb83fbb3efa5db0f1a755 Mon Sep 17 00:00:00 2001 From: Haakan Younes Date: Tue, 2 Apr 2019 21:41:17 -0400 Subject: [PATCH] Fix issue with file sizes in MemmappedFileSystem. File sizes are incorrectly computed from padded offsets. This can interfer with proper reading of binary protos. By keeping track of file sizes on creation, we have access to the right file sizes when reading files. --- tensorflow/core/util/memmapped_file_system.cc | 3 +-- tensorflow/core/util/memmapped_file_system.proto | 1 + tensorflow/core/util/memmapped_file_system_writer.cc | 8 +++++--- tensorflow/core/util/memmapped_file_system_writer.h | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/util/memmapped_file_system.cc b/tensorflow/core/util/memmapped_file_system.cc index b1773a25171..00c527f1af6 100644 --- a/tensorflow/core/util/memmapped_file_system.cc +++ b/tensorflow/core/util/memmapped_file_system.cc @@ -235,8 +235,7 @@ Status MemmappedFileSystem::InitializeFromFile(Env* env, if (!directory_ .insert(std::make_pair( element_iter->name(), - FileRegion(element_iter->offset(), - prev_element_offset - element_iter->offset()))) + FileRegion(element_iter->offset(), element_iter->length()))) .second) { return errors::DataLoss("Corrupted memmapped model file: ", filename, " Duplicate name of internal component ", diff --git a/tensorflow/core/util/memmapped_file_system.proto b/tensorflow/core/util/memmapped_file_system.proto index bf6cb4af296..a1db8b2a31f 100644 --- a/tensorflow/core/util/memmapped_file_system.proto +++ b/tensorflow/core/util/memmapped_file_system.proto @@ -21,6 +21,7 @@ option cc_enable_arenas = true; message MemmappedFileSystemDirectoryElement { uint64 offset = 1; string name = 2; + uint64 length = 3; } // A directory of regions in a memmapped file. diff --git a/tensorflow/core/util/memmapped_file_system_writer.cc b/tensorflow/core/util/memmapped_file_system_writer.cc index 9556ee385f6..483c8be7933 100644 --- a/tensorflow/core/util/memmapped_file_system_writer.cc +++ b/tensorflow/core/util/memmapped_file_system_writer.cc @@ -47,7 +47,7 @@ Status MemmappedFileSystemWriter::SaveTensor(const Tensor& tensor, } // Adds pad for correct alignment after memmapping. TF_RETURN_IF_ERROR(AdjustAlignment(Allocator::kAllocatorAlignment)); - AddToDirectoryElement(element_name); + AddToDirectoryElement(element_name, tensor_data.size()); const auto result = output_file_->Append(tensor_data); if (result.ok()) { output_file_offset_ += tensor_data.size(); @@ -69,8 +69,8 @@ Status MemmappedFileSystemWriter::SaveProtobuf( MemmappedFileSystem::kMemmappedPackagePrefix, " and include [A-Za-z0-9_.]"); } - AddToDirectoryElement(element_name); const string encoded = message.SerializeAsString(); + AddToDirectoryElement(element_name, encoded.size()); const auto res = output_file_->Append(encoded); if (res.ok()) { output_file_offset_ += encoded.size(); @@ -124,11 +124,13 @@ Status MemmappedFileSystemWriter::AdjustAlignment(uint64 alignment) { return Status::OK(); } -void MemmappedFileSystemWriter::AddToDirectoryElement(const string& name) { +void MemmappedFileSystemWriter::AddToDirectoryElement(const string& name, + uint64 length) { MemmappedFileSystemDirectoryElement* new_directory_element = directory_.add_element(); new_directory_element->set_offset(output_file_offset_); new_directory_element->set_name(name); + new_directory_element->set_length(length); } } // namespace tensorflow diff --git a/tensorflow/core/util/memmapped_file_system_writer.h b/tensorflow/core/util/memmapped_file_system_writer.h index 2cebaa256da..884b4b8bc63 100644 --- a/tensorflow/core/util/memmapped_file_system_writer.h +++ b/tensorflow/core/util/memmapped_file_system_writer.h @@ -40,7 +40,7 @@ class MemmappedFileSystemWriter { private: Status AdjustAlignment(uint64 alignment); - void AddToDirectoryElement(const string& element_name); + void AddToDirectoryElement(const string& element_name, uint64 length); MemmappedFileSystemDirectory directory_; // The current offset in the file, to support alignment. uint64 output_file_offset_ = 0;