Annotate alignment bytes in scoped allocator as initialized.

Padding in the scoped allocator may be uninitialized.  This is okay because the
uninitialized memory is not consumed meaningfully by a downstream op.  This
change annotates the padding with TF_ANNOTATE_MEMORY_IS_INITIALIZED to avoid
MSAN warnings.

PiperOrigin-RevId: 268051135
Authored by Ayush Dubey on 2019-09-09 12:26:11 -07:00; committed by TensorFlower Gardener
parent 24888a277e
commit 0e5f75d3be
7 changed files with 97 additions and 56 deletions
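For context, the macro used below, TF_ANNOTATE_MEMORY_IS_INITIALIZED, tells MemorySanitizer to treat a byte range as initialized; when MSAN is not enabled it compiles away. A minimal standalone sketch of the pattern this commit applies is shown here; it is not TensorFlow code, and ANNOTATE_INITIALIZED, kAlignment, and FieldPtr are illustrative stand-ins (under MSAN the TensorFlow macro behaves essentially like __msan_unpoison).

// Minimal sketch of the MSAN padding annotation (illustrative, not TF code).
// ANNOTATE_INITIALIZED, kAlignment, and FieldPtr are hypothetical names.
#include <cstddef>

#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>
#define ANNOTATE_INITIALIZED(ptr, len) __msan_unpoison(ptr, len)
#endif
#endif
#ifndef ANNOTATE_INITIALIZED
#define ANNOTATE_INITIALIZED(ptr, len) ((void)(ptr), (void)(len))  // no-op without MSAN
#endif

constexpr size_t kAlignment = 64;  // stand-in for Allocator::kAllocatorAlignment

// Returns the start of a field carved out of `base` at `offset`.  If the field
// was rounded up to kAlignment, the padding bytes past `bytes_requested` are
// never written, so mark them as initialized to keep MSAN quiet when the whole
// backing buffer is later read wholesale (e.g. by a collective all-reduce).
char* FieldPtr(char* base, size_t offset, size_t bytes_requested,
               size_t bytes_allocated) {
  char* ptr = base + offset;
  if (bytes_allocated > bytes_requested) {
    ANNOTATE_INITIALIZED(ptr + bytes_requested,
                         bytes_allocated - bytes_requested);
  }
  return ptr;
}

In the actual change, ScopedAllocator::AllocateRaw plays this role, with bytes_requested and bytes_allocated coming from the new ScopedAllocator::Field members.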

View File

@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/common_runtime/scoped_allocator.h"
 #include "tensorflow/core/common_runtime/scoped_allocator_mgr.h"
+#include "tensorflow/core/platform/dynamic_annotations.h"
 
 namespace tensorflow {
@@ -34,7 +36,7 @@ ScopedAllocator::ScopedAllocator(const Tensor& backing_tensor, int32 scope_id,
   tbuf_->Ref();
   // Hold this until all expected_calls have been made.
   container->Ref();
-  CHECK_GE(tbuf_->size(), fields.back().offset + fields.back().bytes);
+  CHECK_GE(tbuf_->size(), fields.back().offset + fields.back().bytes_requested);
 }
 
 ScopedAllocator::~ScopedAllocator() {
@@ -56,43 +58,66 @@ ScopedAllocator::~ScopedAllocator() {
 void* ScopedAllocator::AllocateRaw(int32 field_index, size_t num_bytes) {
   VLOG(1) << "ScopedAllocator index " << id_ << " AllocateRaw "
           << "field " << field_index << " num_bytes " << num_bytes;
-  mutex_lock l(mu_);
-  if (expected_call_count_ <= 0) {
-    LOG(ERROR) << "Scoped allocator " << name_
-               << " could not satisfy request for " << num_bytes
-               << " bytes, expected uses exhausted. ";
-    return nullptr;
-  }
-
-  int32_t num_fields = static_cast<int32>(fields_.size());
-  if (field_index >= num_fields) {
-    LOG(ERROR) << "ScopedAllocator " << name_
-               << " received unexpected field number " << field_index;
-    return nullptr;
-  }
-
-  const Field& f = fields_[field_index];
-  if (num_bytes != f.bytes) {
-    LOG(ERROR) << "ScopedAllocator " << name_ << " got request for "
-               << num_bytes << " bytes from field " << field_index
-               << " which has precalculated size " << f.bytes << " and offset "
-               << f.offset;
-    return nullptr;
-  }
-
-  void* ptr = static_cast<void*>((tbuf_->template base<char>() + f.offset));
-
-  ++live_alloc_count_;
-  --expected_call_count_;
-  if (0 == expected_call_count_) {
-    for (auto& f : fields_) {
-      container_->Drop(f.scope_id, this);
+  void* ptr = nullptr;
+  const Field* field = nullptr;
+  {
+    mutex_lock l(mu_);
+    if (expected_call_count_ <= 0) {
+      LOG(ERROR) << "Scoped allocator " << name_
+                 << " could not satisfy request for " << num_bytes
+                 << " bytes, expected uses exhausted. ";
+      return nullptr;
+    }
+
+    int32_t num_fields = static_cast<int32>(fields_.size());
+    if (field_index >= num_fields) {
+      LOG(ERROR) << "ScopedAllocator " << name_
+                 << " received unexpected field number " << field_index;
+      return nullptr;
+    }
+
+    field = &fields_[field_index];
+    if (num_bytes != field->bytes_requested) {
+      LOG(ERROR) << "ScopedAllocator " << name_ << " got request for "
+                 << num_bytes << " bytes from field " << field_index
+                 << " which has precalculated size " << field->bytes_requested
+                 << " and offset " << field->offset;
+      return nullptr;
+    }
+
+    ptr = static_cast<void*>((tbuf_->template base<char>() + field->offset));
+
+    ++live_alloc_count_;
+    --expected_call_count_;
+    if (0 == expected_call_count_) {
+      for (auto& f : fields_) {
+        container_->Drop(f.scope_id, this);
+      }
+      container_->Drop(id_, this);
+      container_->Unref();
+      container_ = nullptr;
     }
-    container_->Drop(id_, this);
-    container_->Unref();
-    container_ = nullptr;
   }
-  VLOG(1) << "AllocateRaw returning " << ptr;
+  VLOG(2) << "AllocateRaw returning " << ptr << " bytes_requested "
+          << field->bytes_requested << " bytes_allocated "
+          << field->bytes_allocated;
+  // If there is overshoot due to alignment, let MSAN believe that the padding
+  // is initialized.  This is okay because we do not use this memory region for
+  // anything meaningful.
+  if (field->bytes_allocated > field->bytes_requested) {
+    size_t extra_bytes = field->bytes_allocated - field->bytes_requested;
+    void* extra_buf = static_cast<void*>(static_cast<char*>(ptr) +
+                                         field->bytes_allocated - extra_bytes);
+    VLOG(2) << "AllocateRaw requested " << num_bytes
+            << " bytes which is not divisible by kAllocatorAlignment="
+            << Allocator::kAllocatorAlignment << " and hence we allocated "
+            << field->bytes_allocated << ". Annotating " << extra_bytes
+            << " bytes starting at " << extra_buf
+            << " with TF_ANNOTATE_MEMORY_IS_INITIALIZED";
+    TF_ANNOTATE_MEMORY_IS_INITIALIZED(extra_buf, extra_bytes);
+  }
   return ptr;
 }
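In the annotation branch above, extra_buf works out to ptr + bytes_requested (since extra_bytes = bytes_allocated - bytes_requested), so TF_ANNOTATE_MEMORY_IS_INITIALIZED covers exactly the padding tail [ptr + bytes_requested, ptr + bytes_allocated) and never touches the bytes the caller actually requested.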

View File

@@ -35,7 +35,8 @@ class ScopedAllocator {
   struct Field {
     int32 scope_id;
     size_t offset;
-    size_t bytes;
+    size_t bytes_requested;
+    size_t bytes_allocated;
   };
   // Field index that refers to backing tensor, not any aliased field.
   static const int32 kBackingIndex = -1;

View File

@@ -166,21 +166,35 @@ size_t ScopedAllocatorMgr::PopulateFields(
     const DataType dtype, std::vector<ScopedAllocator::Field>* fields) {
   const int32 num_fields = static_cast<int32>(shapes.size());
   fields->resize(num_fields);
+  // At the end of iteration `i`, `offset` points to the offset from the start
+  // of the backing buffer until the end of `field[i].bytes_allocated`.  This
+  // is aligned to `kAllocatorAlignment`.
   size_t offset = 0;
   for (int32 i = 0; i < num_fields; ++i) {
+    size_t bytes_requested = shapes[i].num_elements() * DataTypeSize(dtype);
+    auto* field = &((*fields)[i]);
+    field->scope_id = scope_id + 1 + i;
+    field->bytes_requested = bytes_requested;
+    field->offset = offset;
+    offset += bytes_requested;
+
+    // Compute actual #bytes allocated, which may include padding due to
+    // alignment.
+    size_t bytes_allocated = bytes_requested;
     size_t overshoot = offset % Allocator::kAllocatorAlignment;
     if (overshoot > 0) {
-      offset += (Allocator::kAllocatorAlignment - overshoot);
+      size_t alignment_bytes = Allocator::kAllocatorAlignment - overshoot;
+      bytes_allocated += alignment_bytes;
+      offset += alignment_bytes;
     }
-    size_t bytes = shapes[i].num_elements() * DataTypeSize(dtype);
-    (*fields)[i].scope_id = scope_id + 1 + i;
-    (*fields)[i].bytes = bytes;
-    (*fields)[i].offset = offset;
-    VLOG(1) << "field=" << i << " scope_id=" << (*fields)[i].scope_id
-            << " bytes=" << (*fields)[i].bytes
-            << " offset=" << (*fields)[i].offset;
-    offset += bytes;
+    field->bytes_allocated = bytes_allocated;
+
+    VLOG(1) << "field=" << i << " scope_id=" << field->scope_id
+            << " bytes_requested=" << field->bytes_requested
+            << " offset=" << field->offset
+            << " bytes_allocated=" << field->bytes_allocated;
   }
 
   return offset;
 }

View File

@@ -110,13 +110,13 @@ TEST_F(ScopedAllocatorMgrTest, PopulateFields) {
   InitTensor();
   PopulateFields();
   EXPECT_EQ(0, fields_[0].offset);
-  EXPECT_EQ(512 * sizeof(float), fields_[0].bytes);
+  EXPECT_EQ(512 * sizeof(float), fields_[0].bytes_requested);
   EXPECT_EQ(scope_id_ + 1, fields_[0].scope_id);
   EXPECT_EQ(512 * sizeof(float), fields_[1].offset);
-  EXPECT_EQ(9 * sizeof(float), fields_[1].bytes);
+  EXPECT_EQ(9 * sizeof(float), fields_[1].bytes_requested);
   EXPECT_EQ(scope_id_ + 2, fields_[1].scope_id);
   EXPECT_EQ(521 * sizeof(float) + AlignmentPadding(), fields_[2].offset);
-  EXPECT_EQ(512 * sizeof(float), fields_[2].bytes);
+  EXPECT_EQ(512 * sizeof(float), fields_[2].bytes_requested);
   EXPECT_EQ(scope_id_ + 3, fields_[2].scope_id);
 }
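For concreteness, and assuming the usual 64-byte Allocator::kAllocatorAlignment, the expectations above work out as follows: field 0 requests 512 * 4 = 2048 bytes at offset 0, which is already aligned, so field 1 starts at offset 2048; field 1 requests 9 * 4 = 36 bytes and picks up 28 bytes of padding (bytes_allocated = 64), so field 2 starts at offset 2048 + 36 + 28 = 2112, i.e. 521 * sizeof(float) + AlignmentPadding(), with AlignmentPadding() evaluating to 28 under that alignment.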
@@ -185,9 +185,10 @@ TEST_F(ScopedAllocatorMgrTest, AllocatorInitFail) {
   fields_.resize(1);
   fields_[0].scope_id = scope_id_ + 1;
   fields_[0].offset = 0;
-  fields_[0].bytes = backing_tensor_shape_.num_elements() * 2 * sizeof(float);
-  // fields[0].offset + fields[0].bytes is larger than the size of the backing
-  // tensor, so this check should fail
+  fields_[0].bytes_requested =
+      backing_tensor_shape_.num_elements() * 2 * sizeof(float);
+  // fields[0].offset + fields[0].bytes_requested is larger than the size of the
+  // backing tensor, so this check should fail
   EXPECT_DEATH(Status s = AddScopedAllocator(1, scope_id_), "");
 }

View File

@@ -39,7 +39,7 @@ class ScopedAllocatorOp : public OpKernel {
     // the subtensors to be allocated from it, taking into account
     // alignment considerations.
     ScopedAllocatorMgr::PopulateFields(id_, shapes_, dtype_, &fields_);
-    size_t num_bytes = fields_.back().offset + fields_.back().bytes;
+    size_t num_bytes = fields_.back().offset + fields_.back().bytes_allocated;
     num_elements_ = num_bytes / DataTypeSize(dtype_);
     OP_REQUIRES(context, num_bytes % DataTypeSize(dtype_) == 0,
                 errors::InvalidArgument(

View File

@@ -91,8 +91,8 @@ void PrepOp(DataType dtype, int32 id,
   ScopedAllocatorMgr::PopulateFields(id, fields_shapes, dtype, fields);
   // We don't simply allocate a tensor with shape as backing_tensor_shape,
   // because we need to account for padding in the fields. We actually need a
-  // tensor of size at least (fields[-1].offset + fields[-1].bytes).
-  size_t num_bytes = fields->back().offset + fields->back().bytes;
+  // tensor of size at least (fields[-1].offset + fields[-1].bytes_allocated).
+  size_t num_bytes = fields->back().offset + fields->back().bytes_allocated;
   int32_t num_elements = num_bytes / DataTypeSize(dtype);
   CHECK_EQ(num_bytes % DataTypeSize(dtype), 0);

View File

@@ -158,7 +158,7 @@ class CollectiveOpTest(test.TestCase):
         # to `all_reduce` has an explicit device string. We don't use
         # `identity` because `cast` is more resilient to getting optimized
        # away by various optimization passes.
-        input_tensor = math_ops.cast(device_tensors[j], dtypes.float64)
+        input_tensor = math_ops.cast(device_tensors[j], dtypes.float16)
         collective_op = collective_ops.all_reduce(
             input_tensor, group_size, group_key, instances[j],
             'Add', 'Id')