Annotate alignment bytes in scoped allocator as initialized.

Padding in the scoped allocator may be uninitialized.  This is okay because the
uninitialized memory is not consumed meaningfully by any downstream op.  This
change annotates the padding with TF_ANNOTATE_MEMORY_IS_INITIALIZED to avoid
MSAN warnings.

PiperOrigin-RevId: 268051135
Author: Ayush Dubey, 2019-09-09 12:26:11 -07:00 (committed by TensorFlower Gardener)
parent 24888a277e
commit 0e5f75d3be
7 changed files with 97 additions and 56 deletions
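As a rough illustration of the MSAN report this change avoids (a standalone sketch, not TensorFlow code; the 36/64-byte sizes and the direct sanitizer calls are illustrative assumptions): if only the requested bytes of a field are ever written, any check that covers the field's full allocated span flags the trailing alignment padding, and unpoisoning the padding first, which is effectively what the TF_ANNOTATE_MEMORY_IS_INITIALIZED call in the change below requests, suppresses exactly that report.

#include <cstddef>
#include <cstring>

#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>
#define BUILT_WITH_MSAN 1
#endif
#endif

int main() {
  constexpr size_t kBytesRequested = 36;  // e.g. 9 floats
  constexpr size_t kBytesAllocated = 64;  // rounded up to a 64-byte alignment
  char* backing = new char[kBytesAllocated];  // padding bytes stay uninitialized
  // Simulate the producer: only the requested bytes are written.
  std::memset(backing, 0, kBytesRequested);
#ifdef BUILT_WITH_MSAN
  // Without the unpoison call, checking the full allocation reports
  // use-of-uninitialized-value for the 28 trailing padding bytes.
  __msan_unpoison(backing + kBytesRequested, kBytesAllocated - kBytesRequested);
  __msan_check_mem_is_initialized(backing, kBytesAllocated);
#endif
  delete[] backing;
  return 0;
}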


@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/common_runtime/scoped_allocator.h"
#include "tensorflow/core/common_runtime/scoped_allocator_mgr.h"
#include "tensorflow/core/platform/dynamic_annotations.h"
namespace tensorflow {
@@ -34,7 +36,7 @@ ScopedAllocator::ScopedAllocator(const Tensor& backing_tensor, int32 scope_id,
tbuf_->Ref();
// Hold this until all expected_calls have been made.
container->Ref();
CHECK_GE(tbuf_->size(), fields.back().offset + fields.back().bytes);
CHECK_GE(tbuf_->size(), fields.back().offset + fields.back().bytes_requested);
}
ScopedAllocator::~ScopedAllocator() {
@@ -56,43 +58,66 @@ ScopedAllocator::~ScopedAllocator() {
void* ScopedAllocator::AllocateRaw(int32 field_index, size_t num_bytes) {
VLOG(1) << "ScopedAllocator index " << id_ << " AllocateRaw "
<< "field " << field_index << " num_bytes " << num_bytes;
mutex_lock l(mu_);
if (expected_call_count_ <= 0) {
LOG(ERROR) << "Scoped allocator " << name_
<< " could not satisfy request for " << num_bytes
<< " bytes, expected uses exhausted. ";
return nullptr;
}
int32_t num_fields = static_cast<int32>(fields_.size());
if (field_index >= num_fields) {
LOG(ERROR) << "ScopedAllocator " << name_
<< " received unexpected field number " << field_index;
return nullptr;
}
const Field& f = fields_[field_index];
if (num_bytes != f.bytes) {
LOG(ERROR) << "ScopedAllocator " << name_ << " got request for "
<< num_bytes << " bytes from field " << field_index
<< " which has precalculated size " << f.bytes << " and offset "
<< f.offset;
return nullptr;
}
void* ptr = static_cast<void*>((tbuf_->template base<char>() + f.offset));
++live_alloc_count_;
--expected_call_count_;
if (0 == expected_call_count_) {
for (auto& f : fields_) {
container_->Drop(f.scope_id, this);
void* ptr = nullptr;
const Field* field = nullptr;
{
mutex_lock l(mu_);
if (expected_call_count_ <= 0) {
LOG(ERROR) << "Scoped allocator " << name_
<< " could not satisfy request for " << num_bytes
<< " bytes, expected uses exhausted. ";
return nullptr;
}
int32_t num_fields = static_cast<int32>(fields_.size());
if (field_index >= num_fields) {
LOG(ERROR) << "ScopedAllocator " << name_
<< " received unexpected field number " << field_index;
return nullptr;
}
field = &fields_[field_index];
if (num_bytes != field->bytes_requested) {
LOG(ERROR) << "ScopedAllocator " << name_ << " got request for "
<< num_bytes << " bytes from field " << field_index
<< " which has precalculated size " << field->bytes_requested
<< " and offset " << field->offset;
return nullptr;
}
ptr = static_cast<void*>((tbuf_->template base<char>() + field->offset));
++live_alloc_count_;
--expected_call_count_;
if (0 == expected_call_count_) {
for (auto& f : fields_) {
container_->Drop(f.scope_id, this);
}
container_->Drop(id_, this);
container_->Unref();
container_ = nullptr;
}
container_->Drop(id_, this);
container_->Unref();
container_ = nullptr;
}
VLOG(1) << "AllocateRaw returning " << ptr;
VLOG(2) << "AllocateRaw returning " << ptr << " bytes_requested "
<< field->bytes_requested << " bytes_allocated "
<< field->bytes_allocated;
// If there is overshoot due to alignment, let MSAN believe that the padding
// is initialized. This is okay because we do not use this memory region for
// anything meaningful.
if (field->bytes_allocated > field->bytes_requested) {
size_t extra_bytes = field->bytes_allocated - field->bytes_requested;
void* extra_buf = static_cast<void*>(static_cast<char*>(ptr) +
field->bytes_allocated - extra_bytes);
VLOG(2) << "AllocateRaw requested " << num_bytes
<< " bytes which is not divisible by kAllocatorAlignment="
<< Allocator::kAllocatorAlignment << " and hence we allocated "
<< field->bytes_allocated << ". Annotating " << extra_bytes
<< " bytes starting at " << extra_buf
<< " with TF_ANNOTATE_MEMORY_IS_INITIALIZED";
TF_ANNOTATE_MEMORY_IS_INITIALIZED(extra_buf, extra_bytes);
}
return ptr;
}
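Factored out of the code above (a sketch with illustrative names, relying only on the TF_ANNOTATE_MEMORY_IS_INITIALIZED macro from the dynamic_annotations.h header this change includes), the annotation step reduces to the observation that the padding begins right after the requested bytes, since bytes_allocated - extra_bytes == bytes_requested:

#include <cstddef>
#include "tensorflow/core/platform/dynamic_annotations.h"

// Tell MSAN that the alignment padding at the end of a field is initialized.
// `ptr` is the field's base address inside the backing tensor.
static void AnnotateAlignmentPadding(void* ptr, size_t bytes_requested,
                                     size_t bytes_allocated) {
  if (bytes_allocated <= bytes_requested) return;  // no padding to annotate
  char* padding_start = static_cast<char*>(ptr) + bytes_requested;
  TF_ANNOTATE_MEMORY_IS_INITIALIZED(padding_start,
                                    bytes_allocated - bytes_requested);
}

Note also the lock scope in the new code: the field lookup and reference-count bookkeeping stay under mu_, while the logging and the annotation run after the mutex_lock block ends, touching only the already-computed ptr and field pointer.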


@@ -35,7 +35,8 @@ class ScopedAllocator {
struct Field {
int32 scope_id;
size_t offset;
size_t bytes;
size_t bytes_requested;
size_t bytes_allocated;
};
// Field index that refers to backing tensor, not any aliased field.
static const int32 kBackingIndex = -1;


@@ -166,21 +166,35 @@ size_t ScopedAllocatorMgr::PopulateFields(
const DataType dtype, std::vector<ScopedAllocator::Field>* fields) {
const int32 num_fields = static_cast<int32>(shapes.size());
fields->resize(num_fields);
// At the end of iteration `i`, `offset` points to the offset from the start
// of the backing buffer until the end of `field[i].bytes_allocated`. This
// is aligned to `kAllocatorAlignment`.
size_t offset = 0;
for (int32 i = 0; i < num_fields; ++i) {
size_t bytes_requested = shapes[i].num_elements() * DataTypeSize(dtype);
auto* field = &((*fields)[i]);
field->scope_id = scope_id + 1 + i;
field->bytes_requested = bytes_requested;
field->offset = offset;
offset += bytes_requested;
// Compute actual #bytes allocated, which may include padding due to
// alignment.
size_t bytes_allocated = bytes_requested;
size_t overshoot = offset % Allocator::kAllocatorAlignment;
if (overshoot > 0) {
offset += (Allocator::kAllocatorAlignment - overshoot);
size_t alignment_bytes = Allocator::kAllocatorAlignment - overshoot;
bytes_allocated += alignment_bytes;
offset += alignment_bytes;
}
size_t bytes = shapes[i].num_elements() * DataTypeSize(dtype);
(*fields)[i].scope_id = scope_id + 1 + i;
(*fields)[i].bytes = bytes;
(*fields)[i].offset = offset;
VLOG(1) << "field=" << i << " scope_id=" << (*fields)[i].scope_id
<< " bytes=" << (*fields)[i].bytes
<< " offset=" << (*fields)[i].offset;
offset += bytes;
field->bytes_allocated = bytes_allocated;
VLOG(1) << "field=" << i << " scope_id=" << field->scope_id
<< " bytes_requested=" << field->bytes_requested
<< " offset=" << field->offset
<< " bytes_allocated=" << field->bytes_allocated;
}
return offset;
}
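The layout rule above can be condensed into the sketch below (illustrative names; it assumes, as in stock TensorFlow, that Allocator::kAllocatorAlignment is 64 bytes). Because the running offset is aligned at the top of every iteration, rounding the request itself up to the alignment is equivalent to the offset-based overshoot computation in PopulateFields. The numbers in main() match the PopulateFields test that follows.

#include <cstddef>
#include <iostream>
#include <vector>

constexpr size_t kAllocatorAlignment = 64;  // assumed value of Allocator::kAllocatorAlignment

struct FieldLayout {
  size_t offset;           // start of the field in the backing buffer
  size_t bytes_requested;  // exact payload size
  size_t bytes_allocated;  // payload plus alignment padding
};

// Lay the requests out back to back, padding each field's end up to the next
// alignment boundary, as ScopedAllocatorMgr::PopulateFields does.
std::vector<FieldLayout> LayOutFields(const std::vector<size_t>& requests) {
  std::vector<FieldLayout> fields;
  size_t offset = 0;
  for (size_t bytes_requested : requests) {
    size_t overshoot = bytes_requested % kAllocatorAlignment;
    size_t bytes_allocated =
        overshoot == 0 ? bytes_requested
                       : bytes_requested + (kAllocatorAlignment - overshoot);
    fields.push_back({offset, bytes_requested, bytes_allocated});
    offset += bytes_allocated;
  }
  return fields;
}

int main() {
  // The shapes from the PopulateFields test below: 512, 9, and 512 floats.
  for (const FieldLayout& f : LayOutFields(
           {512 * sizeof(float), 9 * sizeof(float), 512 * sizeof(float)})) {
    std::cout << "offset=" << f.offset << " requested=" << f.bytes_requested
              << " allocated=" << f.bytes_allocated << "\n";
  }
  // With 4-byte floats this prints offsets 0, 2048, and 2112: the 36-byte
  // middle field is padded to 64 bytes, so the third field starts at
  // 521 * sizeof(float) + 28 bytes, matching the AlignmentPadding() term below.
  return 0;
}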


@@ -110,13 +110,13 @@ TEST_F(ScopedAllocatorMgrTest, PopulateFields) {
InitTensor();
PopulateFields();
EXPECT_EQ(0, fields_[0].offset);
EXPECT_EQ(512 * sizeof(float), fields_[0].bytes);
EXPECT_EQ(512 * sizeof(float), fields_[0].bytes_requested);
EXPECT_EQ(scope_id_ + 1, fields_[0].scope_id);
EXPECT_EQ(512 * sizeof(float), fields_[1].offset);
EXPECT_EQ(9 * sizeof(float), fields_[1].bytes);
EXPECT_EQ(9 * sizeof(float), fields_[1].bytes_requested);
EXPECT_EQ(scope_id_ + 2, fields_[1].scope_id);
EXPECT_EQ(521 * sizeof(float) + AlignmentPadding(), fields_[2].offset);
EXPECT_EQ(512 * sizeof(float), fields_[2].bytes);
EXPECT_EQ(512 * sizeof(float), fields_[2].bytes_requested);
EXPECT_EQ(scope_id_ + 3, fields_[2].scope_id);
}
@@ -185,9 +185,10 @@ TEST_F(ScopedAllocatorMgrTest, AllocatorInitFail) {
fields_.resize(1);
fields_[0].scope_id = scope_id_ + 1;
fields_[0].offset = 0;
fields_[0].bytes = backing_tensor_shape_.num_elements() * 2 * sizeof(float);
// fields[0].offset + fields[0].bytes is larger than the size of the backing
// tensor, so this check should fail
fields_[0].bytes_requested =
backing_tensor_shape_.num_elements() * 2 * sizeof(float);
// fields[0].offset + fields[0].bytes_requested is larger than the size of the
// backing tensor, so this check should fail
EXPECT_DEATH(Status s = AddScopedAllocator(1, scope_id_), "");
}


@@ -39,7 +39,7 @@ class ScopedAllocatorOp : public OpKernel {
// the subtensors to be allocated from it, taking into account
// alignment considerations.
ScopedAllocatorMgr::PopulateFields(id_, shapes_, dtype_, &fields_);
size_t num_bytes = fields_.back().offset + fields_.back().bytes;
size_t num_bytes = fields_.back().offset + fields_.back().bytes_allocated;
num_elements_ = num_bytes / DataTypeSize(dtype_);
OP_REQUIRES(context, num_bytes % DataTypeSize(dtype_) == 0,
errors::InvalidArgument(
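As a worked example (assuming 64-byte Allocator::kAllocatorAlignment and reusing the three float shapes from the PopulateFields test above): the last field starts at offset 2112 with bytes_allocated = 2048, so num_bytes = 4160 and num_elements_ = 1040. Using bytes_allocated rather than bytes_requested here sizes the backing tensor to cover the final field's padding, so the region annotated in AllocateRaw stays inside the buffer; the divisibility check still passes because every offset and allocation is a multiple of the alignment, which is itself a multiple of DataTypeSize(dtype_) for the standard dtypes.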


@@ -91,8 +91,8 @@ void PrepOp(DataType dtype, int32 id,
ScopedAllocatorMgr::PopulateFields(id, fields_shapes, dtype, fields);
// We don't simply allocate a tensor with shape as backing_tensor_shape,
// because we need to account for padding in the fields. We actually need a
// tensor of size at least (fields[-1].offset + fields[-1].bytes).
size_t num_bytes = fields->back().offset + fields->back().bytes;
// tensor of size at least (fields[-1].offset + fields[-1].bytes_allocated).
size_t num_bytes = fields->back().offset + fields->back().bytes_allocated;
int32_t num_elements = num_bytes / DataTypeSize(dtype);
CHECK_EQ(num_bytes % DataTypeSize(dtype), 0);


@@ -158,7 +158,7 @@ class CollectiveOpTest(test.TestCase):
# to `all_reduce` has an explicit device string. We don't use
# `identity` because `cast` is more resilient to getting optimized
# away by various optimization passes.
input_tensor = math_ops.cast(device_tensors[j], dtypes.float64)
input_tensor = math_ops.cast(device_tensors[j], dtypes.float16)
collective_op = collective_ops.all_reduce(
input_tensor, group_size, group_key, instances[j],
'Add', 'Id')