Annotate alignment bytes in scoped allocator as initialized.
Padding in the scoped allocator may be uninitialized. This is okay because the uninitialized memory is not consumed meaningfully by a downstream op. This change annotates the padding with TF_ANNOTATE_MEMORY_IS_INITIALIZED to avoid MSAN warnings.

PiperOrigin-RevId: 268051135
parent 24888a277e
commit 0e5f75d3be
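
For context, a minimal sketch of the mechanism described in the commit message (not part of the commit, and not TensorFlow's actual implementation): a field carved out of a shared backing buffer is rounded up to the allocator alignment, the padding bytes at its tail are never written by any kernel, and the annotation tells MemorySanitizer to treat them as initialized so that bulk copies of the buffer do not warn. The alignment value and sizes below are made-up, and ANNOTATE_MEMORY_IS_INITIALIZED is a local stand-in that is assumed to mirror what TF_ANNOTATE_MEMORY_IS_INITIALIZED does under MSAN (an __msan_unpoison call).

// Illustrative sketch only; sizes and alignment are example values.
#include <cstddef>
#include <cstdlib>

#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>
#define ANNOTATE_MEMORY_IS_INITIALIZED(p, n) __msan_unpoison(p, n)
#endif
#endif
#ifndef ANNOTATE_MEMORY_IS_INITIALIZED
#define ANNOTATE_MEMORY_IS_INITIALIZED(p, n) ((void)(p), (void)(n))
#endif

int main() {
  const size_t kAlignment = 64;       // assumed allocator alignment
  const size_t bytes_requested = 36;  // e.g. 9 floats
  // Round the field up to the next alignment boundary; the tail is padding.
  const size_t bytes_allocated =
      (bytes_requested + kAlignment - 1) / kAlignment * kAlignment;  // 64

  // Stand-in for one field inside the shared backing tensor.
  char* field = static_cast<char*>(std::malloc(bytes_allocated));

  // Only the requested bytes are produced by the op; the padding stays
  // uninitialized.
  for (size_t i = 0; i < bytes_requested; ++i) field[i] = 1;

  // Mark the padding as initialized so that copying the whole allocated
  // extent does not trigger an MSAN use-of-uninitialized-value report.
  ANNOTATE_MEMORY_IS_INITIALIZED(field + bytes_requested,
                                 bytes_allocated - bytes_requested);

  std::free(field);
  return 0;
}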
@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/common_runtime/scoped_allocator.h"
+
 #include "tensorflow/core/common_runtime/scoped_allocator_mgr.h"
+#include "tensorflow/core/platform/dynamic_annotations.h"
 
 namespace tensorflow {
 
@@ -34,7 +36,7 @@ ScopedAllocator::ScopedAllocator(const Tensor& backing_tensor, int32 scope_id,
   tbuf_->Ref();
   // Hold this until all expected_calls have been made.
   container->Ref();
-  CHECK_GE(tbuf_->size(), fields.back().offset + fields.back().bytes);
+  CHECK_GE(tbuf_->size(), fields.back().offset + fields.back().bytes_requested);
 }
 
 ScopedAllocator::~ScopedAllocator() {
@@ -56,43 +58,66 @@ ScopedAllocator::~ScopedAllocator() {
 void* ScopedAllocator::AllocateRaw(int32 field_index, size_t num_bytes) {
   VLOG(1) << "ScopedAllocator index " << id_ << " AllocateRaw "
           << "field " << field_index << " num_bytes " << num_bytes;
-  mutex_lock l(mu_);
-  if (expected_call_count_ <= 0) {
-    LOG(ERROR) << "Scoped allocator " << name_
-               << " could not satisfy request for " << num_bytes
-               << " bytes, expected uses exhausted. ";
-    return nullptr;
-  }
-
-  int32_t num_fields = static_cast<int32>(fields_.size());
-  if (field_index >= num_fields) {
-    LOG(ERROR) << "ScopedAllocator " << name_
-               << " received unexpected field number " << field_index;
-    return nullptr;
-  }
-
-  const Field& f = fields_[field_index];
-  if (num_bytes != f.bytes) {
-    LOG(ERROR) << "ScopedAllocator " << name_ << " got request for "
-               << num_bytes << " bytes from field " << field_index
-               << " which has precalculated size " << f.bytes << " and offset "
-               << f.offset;
-    return nullptr;
-  }
-
-  void* ptr = static_cast<void*>((tbuf_->template base<char>() + f.offset));
-
-  ++live_alloc_count_;
-  --expected_call_count_;
-  if (0 == expected_call_count_) {
-    for (auto& f : fields_) {
-      container_->Drop(f.scope_id, this);
+  void* ptr = nullptr;
+  const Field* field = nullptr;
+  {
+    mutex_lock l(mu_);
+    if (expected_call_count_ <= 0) {
+      LOG(ERROR) << "Scoped allocator " << name_
+                 << " could not satisfy request for " << num_bytes
+                 << " bytes, expected uses exhausted. ";
+      return nullptr;
+    }
+
+    int32_t num_fields = static_cast<int32>(fields_.size());
+    if (field_index >= num_fields) {
+      LOG(ERROR) << "ScopedAllocator " << name_
+                 << " received unexpected field number " << field_index;
+      return nullptr;
+    }
+
+    field = &fields_[field_index];
+    if (num_bytes != field->bytes_requested) {
+      LOG(ERROR) << "ScopedAllocator " << name_ << " got request for "
+                 << num_bytes << " bytes from field " << field_index
+                 << " which has precalculated size " << field->bytes_requested
+                 << " and offset " << field->offset;
+      return nullptr;
+    }
+
+    ptr = static_cast<void*>((tbuf_->template base<char>() + field->offset));
+
+    ++live_alloc_count_;
+    --expected_call_count_;
+    if (0 == expected_call_count_) {
+      for (auto& f : fields_) {
+        container_->Drop(f.scope_id, this);
+      }
+      container_->Drop(id_, this);
+      container_->Unref();
+      container_ = nullptr;
     }
-    container_->Drop(id_, this);
-    container_->Unref();
-    container_ = nullptr;
   }
-  VLOG(1) << "AllocateRaw returning " << ptr;
+  VLOG(2) << "AllocateRaw returning " << ptr << " bytes_requested "
+          << field->bytes_requested << " bytes_allocated "
+          << field->bytes_allocated;
+
+  // If there is overshoot due to alignment, let MSAN believe that the padding
+  // is initialized. This is okay because we do not use this memory region for
+  // anything meaningful.
+  if (field->bytes_allocated > field->bytes_requested) {
+    size_t extra_bytes = field->bytes_allocated - field->bytes_requested;
+    void* extra_buf = static_cast<void*>(static_cast<char*>(ptr) +
+                                         field->bytes_allocated - extra_bytes);
+    VLOG(2) << "AllocateRaw requested " << num_bytes
+            << " bytes which is not divisible by kAllocatorAlignment="
+            << Allocator::kAllocatorAlignment << " and hence we allocated "
+            << field->bytes_allocated << ". Annotating " << extra_bytes
+            << " bytes starting at " << extra_buf
+            << " with TF_ANNOTATE_MEMORY_IS_INITIALIZED";
+    TF_ANNOTATE_MEMORY_IS_INITIALIZED(extra_buf, extra_bytes);
+  }
+
   return ptr;
 }
 
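A small worked example of the pointer arithmetic in the new block above, using hypothetical sizes (36 requested bytes padded to 64; these numbers are not from the commit): the annotated region starts immediately after the requested bytes, since ptr + bytes_allocated - extra_bytes is the same address as ptr + bytes_requested.

#include <cassert>
#include <cstddef>

int main() {
  // Hypothetical field layout inside the backing buffer.
  const size_t bytes_requested = 36;  // what the op actually writes
  const size_t bytes_allocated = 64;  // rounded up to the allocator alignment
  alignas(64) char backing[64];
  char* ptr = backing;  // start of the field

  // Same arithmetic as AllocateRaw above.
  const size_t extra_bytes = bytes_allocated - bytes_requested;  // 28
  char* extra_buf = ptr + bytes_allocated - extra_bytes;

  // The padding region is exactly the unwritten tail of the field.
  assert(extra_buf == ptr + bytes_requested);
  return 0;
}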
@@ -35,7 +35,8 @@ class ScopedAllocator {
   struct Field {
     int32 scope_id;
     size_t offset;
-    size_t bytes;
+    size_t bytes_requested;
+    size_t bytes_allocated;
   };
   // Field index that refers to backing tensor, not any aliased field.
   static const int32 kBackingIndex = -1;
@@ -166,21 +166,35 @@ size_t ScopedAllocatorMgr::PopulateFields(
     const DataType dtype, std::vector<ScopedAllocator::Field>* fields) {
   const int32 num_fields = static_cast<int32>(shapes.size());
   fields->resize(num_fields);
+  // At the end of iteration `i`, `offset` points to the offset from the start
+  // of the backing buffer until the end of `field[i].bytes_allocated`. This
+  // is aligned to `kAllocatorAlignment`.
   size_t offset = 0;
   for (int32 i = 0; i < num_fields; ++i) {
+    size_t bytes_requested = shapes[i].num_elements() * DataTypeSize(dtype);
+    auto* field = &((*fields)[i]);
+    field->scope_id = scope_id + 1 + i;
+    field->bytes_requested = bytes_requested;
+    field->offset = offset;
+    offset += bytes_requested;
+
+    // Compute actual #bytes allocated, which may include padding due to
+    // alignment.
+    size_t bytes_allocated = bytes_requested;
     size_t overshoot = offset % Allocator::kAllocatorAlignment;
     if (overshoot > 0) {
-      offset += (Allocator::kAllocatorAlignment - overshoot);
+      size_t alignment_bytes = Allocator::kAllocatorAlignment - overshoot;
+      bytes_allocated += alignment_bytes;
+      offset += alignment_bytes;
     }
-    size_t bytes = shapes[i].num_elements() * DataTypeSize(dtype);
-    (*fields)[i].scope_id = scope_id + 1 + i;
-    (*fields)[i].bytes = bytes;
-    (*fields)[i].offset = offset;
-    VLOG(1) << "field=" << i << " scope_id=" << (*fields)[i].scope_id
-            << " bytes=" << (*fields)[i].bytes
-            << " offset=" << (*fields)[i].offset;
-    offset += bytes;
+    field->bytes_allocated = bytes_allocated;
+
+    VLOG(1) << "field=" << i << " scope_id=" << field->scope_id
+            << " bytes_requested=" << field->bytes_requested
+            << " offset=" << field->offset
+            << " bytes_allocated=" << field->bytes_allocated;
   }
+
   return offset;
 }
 
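To make the bookkeeping above concrete, here is a standalone sketch of the same layout computation (not TensorFlow code; the 64-byte alignment stands in for Allocator::kAllocatorAlignment and the byte counts are made up).

#include <cstddef>
#include <cstdio>
#include <vector>

// Sketch of the per-field bookkeeping in PopulateFields above.
struct FieldSketch {
  size_t offset;
  size_t bytes_requested;
  size_t bytes_allocated;
};

std::vector<FieldSketch> Layout(const std::vector<size_t>& requested,
                                size_t alignment) {
  std::vector<FieldSketch> fields;
  size_t offset = 0;
  for (size_t bytes_requested : requested) {
    FieldSketch f;
    f.offset = offset;
    f.bytes_requested = bytes_requested;
    offset += bytes_requested;

    // Pad the field so the next one starts on an alignment boundary.
    size_t bytes_allocated = bytes_requested;
    size_t overshoot = offset % alignment;
    if (overshoot > 0) {
      size_t alignment_bytes = alignment - overshoot;
      bytes_allocated += alignment_bytes;
      offset += alignment_bytes;
    }
    f.bytes_allocated = bytes_allocated;
    fields.push_back(f);
  }
  return fields;
}

int main() {
  // Three fields requesting 100, 8 and 256 bytes, 64-byte alignment assumed.
  for (const FieldSketch& f : Layout({100, 8, 256}, 64)) {
    std::printf("offset=%zu requested=%zu allocated=%zu\n", f.offset,
                f.bytes_requested, f.bytes_allocated);
  }
  // Prints: offset=0 requested=100 allocated=128
  //         offset=128 requested=8 allocated=64
  //         offset=192 requested=256 allocated=256
  return 0;
}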
@@ -110,13 +110,13 @@ TEST_F(ScopedAllocatorMgrTest, PopulateFields) {
   InitTensor();
   PopulateFields();
   EXPECT_EQ(0, fields_[0].offset);
-  EXPECT_EQ(512 * sizeof(float), fields_[0].bytes);
+  EXPECT_EQ(512 * sizeof(float), fields_[0].bytes_requested);
   EXPECT_EQ(scope_id_ + 1, fields_[0].scope_id);
   EXPECT_EQ(512 * sizeof(float), fields_[1].offset);
-  EXPECT_EQ(9 * sizeof(float), fields_[1].bytes);
+  EXPECT_EQ(9 * sizeof(float), fields_[1].bytes_requested);
   EXPECT_EQ(scope_id_ + 2, fields_[1].scope_id);
   EXPECT_EQ(521 * sizeof(float) + AlignmentPadding(), fields_[2].offset);
-  EXPECT_EQ(512 * sizeof(float), fields_[2].bytes);
+  EXPECT_EQ(512 * sizeof(float), fields_[2].bytes_requested);
   EXPECT_EQ(scope_id_ + 3, fields_[2].scope_id);
 }
 
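The expected offsets here follow directly from the layout arithmetic; below is a small check of the third field's offset, assuming a 64-byte Allocator::kAllocatorAlignment (an assumption of this sketch; the test itself only uses it indirectly through AlignmentPadding()).

#include <cassert>
#include <cstddef>

int main() {
  const size_t kAssumedAlignment = 64;        // assumed kAllocatorAlignment
  const size_t field0 = 512 * sizeof(float);  // 2048 bytes, already aligned
  const size_t field1 = 9 * sizeof(float);    // 36 bytes
  // Padding after field 1 so that field 2 starts on an alignment boundary
  // (the running total is not already aligned here, so no special case).
  const size_t padding =
      kAssumedAlignment - (field0 + field1) % kAssumedAlignment;  // 28
  const size_t field2_offset = field0 + field1 + padding;         // 2112
  assert(field2_offset == 521 * sizeof(float) + padding);
  assert(field2_offset % kAssumedAlignment == 0);
  return 0;
}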
@@ -185,9 +185,10 @@ TEST_F(ScopedAllocatorMgrTest, AllocatorInitFail) {
   fields_.resize(1);
   fields_[0].scope_id = scope_id_ + 1;
   fields_[0].offset = 0;
-  fields_[0].bytes = backing_tensor_shape_.num_elements() * 2 * sizeof(float);
-  // fields[0].offset + fields[0].bytes is larger than the size of the backing
-  // tensor, so this check should fail
+  fields_[0].bytes_requested =
+      backing_tensor_shape_.num_elements() * 2 * sizeof(float);
+  // fields[0].offset + fields[0].bytes_requested is larger than the size of the
+  // backing tensor, so this check should fail
   EXPECT_DEATH(Status s = AddScopedAllocator(1, scope_id_), "");
 }
 
@@ -39,7 +39,7 @@ class ScopedAllocatorOp : public OpKernel {
     // the subtensors to be allocated from it, taking into account
     // alignment considerations.
     ScopedAllocatorMgr::PopulateFields(id_, shapes_, dtype_, &fields_);
-    size_t num_bytes = fields_.back().offset + fields_.back().bytes;
+    size_t num_bytes = fields_.back().offset + fields_.back().bytes_allocated;
     num_elements_ = num_bytes / DataTypeSize(dtype_);
     OP_REQUIRES(context, num_bytes % DataTypeSize(dtype_) == 0,
                 errors::InvalidArgument(
@@ -91,8 +91,8 @@ void PrepOp(DataType dtype, int32 id,
   ScopedAllocatorMgr::PopulateFields(id, fields_shapes, dtype, fields);
   // We don't simply allocate a tensor with shape as backing_tensor_shape,
   // because we need to account for padding in the fields. We actually need a
-  // tensor of size at least (fields[-1].offset + fields[-1].bytes).
-  size_t num_bytes = fields->back().offset + fields->back().bytes;
+  // tensor of size at least (fields[-1].offset + fields[-1].bytes_allocated).
+  size_t num_bytes = fields->back().offset + fields->back().bytes_allocated;
   int32_t num_elements = num_bytes / DataTypeSize(dtype);
   CHECK_EQ(num_bytes % DataTypeSize(dtype), 0);
 
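A brief illustration of the sizing comment above, with hypothetical numbers consistent with the earlier sketches: the backing buffer has to cover the last field's padded extent (offset + bytes_allocated), not just its requested bytes.

#include <cassert>
#include <cstddef>

int main() {
  // Hypothetical last field, assuming 64-byte alignment: it starts at offset
  // 2112 and requests 36 bytes but occupies 64 including padding.
  const size_t last_offset = 2112;
  const size_t last_bytes_requested = 36;
  const size_t last_bytes_allocated = 64;

  // Sizing the buffer with bytes_requested would leave the last field's
  // padding outside the backing tensor.
  const size_t num_bytes = last_offset + last_bytes_allocated;  // 2176
  assert(num_bytes >= last_offset + last_bytes_requested);
  assert(num_bytes % 64 == 0);  // the whole buffer stays alignment-sized
  return 0;
}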
@@ -158,7 +158,7 @@ class CollectiveOpTest(test.TestCase):
         # to `all_reduce` has an explicit device string. We don't use
         # `identity` because `cast` is more resilient to getting optimized
         # away by various optimization passes.
-        input_tensor = math_ops.cast(device_tensors[j], dtypes.float64)
+        input_tensor = math_ops.cast(device_tensors[j], dtypes.float16)
         collective_op = collective_ops.all_reduce(
             input_tensor, group_size, group_key, instances[j],
             'Add', 'Id')