Do a hard failure if deallocation of a ScopedDevicePointer fails.
Deallocations are called from destructors, and it's not possible to return status from them. Furthermore, failures only occur if the datastructure gets into an inconsistent state. PiperOrigin-RevId: 249862507
This commit is contained in:
parent
1188b9e764
commit
7a9d4a61f1
@ -25,6 +25,7 @@ limitations under the License.
|
||||
#include "tensorflow/compiler/xla/status_macros.h"
|
||||
#include "tensorflow/compiler/xla/types.h"
|
||||
#include "tensorflow/compiler/xla/util.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
#include "tensorflow/stream_executor/device_memory_allocator.h"
|
||||
|
||||
@ -237,7 +238,7 @@ Status AllocationTracker::DecrementRefCount(se::DeviceMemoryBase device_memory,
|
||||
Allocation& allocation = it->second;
|
||||
TF_RET_CHECK(allocation.ref_count >= 1);
|
||||
if (allocation.ref_count == 1) {
|
||||
allocation.device_memory.Free();
|
||||
TF_RETURN_IF_ERROR(allocation.device_memory.Free());
|
||||
allocation_map.erase(it);
|
||||
} else {
|
||||
allocation.ref_count--;
|
||||
|
@ -19,6 +19,8 @@ limitations under the License.
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/span.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
#include "tensorflow/core/lib/core/status.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
#include "tensorflow/stream_executor/device_memory.h"
|
||||
#include "tensorflow/stream_executor/lib/statusor.h"
|
||||
@ -80,13 +82,13 @@ class ScopedDeviceMemory {
|
||||
|
||||
// Releases the memory that was provided in the constructor, through the
|
||||
// "parent" StreamExecutor.
|
||||
~ScopedDeviceMemory() { Free(); }
|
||||
~ScopedDeviceMemory() { TF_CHECK_OK(Free()); }
|
||||
|
||||
// Moves ownership of the memory from other to this object.
|
||||
//
|
||||
// Postcondition: other == nullptr.
|
||||
ScopedDeviceMemory &operator=(ScopedDeviceMemory &&other) {
|
||||
Free();
|
||||
TF_CHECK_OK(Free());
|
||||
wrapped_ = other.Release();
|
||||
allocator_ = other.allocator_;
|
||||
device_ordinal_ = other.device_ordinal_;
|
||||
@ -132,7 +134,7 @@ class ScopedDeviceMemory {
|
||||
int device_ordinal() const { return device_ordinal_; }
|
||||
|
||||
// Frees the existing memory, resets the wrapped memory to null.
|
||||
void Free();
|
||||
port::Status Free();
|
||||
|
||||
private:
|
||||
DeviceMemory<ElemT> wrapped_; // Value we wrap with scoped-release.
|
||||
@ -236,15 +238,13 @@ class StreamExecutorMemoryAllocator : public DeviceMemoryAllocator {
|
||||
};
|
||||
|
||||
template <typename ElemT>
|
||||
void ScopedDeviceMemory<ElemT>::Free() {
|
||||
port::Status ScopedDeviceMemory<ElemT>::Free() {
|
||||
if (!wrapped_.is_null()) {
|
||||
DCHECK(allocator_ != nullptr);
|
||||
auto status = allocator_->Deallocate(device_ordinal_, wrapped_);
|
||||
if (!status.ok()) {
|
||||
LOG(WARNING) << "Deallocating buffer " << wrapped_.opaque() << " failed";
|
||||
}
|
||||
CHECK(allocator_ != nullptr) << "Owning pointer in inconsistent state";
|
||||
TF_RETURN_IF_ERROR(allocator_->Deallocate(device_ordinal_, wrapped_));
|
||||
}
|
||||
wrapped_ = DeviceMemory<ElemT>{};
|
||||
return port::Status::OK();
|
||||
}
|
||||
|
||||
} // namespace stream_executor
|
||||
|
@ -795,7 +795,7 @@ ScopedDeviceMemory<ElemT>::ScopedDeviceMemory(
|
||||
std::vector<ElemT> local(values);
|
||||
if (!parent->SynchronousMemcpy(ptr(), const_cast<const ElemT *>(&local[0]),
|
||||
ptr()->size())) {
|
||||
Free();
|
||||
TF_CHECK_OK(Free());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user