[XLA] Add support to limit outstanding async copies in memory space assignment.
If we hit the limit, we try prefetching later and scheduling evictions at different intervals. In the worst case, we bail out and keep the buffer in the default memory space. PiperOrigin-RevId: 264258761
This commit is contained in:
parent
d22bd44fcf
commit
a0ee95db22
@ -742,6 +742,12 @@ GlobalDecreasingSizeBestFitHeap::FindChunkCandidate(
|
||||
offset = std::max(offset, RoundUpToNearest(chunk.chunk_end(), alignment_));
|
||||
}
|
||||
use_free_chunk_if_smaller(offset, result_.heap_size - offset);
|
||||
// When preferred offset is provided and the preferred offset is larger than
|
||||
// the current heap size, simply use the preferred offset provided.
|
||||
if (result_.heap_size <= preferred_offset) {
|
||||
chunk_candidate.heap_size = preferred_offset + buffer_interval.size;
|
||||
min_fit_chunk = {preferred_offset, buffer_interval.size};
|
||||
}
|
||||
|
||||
if (min_fit_chunk.offset == -1) {
|
||||
// Increase the heap size to fit in the last free chunk.
|
||||
|
@ -19,8 +19,8 @@ namespace xla {
|
||||
|
||||
namespace {
|
||||
// Define a dummy chunk for chunks that will be allocated in the default memory
|
||||
// space.
|
||||
const HeapSimulator::Chunk kDefaultMemorySpaceDummyChunk{-1, -1};
|
||||
// space and for keeping track of number of asynchronous copies.
|
||||
const HeapSimulator::Chunk kDummyChunk{-1, -1};
|
||||
} // namespace
|
||||
|
||||
std::vector<const GlobalDecreasingSizeBestFitHeap::BufferInterval*>
|
||||
@ -91,12 +91,12 @@ HeapSimulator::Result AlternateMemoryBestFitHeap::Finish() {
|
||||
|
||||
MemorySpaceAssignment::AllocationSequence* allocation_sequence =
|
||||
&(*allocation_map_)[&buffer];
|
||||
if (keep_in_default_memory) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// At this point, none of the colocated buffers contain any phi buffers.
|
||||
for (const BufferInterval* colocated_interval : colocated_intervals) {
|
||||
if (keep_in_default_memory) {
|
||||
break;
|
||||
}
|
||||
const HloValue* value = colocated_interval->buffer;
|
||||
int64 definition_time =
|
||||
instruction_schedule_->at(value->defining_instruction());
|
||||
@ -114,15 +114,27 @@ HeapSimulator::Result AlternateMemoryBestFitHeap::Finish() {
|
||||
// Skip allocating buffers for bitcast uses. The uses that feed from
|
||||
// bitcasts will be handled specially.
|
||||
if (use.instruction->opcode() != HloOpcode::kBitcast) {
|
||||
FindAllocation(definition_time, use_time, value->defining_position(),
|
||||
use, value, colocated_interval->size,
|
||||
allocation_sequence);
|
||||
if (!FindAllocation(definition_time, use_time,
|
||||
value->defining_position(), use, value,
|
||||
colocated_interval->size, allocation_sequence)) {
|
||||
// If the allocation finding failed (e.g., due to running out of
|
||||
// asynchronous copies), then fall back to allocating the buffer
|
||||
// entirely in the default memory.
|
||||
pending_chunks_.clear();
|
||||
pending_async_copies_.clear();
|
||||
allocation_sequence->clear();
|
||||
keep_in_default_memory = true;
|
||||
break;
|
||||
}
|
||||
|
||||
// If there are multiple uses, they can try using the memory
|
||||
// allocation already at the alternate memory.
|
||||
definition_time = use_time;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
CommitPendingChunks();
|
||||
}
|
||||
|
||||
if (VLOG_IS_ON(3)) {
|
||||
@ -147,7 +159,32 @@ HloInstruction* AlternateMemoryBestFitHeap::GetInstructionAt(int64 time) const {
|
||||
return flattened_instruction_sequence_->instructions()[time];
|
||||
}
|
||||
|
||||
void AlternateMemoryBestFitHeap::FindAllocation(
|
||||
void AlternateMemoryBestFitHeap::CommitPendingChunks() {
|
||||
for (auto interval_and_chunk : pending_chunks_) {
|
||||
VLOG(3) << "Committing chunk: " << interval_and_chunk.first.start << "-"
|
||||
<< interval_and_chunk.first.end << " : ["
|
||||
<< interval_and_chunk.second.chunk.offset << ", "
|
||||
<< interval_and_chunk.second.chunk.size << "]";
|
||||
CommitChunk(interval_and_chunk.first, interval_and_chunk.second);
|
||||
}
|
||||
pending_chunks_.clear();
|
||||
// Also add the pending async copies to the interval tree.
|
||||
if (max_outstanding_async_copies_ >= 0) {
|
||||
for (auto interval : pending_async_copies_) {
|
||||
async_copy_interval_tree_.Add(interval.first, interval.second,
|
||||
kDummyChunk);
|
||||
}
|
||||
}
|
||||
pending_async_copies_.clear();
|
||||
}
|
||||
|
||||
void AlternateMemoryBestFitHeap::AddToPendingChunks(
|
||||
const BufferInterval& buffer_interval,
|
||||
const ChunkCandidate& chunk_candidate) {
|
||||
pending_chunks_.emplace_back(buffer_interval, chunk_candidate);
|
||||
}
|
||||
|
||||
bool AlternateMemoryBestFitHeap::FindAllocation(
|
||||
int64 start_time, int64 end_time, HloPosition defining_position, HloUse use,
|
||||
const HloValue* buffer, int64 size,
|
||||
MemorySpaceAssignment::AllocationSequence* allocations) {
|
||||
@ -181,7 +218,7 @@ void AlternateMemoryBestFitHeap::FindAllocation(
|
||||
if (TryAllocatingInAlternateMemoryNoCopy(
|
||||
start_time, end_time, defining_position, use, alternate_mem_interval,
|
||||
non_bitcast_operand, allocations)) {
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
MemorySpaceAssignment::Allocation* prev_allocation = nullptr;
|
||||
@ -199,26 +236,46 @@ void AlternateMemoryBestFitHeap::FindAllocation(
|
||||
// TODO(berkin): For now evictions happen relative to the most recent
|
||||
// allocation in the alternate memory. We can potentially start evictions
|
||||
// earlier and end later.
|
||||
HloInstruction* earliest_instruction =
|
||||
GetInstructionAt(prev_allocation->start_time());
|
||||
HloInstruction* latest_instruction =
|
||||
GetInstructionAt(prev_allocation->end_time());
|
||||
|
||||
VLOG(3) << "Evicting buffer at " << prev_allocation->chunk().offset << " ("
|
||||
<< prev_allocation->start_time() << ", "
|
||||
<< prev_allocation->end_time() << ")";
|
||||
VLOG(3) << "Copy to default mem between instructions "
|
||||
<< earliest_instruction->ToString() << " - "
|
||||
<< latest_instruction->ToString();
|
||||
|
||||
// The live range of this buffer is from the start time of the previous
|
||||
// buffer that was in the alternate memory so that a buffer is allocated
|
||||
// during the copy.
|
||||
allocations->push_back(
|
||||
absl::make_unique<MemorySpaceAssignment::CopyAllocation>(
|
||||
*prev_allocation, MemorySpace::kDefault,
|
||||
kDefaultMemorySpaceDummyChunk, prev_allocation->start_time(),
|
||||
end_time, earliest_instruction, latest_instruction));
|
||||
// See if this interval would violate the asynchronous copy limit.
|
||||
if (!ViolatesMaximumOutstandingAsyncCopies(prev_allocation->start_time(),
|
||||
prev_allocation->end_time())) {
|
||||
AddAsyncCopy(*prev_allocation, MemorySpace::kDefault, kDummyChunk,
|
||||
prev_allocation->start_time(), prev_allocation->end_time(),
|
||||
allocations);
|
||||
|
||||
} else {
|
||||
VLOG(3) << "This violates the maximum async copies.";
|
||||
// If the original interval violated the limit, try sub-intervals within
|
||||
// this interval.
|
||||
bool eviction_scheduled = false;
|
||||
for (int64 time = prev_allocation->start_time();
|
||||
time <= prev_allocation->end_time(); ++time) {
|
||||
VLOG(3) << "Try evicting (" << time << ", " << time << ")";
|
||||
if (!ViolatesMaximumOutstandingAsyncCopies(time, time)) {
|
||||
VLOG(3) << "Eviction successful.";
|
||||
AddAsyncCopy(*prev_allocation, MemorySpace::kDefault, kDummyChunk,
|
||||
time, time, allocations);
|
||||
eviction_scheduled = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!eviction_scheduled) {
|
||||
// If the eviction couldn't be scheduled, then fail. This buffer will be
|
||||
// kept in the default memory.
|
||||
VLOG(3) << "Bailing: Could not evict " << use.ToString()
|
||||
<< " because we hit the limit of maximum asynchronous copies "
|
||||
<< "between "
|
||||
<< GetInstructionAt(prev_allocation->start_time())->ToString()
|
||||
<< " and "
|
||||
<< GetInstructionAt(prev_allocation->end_time())->ToString();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else if (prev_allocation != nullptr &&
|
||||
prev_allocation->memory_space() == MemorySpace::kDefault &&
|
||||
prev_allocation->instruction() == non_bitcast_operand) {
|
||||
@ -229,7 +286,7 @@ void AlternateMemoryBestFitHeap::FindAllocation(
|
||||
} else {
|
||||
allocations->push_back(absl::make_unique<MemorySpaceAssignment::Allocation>(
|
||||
non_bitcast_operand, defining_position, MemorySpace::kDefault,
|
||||
kDefaultMemorySpaceDummyChunk, start_time, end_time));
|
||||
kDummyChunk, start_time, end_time));
|
||||
}
|
||||
|
||||
// Try partially placing the buffer in the alternate space. The time that is
|
||||
@ -252,35 +309,81 @@ void AlternateMemoryBestFitHeap::FindAllocation(
|
||||
VLOG(4) << "Trying alternate memory allocation ("
|
||||
<< alternate_mem_interval.start << ", "
|
||||
<< alternate_mem_interval.end << ")";
|
||||
// If this additional asynchronous copy would violate the limit, try a
|
||||
// different interval.
|
||||
if (ViolatesMaximumOutstandingAsyncCopies(alternate_mem_interval.start,
|
||||
alternate_mem_interval.end)) {
|
||||
VLOG(4) << "This would violate the outstanding async copy limit.";
|
||||
continue;
|
||||
}
|
||||
ChunkCandidate chunk_candidate = FindChunkCandidate(alternate_mem_interval);
|
||||
// Check if the new heap size fits within limits.
|
||||
if (chunk_candidate.heap_size < max_size_in_bytes_) {
|
||||
HloInstruction* earliest_instruction =
|
||||
GetInstructionAt(alternate_mem_interval.start);
|
||||
VLOG(3) << "Move the buffer to alternate memory at "
|
||||
<< alternate_mem_interval.start
|
||||
<< ". Offset = " << chunk_candidate.chunk.offset
|
||||
<< ", size = " << chunk_candidate.chunk.size
|
||||
<< ", heap_size = " << chunk_candidate.heap_size;
|
||||
VLOG(3) << "Copy to alternate mem between instructions "
|
||||
<< earliest_instruction->ToString() << " - "
|
||||
<< use.instruction->ToString();
|
||||
CommitChunk(alternate_mem_interval, chunk_candidate);
|
||||
AddToPendingChunks(alternate_mem_interval, chunk_candidate);
|
||||
|
||||
AddAsyncCopy(*allocations->back().get(), MemorySpace::kAlternate,
|
||||
chunk_candidate.chunk, alternate_mem_interval.start,
|
||||
end_time, allocations);
|
||||
|
||||
// Since copies couldn't be removed, create an allocation in the
|
||||
// default memory space.
|
||||
allocations->push_back(
|
||||
absl::make_unique<MemorySpaceAssignment::CopyAllocation>(
|
||||
*allocations->back().get(), MemorySpace::kAlternate,
|
||||
chunk_candidate.chunk, alternate_mem_interval.start, end_time,
|
||||
earliest_instruction, use.instruction));
|
||||
allocations->back()->AddUse(use);
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// If a copy wasn't inserted, then add this use to the latest allocation.
|
||||
allocations->back()->AddUse(use);
|
||||
return true;
|
||||
}
|
||||
|
||||
void AlternateMemoryBestFitHeap::AddAsyncCopy(
|
||||
const MemorySpaceAssignment::Allocation& prev_allocation,
|
||||
MemorySpace memory_space, Chunk chunk, int64 start_time, int64 end_time,
|
||||
MemorySpaceAssignment::AllocationSequence* allocations) {
|
||||
HloInstruction* earliest_instruction = GetInstructionAt(start_time);
|
||||
HloInstruction* latest_instruction = GetInstructionAt(end_time);
|
||||
|
||||
VLOG(3) << "Copy to "
|
||||
<< (memory_space == MemorySpaceAssignment::MemorySpace::kDefault
|
||||
? "default"
|
||||
: "alternate")
|
||||
<< " memory between instructions " << earliest_instruction->ToString()
|
||||
<< " - " << latest_instruction->ToString();
|
||||
|
||||
allocations->push_back(
|
||||
absl::make_unique<MemorySpaceAssignment::CopyAllocation>(
|
||||
prev_allocation, memory_space, chunk, start_time, end_time,
|
||||
earliest_instruction, latest_instruction));
|
||||
|
||||
// Register the additional async copy with the interval tree to keep track of
|
||||
// the limit at any given time.
|
||||
pending_async_copies_.emplace_back(start_time, end_time);
|
||||
}
|
||||
|
||||
// Returns true if adding one more asynchronous copy over
// [start_time, end_time] would exceed max_outstanding_async_copies_.
// A negative limit means "unlimited", in which case this always returns false.
bool AlternateMemoryBestFitHeap::ViolatesMaximumOutstandingAsyncCopies(
    int64 start_time, int64 end_time) const {
  const bool limit_enabled = max_outstanding_async_copies_ >= 0;
  if (!limit_enabled) {
    return false;
  }

  // Start from the copies already recorded in the interval tree ...
  int64 outstanding =
      async_copy_interval_tree_.ChunksOverlappingInTime(start_time, end_time)
          .size();

  // ... then add the not-yet-committed copies staged for the current buffer.
  // NOTE(review): this overlap test uses strict inequalities, so a pending
  // interval that only touches an endpoint of the query (including a pending
  // single-point interval equal to a single-point query) is not counted.
  // Confirm this matches the boundary convention of ChunksOverlappingInTime.
  for (const auto& pending : pending_async_copies_) {
    const bool disjoint =
        pending.second <= start_time || pending.first >= end_time;
    if (!disjoint) {
      ++outstanding;
    }
  }

  // The copy under consideration is one more on top of `outstanding`, so the
  // limit is violated as soon as `outstanding` has already reached it.
  return outstanding >= max_outstanding_async_copies_;
}
|
||||
|
||||
bool AlternateMemoryBestFitHeap::TryAllocatingInAlternateMemoryNoCopy(
|
||||
@ -332,7 +435,7 @@ bool AlternateMemoryBestFitHeap::TryAllocatingInAlternateMemoryNoCopy(
|
||||
<< chunk_candidate.chunk.offset
|
||||
<< ", size = " << chunk_candidate.chunk.size
|
||||
<< ", heap_size = " << chunk_candidate.heap_size;
|
||||
CommitChunk(alternate_mem_interval, chunk_candidate);
|
||||
AddToPendingChunks(alternate_mem_interval, chunk_candidate);
|
||||
|
||||
// If there was a previous allocation, the buffer location is the
|
||||
// same as the previous. Otherwise, it is the operand.
|
||||
@ -351,6 +454,22 @@ bool AlternateMemoryBestFitHeap::TryAllocatingInAlternateMemoryNoCopy(
|
||||
return false;
|
||||
}
|
||||
|
||||
// Walks the scheduled instruction sequence of the module's entry computation
// and returns the largest number of copies that are simultaneously in flight,
// i.e. kCopyStart instructions seen so far minus kCopyDone instructions seen
// so far. Only the entry computation's sequence is examined.
/*static*/ int64 MemorySpaceAssignment::CountMaximumOutstandingAsyncCopies(
    const HloModule& module) {
  int64 peak = 0;
  int64 in_flight = 0;
  for (HloInstruction* instruction :
       module.schedule().sequence(module.entry_computation()).instructions()) {
    switch (instruction->opcode()) {
      case HloOpcode::kCopyStart:
        ++in_flight;
        break;
      case HloOpcode::kCopyDone:
        --in_flight;
        break;
      default:
        break;
    }
    peak = std::max(peak, in_flight);
  }
  return peak;
}
|
||||
|
||||
/*static*/ StatusOr<std::unique_ptr<PresetAssignments>>
|
||||
MemorySpaceAssignment::Run(
|
||||
HloModule* module, int64 alternate_memory_space, int64 max_size_in_bytes,
|
||||
@ -358,7 +477,8 @@ MemorySpaceAssignment::Run(
|
||||
int64 alternate_memory_space_alignment_in_bytes,
|
||||
BufferValue::SizeFunction size_fn,
|
||||
AlternateMemoryBestFitHeap::IsAllowedInAlternateMemoryFunction
|
||||
is_allowed_in_alternate_mem) {
|
||||
is_allowed_in_alternate_mem,
|
||||
int64 max_outstanding_async_copies) {
|
||||
CHECK(module->has_schedule());
|
||||
VLOG(4) << "Module before memory space assignment: ";
|
||||
XLA_VLOG_LINES(4, module->ToString());
|
||||
@ -372,7 +492,7 @@ MemorySpaceAssignment::Run(
|
||||
min_prefetch_interval, max_prefetch_interval, *alias_analysis,
|
||||
alternate_memory_space_alignment_in_bytes,
|
||||
GlobalDecreasingSizeBestFitHeap::Type::kSpatial,
|
||||
is_allowed_in_alternate_mem);
|
||||
is_allowed_in_alternate_mem, max_outstanding_async_copies);
|
||||
|
||||
TF_RETURN_IF_ERROR(HeapSimulator::Run(std::move(algorithm), *module,
|
||||
module->schedule(),
|
||||
@ -385,6 +505,8 @@ MemorySpaceAssignment::Run(
|
||||
VLOG(4) << "Module after memory space assignment: ";
|
||||
XLA_VLOG_LINES(4, module->ToString());
|
||||
TF_CHECK_OK(module->schedule().Verify());
|
||||
VLOG(1) << "Maximum number of outstanding async copies: "
|
||||
<< CountMaximumOutstandingAsyncCopies(*module);
|
||||
|
||||
return std::move(memory_space_assignment.preset_assignments_);
|
||||
}
|
||||
|
@ -200,6 +200,8 @@ class MemorySpaceAssignment {
|
||||
// in the alternate memory space, size_fn is the size function for buffer
|
||||
// values, and is_allowed_in_alternate_mem can be used to prevent certain
|
||||
// HloValues (e.g., based on the opcode) to be placed on the alternate memory.
|
||||
// max_outstanding_async_copies specifies the upper bound for number of
|
||||
// outstanding asynchronous copies, -1 for unlimited.
|
||||
// TODO(berkin): Use the cost model instead of using number of instructions to
|
||||
// decide how early to prefetch.
|
||||
static StatusOr<std::unique_ptr<PresetAssignments>> Run(
|
||||
@ -207,7 +209,12 @@ class MemorySpaceAssignment {
|
||||
int64 min_prefetch_interval, int64 max_prefetch_interval,
|
||||
int64 alternate_memory_space_alignment_in_bytes,
|
||||
BufferValue::SizeFunction size_fn,
|
||||
std::function<bool(const HloValue&)> is_allowed_in_alternate_mem);
|
||||
std::function<bool(const HloValue&)> is_allowed_in_alternate_mem,
|
||||
int64 max_outstanding_async_copies = -1);
|
||||
|
||||
// Returns the maximum number of outstanding asynchronous copies in the
|
||||
// module.
|
||||
static int64 CountMaximumOutstandingAsyncCopies(const HloModule& module);
|
||||
|
||||
private:
|
||||
MemorySpaceAssignment(HloModule* module, int64 alternate_memory_space)
|
||||
@ -265,14 +272,16 @@ class AlternateMemoryBestFitHeap : public GlobalDecreasingSizeBestFitHeap {
|
||||
int64 max_size_in_bytes, int64 min_prefetch_interval,
|
||||
int64 max_prefetch_interval, const HloAliasAnalysis& alias_analysis,
|
||||
int64 alignment, GlobalDecreasingSizeBestFitHeap::Type type,
|
||||
IsAllowedInAlternateMemoryFunction is_allowed_in_alternate_mem)
|
||||
IsAllowedInAlternateMemoryFunction is_allowed_in_alternate_mem,
|
||||
int64 max_outstanding_async_copies)
|
||||
: GlobalDecreasingSizeBestFitHeap(alignment, type),
|
||||
allocation_map_(allocation_map),
|
||||
max_size_in_bytes_(max_size_in_bytes),
|
||||
min_prefetch_interval_(min_prefetch_interval),
|
||||
max_prefetch_interval_(max_prefetch_interval),
|
||||
alias_analysis_(alias_analysis),
|
||||
is_allowed_in_alternate_mem_(is_allowed_in_alternate_mem) {}
|
||||
is_allowed_in_alternate_mem_(is_allowed_in_alternate_mem),
|
||||
max_outstanding_async_copies_(max_outstanding_async_copies) {}
|
||||
|
||||
HeapSimulator::Result Finish() override;
|
||||
|
||||
@ -281,8 +290,8 @@ class AlternateMemoryBestFitHeap : public GlobalDecreasingSizeBestFitHeap {
|
||||
// find a suitable chunk candidate within the heap size and prefetch interval
|
||||
// limits, and append the new allocation(s) to allocations. The new
|
||||
// allocations can be in default or alternate memory spaces, or can be
|
||||
// prefetches or evictions.
|
||||
void FindAllocation(int64 start_time, int64 end_time,
|
||||
// prefetches or evictions. Returns true if successful.
|
||||
bool FindAllocation(int64 start_time, int64 end_time,
|
||||
HloPosition defining_position, HloUse use,
|
||||
const HloValue* buffer, int64 size,
|
||||
MemorySpaceAssignment::AllocationSequence* allocations);
|
||||
@ -310,6 +319,23 @@ class AlternateMemoryBestFitHeap : public GlobalDecreasingSizeBestFitHeap {
|
||||
// unnecessarily adding the chunk to the chunk map.
|
||||
void AddToChunkMap(const HloValue* buffer, Chunk chunk) override {}
|
||||
|
||||
// Returns true if the addition of an asynchronous copy in the given time
|
||||
// interval would violate the maximum number of asynchronous copies.
|
||||
bool ViolatesMaximumOutstandingAsyncCopies(int64 start_time,
|
||||
int64 end_time) const;
|
||||
|
||||
// Adds an asynchronous copy to the allocations.
|
||||
void AddAsyncCopy(const MemorySpaceAssignment::Allocation& prev_allocation,
|
||||
MemorySpace memory_space, Chunk chunk, int64 start_time,
|
||||
int64 end_time,
|
||||
MemorySpaceAssignment::AllocationSequence* allocations);
|
||||
|
||||
// These methods are used for delaying committing the chunk candidate until
|
||||
// the entire live range of the buffer has been considered.
|
||||
void AddToPendingChunks(const BufferInterval& buffer_interval,
|
||||
const ChunkCandidate& chunk_candidate);
|
||||
void CommitPendingChunks();
|
||||
|
||||
MemorySpaceAssignment::AllocationMap* allocation_map_;
|
||||
int64 max_size_in_bytes_;
|
||||
// The min and max prefetch intervals describe the number of independent HLOs
|
||||
@ -328,6 +354,12 @@ class AlternateMemoryBestFitHeap : public GlobalDecreasingSizeBestFitHeap {
|
||||
int64 max_prefetch_interval_;
|
||||
const HloAliasAnalysis& alias_analysis_;
|
||||
IsAllowedInAlternateMemoryFunction is_allowed_in_alternate_mem_;
|
||||
// We use an interval tree to keep track of the number of outstanding
|
||||
// asynchronous copies.
|
||||
BufferIntervalTree async_copy_interval_tree_;
|
||||
int64 max_outstanding_async_copies_;
|
||||
std::vector<std::pair<BufferInterval, ChunkCandidate>> pending_chunks_;
|
||||
std::vector<std::pair<int64, int64>> pending_async_copies_;
|
||||
};
|
||||
|
||||
} // namespace xla
|
||||
|
@ -31,7 +31,8 @@ class MemorySpaceAssignmentTest : public HloTestBase {
|
||||
const int64 kDefaultMemorySpace = 0;
|
||||
const int64 kAlternateMemorySpace = 1;
|
||||
|
||||
std::unique_ptr<PresetAssignments> AssignMemorySpace(HloModule* module) {
|
||||
std::unique_ptr<PresetAssignments> AssignMemorySpace(
|
||||
HloModule* module, int64 max_outstanding_async_copies = -1) {
|
||||
auto size_fn = [](const BufferValue& buffer) {
|
||||
return ShapeUtil::ByteSizeOf(buffer.shape(), /*pointer_size=*/8);
|
||||
};
|
||||
@ -56,7 +57,7 @@ class MemorySpaceAssignmentTest : public HloTestBase {
|
||||
/*min_prefetch_interval=*/2,
|
||||
/*max_prefetch_interval=*/10,
|
||||
/*alternate_memory_space_alignment_in_bytes=*/8, size_fn,
|
||||
is_allowed_in_alternate_mem)
|
||||
is_allowed_in_alternate_mem, max_outstanding_async_copies)
|
||||
.ValueOrDie();
|
||||
CheckPresetAssignments(preset_assignments.get());
|
||||
return preset_assignments;
|
||||
@ -80,6 +81,65 @@ class MemorySpaceAssignmentTest : public HloTestBase {
|
||||
<< position.ToString();
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<HloModule> CreateEvictAndPrefetchModule() {
|
||||
HloComputation::Builder builder(TestName());
|
||||
Shape shape = ShapeUtil::MakeShape(F32, {2, 3});
|
||||
HloInstruction* p0 =
|
||||
builder.AddInstruction(HloInstruction::CreateParameter(0, shape, "p0"));
|
||||
HloInstruction* p1 =
|
||||
builder.AddInstruction(HloInstruction::CreateParameter(1, shape, "p1"));
|
||||
HloInstruction* tanh = builder.AddInstruction(
|
||||
HloInstruction::CreateUnary(shape, HloOpcode::kTanh, p0));
|
||||
// tanh should be placed in the alternate memory since there isn't much
|
||||
// contention in the beginning. However, tanh has another consumer at the
|
||||
// end. So it should be kicked out to default memory and prefetched back in.
|
||||
// The graph below is meant to increase the contention to force
|
||||
// eviction/prefetch behavior.
|
||||
HloInstruction* a = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kAdd, p0, tanh));
|
||||
HloInstruction* b = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kSubtract, p0, p1));
|
||||
HloInstruction* c = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kMultiply, p0, p1));
|
||||
HloInstruction* d = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kSubtract, p0, p1));
|
||||
HloInstruction* e = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kMultiply, a, b));
|
||||
HloInstruction* f = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kMultiply, a, c));
|
||||
HloInstruction* g = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kMultiply, a, d));
|
||||
HloInstruction* h = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kMultiply, b, c));
|
||||
HloInstruction* i = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kMultiply, b, d));
|
||||
HloInstruction* j = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kMultiply, c, d));
|
||||
HloInstruction* k = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kAdd, e, f));
|
||||
HloInstruction* l = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kAdd, g, h));
|
||||
HloInstruction* m = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kAdd, i, j));
|
||||
HloInstruction* n = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kAdd, k, l));
|
||||
HloInstruction* o = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kAdd, n, m));
|
||||
// tanh is being used at the root instruction, and this should be
|
||||
// prefetched.
|
||||
HloInstruction* add = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kAdd, o, tanh));
|
||||
|
||||
auto module = CreateNewVerifiedModule();
|
||||
HloComputation* computation = module->AddEntryComputation(builder.Build());
|
||||
|
||||
HloSchedule schedule(module.get());
|
||||
schedule.set_sequence(computation, {p0, p1, tanh, a, b, c, d, e, f, g, h, i,
|
||||
j, k, l, m, n, o, add});
|
||||
TF_CHECK_OK(module->set_schedule(schedule));
|
||||
return module;
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(MemorySpaceAssignmentTest, ParameterOnly) {
|
||||
@ -141,8 +201,11 @@ TEST_F(MemorySpaceAssignmentTest, Simple) {
|
||||
EXPECT_THAT(sub, op::ShapeWithLayout(shape_in_alternate_mem));
|
||||
|
||||
// Make sure the preset assignments is sane.
|
||||
EXPECT_THAT(preset_assignments->chunks().size(), 2);
|
||||
EXPECT_THAT(preset_assignments->sizes().size(), 1);
|
||||
EXPECT_EQ(preset_assignments->chunks().size(), 2);
|
||||
EXPECT_EQ(preset_assignments->sizes().size(), 1);
|
||||
// Ensure the offset assigned to add and sub are different.
|
||||
EXPECT_NE(preset_assignments->chunks()[0].second.offset,
|
||||
preset_assignments->chunks()[1].second.offset);
|
||||
}
|
||||
|
||||
TEST_F(MemorySpaceAssignmentTest, NegateChain) {
|
||||
@ -209,69 +272,37 @@ TEST_F(MemorySpaceAssignmentTest, NegateChain) {
|
||||
}
|
||||
|
||||
TEST_F(MemorySpaceAssignmentTest, EvictAndPrefetch) {
|
||||
HloComputation::Builder builder(TestName());
|
||||
Shape shape = ShapeUtil::MakeShape(F32, {2, 3});
|
||||
HloInstruction* p0 =
|
||||
builder.AddInstruction(HloInstruction::CreateParameter(0, shape, "p0"));
|
||||
HloInstruction* p1 =
|
||||
builder.AddInstruction(HloInstruction::CreateParameter(1, shape, "p1"));
|
||||
HloInstruction* tanh = builder.AddInstruction(
|
||||
HloInstruction::CreateUnary(shape, HloOpcode::kTanh, p0));
|
||||
// tanh should be placed in the alternate memory since there isn't much
|
||||
// contention in the beginning. However, tanh has another consumer at the end.
|
||||
// So it should be kicked out to default memory and prefetched back in.
|
||||
// The graph below is meant to increase the contention to force
|
||||
// eviction/prefetch behavior.
|
||||
HloInstruction* a = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kAdd, p0, tanh));
|
||||
HloInstruction* b = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kSubtract, p0, p1));
|
||||
HloInstruction* c = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kMultiply, p0, p1));
|
||||
HloInstruction* d = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kSubtract, p0, p1));
|
||||
HloInstruction* e = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kMultiply, a, b));
|
||||
HloInstruction* f = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kMultiply, a, c));
|
||||
HloInstruction* g = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kMultiply, a, d));
|
||||
HloInstruction* h = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kMultiply, b, c));
|
||||
HloInstruction* i = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kMultiply, b, d));
|
||||
HloInstruction* j = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kMultiply, c, d));
|
||||
HloInstruction* k = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kAdd, e, f));
|
||||
HloInstruction* l = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kAdd, g, h));
|
||||
HloInstruction* m = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kAdd, i, j));
|
||||
HloInstruction* n = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kAdd, k, l));
|
||||
HloInstruction* o = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kAdd, n, m));
|
||||
// tanh is being used at the root instruction, and this should be prefetched.
|
||||
HloInstruction* add = builder.AddInstruction(
|
||||
HloInstruction::CreateBinary(shape, HloOpcode::kAdd, o, tanh));
|
||||
|
||||
auto module = CreateNewVerifiedModule();
|
||||
HloComputation* computation = module->AddEntryComputation(builder.Build());
|
||||
|
||||
HloSchedule schedule(module.get());
|
||||
schedule.set_sequence(computation, {p0, p1, tanh, a, b, c, d, e, f, g, h, i,
|
||||
j, k, l, m, n, o, add});
|
||||
TF_CHECK_OK(module->set_schedule(schedule));
|
||||
std::unique_ptr<HloModule> module = CreateEvictAndPrefetchModule();
|
||||
|
||||
AssignMemorySpace(module.get());
|
||||
|
||||
EXPECT_THAT(
|
||||
add,
|
||||
module->entry_computation()->root_instruction(),
|
||||
op::Add(op::Add(),
|
||||
op::AsyncCopy(kAlternateMemorySpace, kDefaultMemorySpace,
|
||||
op::AsyncCopy(kDefaultMemorySpace,
|
||||
kAlternateMemorySpace, op::Tanh()))));
|
||||
|
||||
EXPECT_EQ(MemorySpaceAssignment::CountMaximumOutstandingAsyncCopies(*module),
|
||||
2);
|
||||
}
|
||||
|
||||
// With the limit set to zero, the assigned module must never have an
// asynchronous copy in flight.
TEST_F(MemorySpaceAssignmentTest, EvictAndPrefetchLimitAsyncCopies0) {
  std::unique_ptr<HloModule> module = CreateEvictAndPrefetchModule();
  HloModule* const raw_module = module.get();

  AssignMemorySpace(raw_module, /*max_outstanding_async_copies=*/0);

  const int64 observed_peak =
      MemorySpaceAssignment::CountMaximumOutstandingAsyncCopies(*module);
  EXPECT_EQ(observed_peak, 0);
}
|
||||
|
||||
// With the limit set to one, the assigned module is expected to peak at
// exactly one asynchronous copy in flight.
TEST_F(MemorySpaceAssignmentTest, EvictAndPrefetchLimitAsyncCopies1) {
  std::unique_ptr<HloModule> module = CreateEvictAndPrefetchModule();
  HloModule* const raw_module = module.get();

  AssignMemorySpace(raw_module, /*max_outstanding_async_copies=*/1);

  const int64 observed_peak =
      MemorySpaceAssignment::CountMaximumOutstandingAsyncCopies(*module);
  EXPECT_EQ(observed_peak, 1);
}
|
||||
|
||||
TEST_F(MemorySpaceAssignmentTest, While) {
|
||||
|
@ -11,15 +11,15 @@ tensorflow/python/tpu/profiler/pip_package/build_pip_package.sh
|
||||
tensorflow/python/tpu/profiler/pip_package/setup.py
|
||||
tensorflow/stream_executor/build_defs.bzl
|
||||
tensorflow/third_party/BUILD
|
||||
tensorflow/third_party/__init__.py
|
||||
tensorflow/third_party/android/BUILD
|
||||
tensorflow/third_party/android/android.bzl.tpl
|
||||
tensorflow/third_party/android/android_configure.BUILD.tpl
|
||||
tensorflow/third_party/android/android_configure.bzl
|
||||
tensorflow/third_party/__init__.py
|
||||
tensorflow/third_party/arm_neon_2_x86_sse.BUILD
|
||||
tensorflow/third_party/astor.BUILD
|
||||
tensorflow/third_party/backports_weakref.BUILD
|
||||
tensorflow/third_party/boringssl/BUILD
|
||||
tensorflow/third_party/backports_weakref.BUILD
|
||||
tensorflow/third_party/clang_toolchain/BUILD
|
||||
tensorflow/third_party/clang_toolchain/cc_configure_clang.bzl
|
||||
tensorflow/third_party/clang_toolchain/download_clang.bzl
|
||||
@ -27,86 +27,86 @@ tensorflow/third_party/codegen.BUILD
|
||||
tensorflow/third_party/com_google_absl.BUILD
|
||||
tensorflow/third_party/common.bzl
|
||||
tensorflow/third_party/cub.BUILD
|
||||
tensorflow/third_party/cython.BUILD
|
||||
tensorflow/third_party/curl.BUILD
|
||||
tensorflow/third_party/eigen.BUILD
|
||||
tensorflow/third_party/cython.BUILD
|
||||
tensorflow/third_party/double_conversion.BUILD
|
||||
tensorflow/third_party/eigen3/BUILD
|
||||
tensorflow/third_party/eigen3/Eigen/Core
|
||||
tensorflow/third_party/eigen3/Eigen/Cholesky
|
||||
tensorflow/third_party/eigen3/Eigen/Core
|
||||
tensorflow/third_party/eigen3/Eigen/Eigenvalues
|
||||
tensorflow/third_party/eigen3/Eigen/LU
|
||||
tensorflow/third_party/eigen3/Eigen/QR
|
||||
tensorflow/third_party/eigen3/Eigen/SVD
|
||||
tensorflow/third_party/eigen3/BUILD
|
||||
tensorflow/third_party/eigen3/LICENSE
|
||||
tensorflow/third_party/eigen3/gpu_packet_math.patch
|
||||
tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint
|
||||
tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/ThreadPool
|
||||
tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/Tensor
|
||||
tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint
|
||||
tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProduct.h
|
||||
tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductNEON.h
|
||||
tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h
|
||||
tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h
|
||||
tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h
|
||||
tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductNEON.h
|
||||
tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h
|
||||
tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatVecProduct.h
|
||||
tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
|
||||
tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h
|
||||
tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h
|
||||
tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h
|
||||
tensorflow/third_party/eigen3/unsupported/Eigen/MatrixFunctions
|
||||
tensorflow/third_party/eigen3/unsupported/Eigen/SpecialFunctions
|
||||
tensorflow/third_party/enum34.BUILD
|
||||
tensorflow/third_party/farmhash.BUILD
|
||||
tensorflow/third_party/eigen.BUILD
|
||||
tensorflow/third_party/fft2d/BUILD
|
||||
tensorflow/third_party/fft2d/LICENSE
|
||||
tensorflow/third_party/fft2d/fft2d.BUILD
|
||||
tensorflow/third_party/fft2d/fft.h
|
||||
tensorflow/third_party/fft2d/fft2d.BUILD
|
||||
tensorflow/third_party/fft2d/fft2d.h
|
||||
tensorflow/third_party/enum34.BUILD
|
||||
tensorflow/third_party/farmhash.BUILD
|
||||
tensorflow/third_party/git/BUILD
|
||||
tensorflow/third_party/git/BUILD.tpl
|
||||
tensorflow/third_party/git/git_configure.bzl
|
||||
tensorflow/third_party/functools32.BUILD
|
||||
tensorflow/third_party/gast.BUILD
|
||||
tensorflow/third_party/gif.BUILD
|
||||
tensorflow/third_party/git/BUILD.tpl
|
||||
tensorflow/third_party/git/BUILD
|
||||
tensorflow/third_party/git/git_configure.bzl
|
||||
tensorflow/third_party/googleapis.BUILD
|
||||
tensorflow/third_party/gpus/BUILD
|
||||
tensorflow/third_party/gpus/crosstool/BUILD
|
||||
tensorflow/third_party/gpus/crosstool/BUILD.tpl
|
||||
tensorflow/third_party/gpus/crosstool/LICENSE
|
||||
tensorflow/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl
|
||||
tensorflow/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
|
||||
tensorflow/third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.py.tpl
|
||||
tensorflow/third_party/gpus/BUILD
|
||||
tensorflow/third_party/gpus/cuda/BUILD
|
||||
tensorflow/third_party/gpus/cuda/BUILD.tpl
|
||||
tensorflow/third_party/gpus/cuda/BUILD.windows.tpl
|
||||
tensorflow/third_party/gpus/cuda/build_defs.bzl.tpl
|
||||
tensorflow/third_party/gpus/cuda/BUILD.tpl
|
||||
tensorflow/third_party/gpus/cuda/LICENSE
|
||||
tensorflow/third_party/gpus/cuda/build_defs.bzl.tpl
|
||||
tensorflow/third_party/gpus/cuda/cuda_config.h.tpl
|
||||
tensorflow/third_party/gpus/cuda_configure.bzl
|
||||
tensorflow/third_party/gpus/rocm/BUILD
|
||||
tensorflow/third_party/gpus/rocm/build_defs.bzl.tpl
|
||||
tensorflow/third_party/gpus/rocm/BUILD.tpl
|
||||
tensorflow/third_party/gpus/rocm/rocm_config.h.tpl
|
||||
tensorflow/third_party/gpus/rocm/build_defs.bzl.tpl
|
||||
tensorflow/third_party/gpus/cuda_configure.bzl
|
||||
tensorflow/third_party/gpus/find_cuda_config.py
|
||||
tensorflow/third_party/gpus/rocm_configure.bzl
|
||||
tensorflow/third_party/googleapis.BUILD
|
||||
tensorflow/third_party/grpc/BUILD
|
||||
tensorflow/third_party/icu/udata.patch
|
||||
tensorflow/third_party/jsoncpp.BUILD
|
||||
tensorflow/third_party/kafka/config.patch
|
||||
tensorflow/third_party/kafka/BUILD
|
||||
tensorflow/third_party/libxsmm.BUILD
|
||||
tensorflow/third_party/linenoise.BUILD
|
||||
tensorflow/third_party/kafka/config.patch
|
||||
tensorflow/third_party/jsoncpp.BUILD
|
||||
tensorflow/third_party/llvm/BUILD
|
||||
tensorflow/third_party/llvm/expand_cmake_vars.py
|
||||
tensorflow/third_party/llvm/llvm.autogenerated.BUILD
|
||||
tensorflow/third_party/llvm/llvm.bzl
|
||||
tensorflow/third_party/mkl/LICENSE
|
||||
tensorflow/third_party/libxsmm.BUILD
|
||||
tensorflow/third_party/linenoise.BUILD
|
||||
tensorflow/third_party/lmdb.BUILD
|
||||
tensorflow/third_party/mkl/BUILD
|
||||
tensorflow/third_party/mkl/LICENSE
|
||||
tensorflow/third_party/mkl/MKL_LICENSE
|
||||
tensorflow/third_party/mkl/build_defs.bzl
|
||||
tensorflow/third_party/mkl/mkl.BUILD
|
||||
tensorflow/third_party/lmdb.BUILD
|
||||
tensorflow/third_party/mkl_dnn/mkldnn.BUILD
|
||||
tensorflow/third_party/mkl_dnn/LICENSE
|
||||
tensorflow/third_party/mkl_dnn/mkldnn.BUILD
|
||||
tensorflow/third_party/mpi/.gitignore
|
||||
tensorflow/third_party/mpi/BUILD
|
||||
tensorflow/third_party/mpi_collectives/BUILD
|
||||
@ -121,89 +121,87 @@ tensorflow/third_party/nccl/system.BUILD.tpl
|
||||
tensorflow/third_party/ngraph/BUILD
|
||||
tensorflow/third_party/ngraph/LICENSE
|
||||
tensorflow/third_party/ngraph/NGRAPH_LICENSE
|
||||
tensorflow/third_party/ngraph/ngraph_tf.BUILD
|
||||
tensorflow/third_party/ngraph/build_defs.bzl
|
||||
tensorflow/third_party/ngraph/ngraph.BUILD
|
||||
tensorflow/third_party/ngraph/nlohmann_json.BUILD
|
||||
tensorflow/third_party/ngraph/ngraph_tf.BUILD
|
||||
tensorflow/third_party/ngraph/tbb.BUILD
|
||||
tensorflow/third_party/opt_einsum.BUILD
|
||||
tensorflow/third_party/pcre.BUILD
|
||||
tensorflow/third_party/png.BUILD
|
||||
tensorflow/third_party/png_fix_rpi.patch
|
||||
tensorflow/third_party/pprof.BUILD
|
||||
tensorflow/third_party/protobuf/BUILD
|
||||
tensorflow/third_party/py/BUILD.tpl
|
||||
tensorflow/third_party/py/BUILD
|
||||
tensorflow/third_party/pprof.BUILD
|
||||
tensorflow/third_party/py/numpy/BUILD
|
||||
tensorflow/third_party/py/BUILD
|
||||
tensorflow/third_party/py/BUILD.tpl
|
||||
tensorflow/third_party/py/python_configure.bzl
|
||||
tensorflow/third_party/pybind11.BUILD
|
||||
tensorflow/third_party/python_runtime/BUILD
|
||||
tensorflow/third_party/pybind11.BUILD
|
||||
tensorflow/third_party/repo.bzl
|
||||
tensorflow/third_party/six.BUILD
|
||||
tensorflow/third_party/snappy.BUILD
|
||||
tensorflow/third_party/sqlite.BUILD
|
||||
tensorflow/third_party/swig.BUILD
|
||||
tensorflow/third_party/sycl/crosstool/BUILD
|
||||
tensorflow/third_party/systemlibs/BUILD
|
||||
tensorflow/third_party/systemlibs/BUILD.tpl
|
||||
tensorflow/third_party/systemlibs/BUILD
|
||||
tensorflow/third_party/systemlibs/absl_py.BUILD
|
||||
tensorflow/third_party/systemlibs/absl_py.absl.flags.BUILD
|
||||
tensorflow/third_party/systemlibs/absl_py.absl.testing.BUILD
|
||||
tensorflow/third_party/systemlibs/absl_py.absl.flags.BUILD
|
||||
tensorflow/third_party/systemlibs/astor.BUILD
|
||||
tensorflow/third_party/systemlibs/build_defs.bzl.tpl
|
||||
tensorflow/third_party/systemlibs/boringssl.BUILD
|
||||
tensorflow/third_party/systemlibs/cython.BUILD
|
||||
tensorflow/third_party/systemlibs/build_defs.bzl.tpl
|
||||
tensorflow/third_party/systemlibs/curl.BUILD
|
||||
tensorflow/third_party/systemlibs/cython.BUILD
|
||||
tensorflow/third_party/systemlibs/double_conversion.BUILD
|
||||
tensorflow/third_party/systemlibs/gast.BUILD
|
||||
tensorflow/third_party/systemlibs/gif.BUILD
|
||||
tensorflow/third_party/systemlibs/google_cloud_cpp.BUILD
|
||||
tensorflow/third_party/systemlibs/google_cloud_cpp.google.cloud.bigtable.BUILD
|
||||
tensorflow/third_party/systemlibs/grpc.BUILD
|
||||
tensorflow/third_party/systemlibs/google_cloud_cpp.BUILD
|
||||
tensorflow/third_party/systemlibs/googleapis.BUILD
|
||||
tensorflow/third_party/systemlibs/lmdb.BUILD
|
||||
tensorflow/third_party/systemlibs/nsync.BUILD
|
||||
tensorflow/third_party/systemlibs/jsoncpp.BUILD
|
||||
tensorflow/third_party/systemlibs/grpc.BUILD
|
||||
tensorflow/third_party/systemlibs/lmdb.BUILD
|
||||
tensorflow/third_party/systemlibs/opt_einsum.BUILD
|
||||
tensorflow/third_party/systemlibs/nsync.BUILD
|
||||
tensorflow/third_party/systemlibs/pcre.BUILD
|
||||
tensorflow/third_party/systemlibs/png.BUILD
|
||||
tensorflow/third_party/systemlibs/protobuf.BUILD
|
||||
tensorflow/third_party/systemlibs/protobuf.bzl
|
||||
tensorflow/third_party/systemlibs/re2.BUILD
|
||||
tensorflow/third_party/systemlibs/six.BUILD
|
||||
tensorflow/third_party/systemlibs/snappy.BUILD
|
||||
tensorflow/third_party/systemlibs/protobuf.bzl
|
||||
tensorflow/third_party/systemlibs/png.BUILD
|
||||
tensorflow/third_party/systemlibs/re2.BUILD
|
||||
tensorflow/third_party/systemlibs/sqlite.BUILD
|
||||
tensorflow/third_party/systemlibs/swig.BUILD
|
||||
tensorflow/third_party/systemlibs/snappy.BUILD
|
||||
tensorflow/third_party/systemlibs/syslibs_configure.bzl
|
||||
tensorflow/third_party/systemlibs/termcolor.BUILD
|
||||
tensorflow/third_party/systemlibs/zlib.BUILD
|
||||
tensorflow/third_party/tensorrt/BUILD
|
||||
tensorflow/third_party/tensorrt/LICENSE
|
||||
tensorflow/third_party/tensorrt/BUILD.tpl
|
||||
tensorflow/third_party/tensorrt/build_defs.bzl.tpl
|
||||
tensorflow/third_party/tensorrt/LICENSE
|
||||
tensorflow/third_party/tensorrt/tensorrt/include/tensorrt_config.h.tpl
|
||||
tensorflow/third_party/tensorrt/tensorrt_configure.bzl
|
||||
tensorflow/third_party/termcolor.BUILD
|
||||
tensorflow/third_party/tflite_mobilenet.BUILD
|
||||
tensorflow/third_party/tflite_mobilenet_float.BUILD
|
||||
tensorflow/third_party/tflite_mobilenet_quant.BUILD
|
||||
tensorflow/third_party/tflite_ovic_testdata.BUILD
|
||||
tensorflow/third_party/tflite_smartreply.BUILD
|
||||
tensorflow/third_party/toolchains/BUILD
|
||||
tensorflow/third_party/toolchains/clang6/BUILD
|
||||
tensorflow/third_party/toolchains/clang6/CROSSTOOL.tpl
|
||||
tensorflow/third_party/toolchains/clang6/README.md
|
||||
tensorflow/third_party/toolchains/clang6/clang.BUILD
|
||||
tensorflow/third_party/toolchains/clang6/repo.bzl
|
||||
tensorflow/third_party/toolchains/cpus/arm/cc_config.bzl.tpl
|
||||
tensorflow/third_party/toolchains/cpus/arm/arm_compiler_configure.bzl
|
||||
tensorflow/third_party/toolchains/BUILD
|
||||
tensorflow/third_party/toolchains/cpus/arm/BUILD
|
||||
tensorflow/third_party/toolchains/cpus/arm/arm_compiler_configure.bzl
|
||||
tensorflow/third_party/toolchains/cpus/arm/cc_config.bzl.tpl
|
||||
tensorflow/third_party/toolchains/cpus/py/BUILD
|
||||
tensorflow/third_party/toolchains/cpus/py3/BUILD
|
||||
tensorflow/third_party/toolchains/preconfig/centos6/cuda10.0-cudnn7/cuda/BUILD
|
||||
tensorflow/third_party/toolchains/preconfig/centos6/cuda10.0-cudnn7/cuda/build_defs.bzl
|
||||
tensorflow/third_party/toolchains/preconfig/centos6/cuda10.1-cudnn7/cuda/build_defs.bzl
|
||||
tensorflow/third_party/toolchains/preconfig/centos6/cuda10.1-cudnn7/cuda/BUILD
|
||||
tensorflow/third_party/toolchains/preconfig/centos6/cuda10.1-cudnn7/cuda/build_defs.bzl
|
||||
tensorflow/third_party/toolchains/preconfig/centos6/gcc7/BUILD
|
||||
tensorflow/third_party/toolchains/preconfig/centos6/gcc7/cc_toolchain_config.bzl
|
||||
tensorflow/third_party/toolchains/preconfig/centos6/gcc7/dummy_toolchain.bzl
|
||||
@ -216,8 +214,8 @@ tensorflow/third_party/toolchains/preconfig/centos6/py3/BUILD
|
||||
tensorflow/third_party/toolchains/preconfig/centos6/tensorrt5/BUILD
|
||||
tensorflow/third_party/toolchains/preconfig/centos6/tensorrt5/build_defs.bzl
|
||||
tensorflow/third_party/toolchains/preconfig/generate/BUILD
|
||||
tensorflow/third_party/toolchains/preconfig/generate/containers.bzl
|
||||
tensorflow/third_party/toolchains/preconfig/generate/archives.bzl
|
||||
tensorflow/third_party/toolchains/preconfig/generate/containers.bzl
|
||||
tensorflow/third_party/toolchains/preconfig/generate/generate.bzl
|
||||
tensorflow/third_party/toolchains/preconfig/generate/workspace.bzl
|
||||
tensorflow/third_party/toolchains/preconfig/ubuntu14.04/cuda10.0-cudnn7/cuda/BUILD
|
||||
@ -227,9 +225,9 @@ tensorflow/third_party/toolchains/preconfig/ubuntu14.04/gcc-nvcc-cuda10.0/cc_too
|
||||
tensorflow/third_party/toolchains/preconfig/ubuntu14.04/py3/BUILD
|
||||
tensorflow/third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/BUILD
|
||||
tensorflow/third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/build_defs.bzl
|
||||
tensorflow/third_party/toolchains/preconfig/ubuntu16.04/clang/BUILD
|
||||
tensorflow/third_party/toolchains/preconfig/ubuntu16.04/clang/cc_toolchain_config.bzl
|
||||
tensorflow/third_party/toolchains/preconfig/ubuntu16.04/clang/dummy_toolchain.bzl
|
||||
tensorflow/third_party/toolchains/preconfig/ubuntu16.04/clang/BUILD
|
||||
tensorflow/third_party/toolchains/preconfig/ubuntu16.04/cuda10.0-cudnn7/cuda/BUILD
|
||||
tensorflow/third_party/toolchains/preconfig/ubuntu16.04/cuda10.0-cudnn7/cuda/build_defs.bzl
|
||||
tensorflow/third_party/toolchains/preconfig/ubuntu16.04/gcc5-rocm/BUILD
|
||||
@ -242,18 +240,20 @@ tensorflow/third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-
|
||||
tensorflow/third_party/toolchains/preconfig/ubuntu16.04/py/BUILD
|
||||
tensorflow/third_party/toolchains/preconfig/ubuntu16.04/py3/BUILD
|
||||
tensorflow/third_party/toolchains/preconfig/ubuntu16.04/py3_opt/BUILD
|
||||
tensorflow/third_party/toolchains/preconfig/ubuntu16.04/rocm/rocm/build_defs.bzl
|
||||
tensorflow/third_party/toolchains/preconfig/ubuntu16.04/rocm/rocm/BUILD
|
||||
tensorflow/third_party/toolchains/preconfig/ubuntu16.04/rocm/rocm/build_defs.bzl
|
||||
tensorflow/third_party/toolchains/preconfig/ubuntu16.04/tensorrt5/BUILD
|
||||
tensorflow/third_party/toolchains/preconfig/ubuntu16.04/tensorrt5.1/BUILD
|
||||
tensorflow/third_party/toolchains/preconfig/ubuntu16.04/tensorrt5.1/build_defs.bzl
|
||||
tensorflow/third_party/toolchains/preconfig/win_1803/BUILD
|
||||
tensorflow/third_party/toolchains/preconfig/win_1803/bazel_025/BUILD
|
||||
tensorflow/third_party/toolchains/preconfig/win_1803/BUILD
|
||||
tensorflow/third_party/toolchains/preconfig/win_1803/py36/BUILD
|
||||
tensorflow/third_party/toolchains/remote/BUILD
|
||||
tensorflow/third_party/toolchains/remote/BUILD.tpl
|
||||
tensorflow/third_party/toolchains/remote/configure.bzl
|
||||
tensorflow/third_party/toolchains/remote/BUILD.tpl
|
||||
tensorflow/third_party/toolchains/remote/execution.bzl.tpl
|
||||
tensorflow/third_party/tflite_ovic_testdata.BUILD
|
||||
tensorflow/third_party/tflite_smartreply.BUILD
|
||||
tensorflow/third_party/wrapt.BUILD
|
||||
tensorflow/third_party/zlib.BUILD
|
||||
tensorflow/tools/ci_build/remote/BUILD
|
||||
@ -270,9 +270,9 @@ tensorflow/tools/lib_package/libtensorflow_test.c
|
||||
tensorflow/tools/lib_package/libtensorflow_test.sh
|
||||
tensorflow/tools/pip_package/BUILD
|
||||
tensorflow/tools/pip_package/MANIFEST.in
|
||||
tensorflow/tools/pip_package/README
|
||||
tensorflow/tools/pip_package/check_load_py_test.py
|
||||
tensorflow/tools/pip_package/build_pip_package.sh
|
||||
tensorflow/tools/pip_package/check_load_py_test.py
|
||||
tensorflow/tools/pip_package/README
|
||||
tensorflow/tools/pip_package/pip_smoke_test.py
|
||||
tensorflow/tools/pip_package/setup.py
|
||||
tensorflow/tools/pip_package/simple_console.py
|
||||
|
Loading…
Reference in New Issue
Block a user