[XLA:CPU] Implement SizeOfGeneratedCodeInBytes() on CPU.
[XLA:GPU] Account for global constants in the implementation of SizeOfGeneratedCodeInBytes() on GPU.
[XLA] Make SizeOfGeneratedCodeInBytes() const.

PiperOrigin-RevId: 314660261
Change-Id: I0b76adcbb2d75dfb432599b38065d510ef093b87
This commit is contained in:
parent
f6128c8a84
commit
dd619e62dc
@ -376,5 +376,9 @@ const InstructionValueSet& CpuExecutable::GetRootValueSet() const {
|
|||||||
module().entry_computation()->root_instruction());
|
module().entry_computation()->root_instruction());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int64 CpuExecutable::SizeOfGeneratedCodeInBytes() const {
|
||||||
|
return jit_->SizeOfGeneratedCodeInBytes();
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace cpu
|
} // namespace cpu
|
||||||
} // namespace xla
|
} // namespace xla
|
||||||
|
@ -81,6 +81,8 @@ class CpuExecutable : public Executable {
|
|||||||
|
|
||||||
const BufferAssignment& buffer_assignment() const { return *assignment_; }
|
const BufferAssignment& buffer_assignment() const { return *assignment_; }
|
||||||
|
|
||||||
|
int64 SizeOfGeneratedCodeInBytes() const override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Creates an array suitable for passing as the "buffer_table" argument to the
|
// Creates an array suitable for passing as the "buffer_table" argument to the
|
||||||
// JIT compiled function pointer.
|
// JIT compiled function pointer.
|
||||||
|
@ -163,6 +163,7 @@ void SimpleOrcJIT::NotifyObjectFinalized(
|
|||||||
uint64_t key = static_cast<uint64_t>(
|
uint64_t key = static_cast<uint64_t>(
|
||||||
reinterpret_cast<uintptr_t>(object.getData().data()));
|
reinterpret_cast<uintptr_t>(object.getData().data()));
|
||||||
gdb_jit_event_listener_->notifyObjectLoaded(key, object, object_info);
|
gdb_jit_event_listener_->notifyObjectLoaded(key, object, object_info);
|
||||||
|
size_of_generated_code_in_bytes_ += object.getData().size();
|
||||||
}
|
}
|
||||||
|
|
||||||
void SimpleOrcJIT::NotifyObjectFreed(const llvm::object::ObjectFile& object) {
|
void SimpleOrcJIT::NotifyObjectFreed(const llvm::object::ObjectFile& object) {
|
||||||
|
@ -88,6 +88,10 @@ class SimpleOrcJIT {
|
|||||||
const llvm::TargetOptions& target_options,
|
const llvm::TargetOptions& target_options,
|
||||||
llvm::CodeGenOpt::Level opt_level);
|
llvm::CodeGenOpt::Level opt_level);
|
||||||
|
|
||||||
|
int64 SizeOfGeneratedCodeInBytes() const {
|
||||||
|
return size_of_generated_code_in_bytes_;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
llvm::JITSymbol ResolveRuntimeSymbol(const std::string& name);
|
llvm::JITSymbol ResolveRuntimeSymbol(const std::string& name);
|
||||||
|
|
||||||
@ -103,6 +107,7 @@ class SimpleOrcJIT {
|
|||||||
std::shared_ptr<llvm::orc::SymbolResolver> symbol_resolver_;
|
std::shared_ptr<llvm::orc::SymbolResolver> symbol_resolver_;
|
||||||
ObjLayerT object_layer_;
|
ObjLayerT object_layer_;
|
||||||
CompileLayerT compile_layer_;
|
CompileLayerT compile_layer_;
|
||||||
|
int64 size_of_generated_code_in_bytes_ = 0;
|
||||||
|
|
||||||
// Non owning pointer to a JIT event listener that registers the JIT events
|
// Non owning pointer to a JIT event listener that registers the JIT events
|
||||||
// with an attached GDB.
|
// with an attached GDB.
|
||||||
|
@ -245,6 +245,6 @@ StatusOr<ExecutionOutput> Executable::ExecuteAsyncOnStreamWrapper(
|
|||||||
return return_value;
|
return return_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
int64 Executable::SizeOfGeneratedCodeInBytes() { return -1; }
|
int64 Executable::SizeOfGeneratedCodeInBytes() const { return -1; }
|
||||||
|
|
||||||
} // namespace xla
|
} // namespace xla
|
||||||
|
@ -318,7 +318,7 @@ class Executable {
|
|||||||
// not supported by the executable.
|
// not supported by the executable.
|
||||||
//
|
//
|
||||||
// Does not include the size of used libraries (e.g. cuDNN, Eigen, etc.).
|
// Does not include the size of used libraries (e.g. cuDNN, Eigen, etc.).
|
||||||
virtual int64 SizeOfGeneratedCodeInBytes();
|
virtual int64 SizeOfGeneratedCodeInBytes() const;
|
||||||
|
|
||||||
// Dumping helpers.
|
// Dumping helpers.
|
||||||
void set_hlo_proto(std::unique_ptr<xla::HloProto> hlo_proto) {
|
void set_hlo_proto(std::unique_ptr<xla::HloProto> hlo_proto) {
|
||||||
|
@ -510,13 +510,21 @@ const InstructionValueSet& GpuExecutable::GetRootValueSet() const {
|
|||||||
module().entry_computation()->root_instruction());
|
module().entry_computation()->root_instruction());
|
||||||
}
|
}
|
||||||
|
|
||||||
int64 GpuExecutable::SizeOfGeneratedCodeInBytes() {
|
int64 GpuExecutable::SizeOfGeneratedCodeInBytes() const {
|
||||||
// Non-empty PTX but empty cubin: compilation must have failed, return
|
// Non-empty PTX but empty cubin: compilation must have failed, return
|
||||||
// "unknown".
|
// "unknown".
|
||||||
if (binary().empty() && !text_.empty()) {
|
if (binary().empty() && !text_.empty()) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
return binary().size();
|
int64 size = binary().size();
|
||||||
|
for (BufferAllocation::Index i = 0; i < assignment_->Allocations().size();
|
||||||
|
++i) {
|
||||||
|
const BufferAllocation& allocation = assignment_->GetAllocation(i);
|
||||||
|
if (allocation.is_constant()) {
|
||||||
|
size += allocation.size();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
|
@ -61,7 +61,7 @@ class GpuExecutable : public Executable {
|
|||||||
std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map);
|
std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map);
|
||||||
~GpuExecutable() override;
|
~GpuExecutable() override;
|
||||||
|
|
||||||
int64 SizeOfGeneratedCodeInBytes() override;
|
int64 SizeOfGeneratedCodeInBytes() const override;
|
||||||
|
|
||||||
// This should be called after set_ir_module_string.
|
// This should be called after set_ir_module_string.
|
||||||
const string& ir_module_string() const { return ir_module_string_; }
|
const string& ir_module_string() const { return ir_module_string_; }
|
||||||
|
@ -806,6 +806,29 @@ XLA_TEST_F(LocalClientExecuteTest, CompilePartitionedExecutable) {
|
|||||||
EXPECT_EQ(2, executables.size());
|
EXPECT_EQ(2, executables.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
XLA_TEST_F(LocalClientExecuteTest,
|
||||||
|
DISABLED_ON_INTERPRETER(SizeOfGeneratedCodeInBytes)) {
|
||||||
|
XlaBuilder builder(TestName());
|
||||||
|
auto x = Parameter(&builder, 0, ShapeUtil::MakeShape(F32, {}), "x");
|
||||||
|
constexpr int size = 100000;
|
||||||
|
TF_ASSERT_OK_AND_ASSIGN(auto literal,
|
||||||
|
LiteralUtil::CreateRandomLiteral<F32>(
|
||||||
|
ShapeUtil::MakeShape(F32, {size}), 0.0, 1.0));
|
||||||
|
auto y = ConstantLiteral(&builder, literal);
|
||||||
|
Add(x, y);
|
||||||
|
|
||||||
|
Shape argument_layout =
|
||||||
|
ShapeUtil::MakeShapeWithLayout(F32, /*dimensions=*/{}, {});
|
||||||
|
TF_ASSERT_OK_AND_ASSIGN(
|
||||||
|
auto executables,
|
||||||
|
local_client_->Compile(builder.Build().ValueOrDie(), {&argument_layout},
|
||||||
|
ExecutableBuildOptions()));
|
||||||
|
EXPECT_EQ(1, executables.size());
|
||||||
|
// The executable should be at least as large as the constant it contains.
|
||||||
|
EXPECT_GT(executables.front()->executable()->SizeOfGeneratedCodeInBytes(),
|
||||||
|
int64{sizeof(float) * size});
|
||||||
|
}
|
||||||
|
|
||||||
XLA_TEST_F(LocalClientExecuteTest, ShapeBufferToLiteralConversion) {
|
XLA_TEST_F(LocalClientExecuteTest, ShapeBufferToLiteralConversion) {
|
||||||
// Test copying Literals to the device as ShapedBuffers, then copying them
|
// Test copying Literals to the device as ShapedBuffers, then copying them
|
||||||
// back again to Literals.
|
// back again to Literals.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user