diff --git a/tensorflow/stream_executor/gpu/asm_compiler.cc b/tensorflow/stream_executor/gpu/asm_compiler.cc index ba095b733ad..25864cc0199 100644 --- a/tensorflow/stream_executor/gpu/asm_compiler.cc +++ b/tensorflow/stream_executor/gpu/asm_compiler.cc @@ -155,7 +155,12 @@ port::StatusOr<std::vector<uint8>> CompileGpuAsm(int device_ordinal, int cc_minor; TF_RETURN_IF_ERROR( gpu::GpuDriver::GetComputeCapability(&cc_major, &cc_minor, handle)); + return CompileGpuAsm(cc_major, cc_minor, ptx_contents, options); +} +port::StatusOr<std::vector<uint8>> CompileGpuAsm(int cc_major, int cc_minor, + const char* ptx_contents, + GpuAsmOpts options) { string ptxas_path; auto env = tensorflow::Env::Default(); for (const string& cuda_root : diff --git a/tensorflow/stream_executor/gpu/asm_compiler.h b/tensorflow/stream_executor/gpu/asm_compiler.h index 49eda633290..e5f67a71242 100644 --- a/tensorflow/stream_executor/gpu/asm_compiler.h +++ b/tensorflow/stream_executor/gpu/asm_compiler.h @@ -26,14 +26,25 @@ limitations under the License. namespace stream_executor { // Compiles the given PTX string using ptxas and returns the resulting machine -// code (i.e. a cubin) as a byte array. +// code (i.e. a cubin) as a byte array. The generated cubin matches the compute +// capabilities of the device associated with 'device_ordinal'. // -// compile_ptx_options is used to query for the CUDA location in case it is +// 'options' is used to query for the CUDA location in case it is // customized in a passed flag, and for controlling ptxas optimizations. port::StatusOr<std::vector<uint8>> CompileGpuAsm(int device_ordinal, const char* ptx_contents, GpuAsmOpts options); +// Compiles the given PTX string using ptxas and returns the resulting machine +// code (i.e. a cubin) as a byte array. The generated cubin matches the compute +// capabilities provided by 'cc_major' and 'cc_minor'. +// +// 'options' is used to query for the CUDA location in case it is +// customized in a passed flag, and for controlling ptxas optimizations. 
+port::StatusOr<std::vector<uint8>> CompileGpuAsm(int cc_major, int cc_minor, + const char* ptx_contents, + GpuAsmOpts options); + // Same as CompileGpuAsm, but caches the result, and returns unowned view of // the compiled binary. //