250 lines
9.3 KiB
C++
250 lines
9.3 KiB
C++
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
==============================================================================*/
|
|
|
|
#include "tensorflow/stream_executor/kernel_spec.h"

#include <cstring>

#include "absl/strings/string_view.h"
|
|
|
|
namespace stream_executor {
|
|
|
|
// Builds a loader spec for the kernel called `kernelname`. The view is
// converted to a std::string before being stored in kernelname_.
KernelLoaderSpec::KernelLoaderSpec(absl::string_view kernelname)
    : kernelname_(std::string(kernelname)) {
}
|
|
|
|
// Builds an on-disk loader spec: `kernelname` is forwarded to the
// KernelLoaderSpec base and `filename` is converted to a std::string and kept
// in filename_.
OnDiskKernelLoaderSpec::OnDiskKernelLoaderSpec(absl::string_view filename,
                                               absl::string_view kernelname)
    : KernelLoaderSpec(kernelname),
      filename_(std::string(filename)) {
}
|
|
|
|
// Spec for CUDA PTX stored in the file `filename`; all state is held by the
// OnDiskKernelLoaderSpec base.
CudaPtxOnDisk::CudaPtxOnDisk(absl::string_view filename,
                             absl::string_view kernelname)
    : OnDiskKernelLoaderSpec(filename, kernelname) {
}
|
|
|
|
// Spec for a CUDA cubin stored in the file `filename`; all state is held by
// the OnDiskKernelLoaderSpec base.
CudaCubinOnDisk::CudaCubinOnDisk(absl::string_view filename,
                                 absl::string_view kernelname)
    : OnDiskKernelLoaderSpec(filename, kernelname) {
}
|
|
|
|
// Spec for a CUDA cubin that is already resident in memory. bytes_ is
// initialized directly from `bytes`.
// NOTE(review): presumably bytes_ is a non-owning pointer, so the caller must
// keep the buffer alive -- confirm against the header.
CudaCubinInMemory::CudaCubinInMemory(const char *bytes,
                                     absl::string_view kernelname)
    : KernelLoaderSpec(kernelname),
      bytes_(bytes) {
}
|
|
|
|
// Strict-weak "less than" ordering on (major, minor) compute capabilities:
// the major versions are compared first and the minor versions break ties.
// std::tuple's operator< performs exactly this lexicographic comparison.
bool CompareComputeCapability(const std::tuple<int, int> &lhs,
                              const std::tuple<int, int> &rhs) {
  return lhs < rhs;
}
|
|
|
|
// Out-of-class definition of the static member: compute capability 1.0. The
// single-PTX constructor registers its PTX under this key.
const std::tuple<int, int> CudaPtxInMemory::kMinimumCapability{1, 0};
|
|
|
|
// Builds a spec holding a single PTX text, registered under
// kMinimumCapability.
//
// Only the pointer returned by ptx.data() is stored; the characters are not
// copied here. When `ptx_compressed` is true the pointer is also entered in
// decompressed_ptx_ with an empty string, which marks it as "compressed, not
// yet decompressed" so decompression can happen lazily on first access.
CudaPtxInMemory::CudaPtxInMemory(absl::string_view ptx,
                                 absl::string_view kernel_name,
                                 bool ptx_compressed)
    : KernelLoaderSpec(kernel_name),
      ptx_by_compute_capability_(CompareComputeCapability) {
  const char *const ptx_text = ptx.data();
  if (ptx_compressed) {
    // Empty placeholder == still compressed.
    decompressed_ptx_[ptx_text] = "";
  }
  ptx_by_compute_capability_[kMinimumCapability] = ptx_text;
}
|
|
|
|
// Builds a spec from a list of (major, minor, ptx) entries, registering each
// PTX text under its (major, minor) compute capability. A later entry with
// the same capability overwrites an earlier one.
//
// Only the pointer returned by ptx.data() is stored for each entry. When
// `ptx_compressed` is true every pointer is also entered in decompressed_ptx_
// with an empty string, marking it as "compressed, not yet decompressed" so
// decompression can happen lazily on first access.
CudaPtxInMemory::CudaPtxInMemory(
    const std::initializer_list<CudaPtxInMemory::PtxSpec> &spec_list,
    absl::string_view kernel_name, bool ptx_compressed)
    : KernelLoaderSpec(kernel_name),
      ptx_by_compute_capability_(CompareComputeCapability) {
  for (const auto &spec : spec_list) {
    const int cc_major = std::get<0>(spec);
    const int cc_minor = std::get<1>(spec);
    const absl::string_view ptx_view = std::get<2>(spec);
    const char *const ptx_text = ptx_view.data();

    if (ptx_compressed) {
      // Empty placeholder == still compressed.
      decompressed_ptx_[ptx_text] = "";
    }
    ptx_by_compute_capability_[std::make_tuple(cc_major, cc_minor)] = ptx_text;
  }
}
|
|
|
|
std::string CudaPtxInMemory::DecompressPtx(const char *ptx) {
|
|
// Get the length of the PTX string from the beginning of the buffer.
|
|
uint64 ptx_length = *reinterpret_cast<const uint64 *>(ptx);
|
|
// Get the PTX string from the buffer with offset and length.
|
|
std::string compressed_ptx(ptx + sizeof(uint64),
|
|
ptx + sizeof(uint64) + ptx_length);
|
|
std::string decompressed_ptx;
|
|
// Decompress the PTX string with bzip2.
|
|
LOG(FATAL) << "bzip2 decompression is not supported yet.";
|
|
return decompressed_ptx;
|
|
}
|
|
|
|
const char *CudaPtxInMemory::default_text() const {
|
|
if (ptx_by_compute_capability_.empty()) {
|
|
return nullptr;
|
|
}
|
|
|
|
absl::MutexLock lock(&mu_);
|
|
|
|
auto ptx = ptx_by_compute_capability_.begin()->second;
|
|
// Check if there is an entry in decompressed ptx table.
|
|
auto decompressed_ptx_iter = decompressed_ptx_.find(ptx);
|
|
if (decompressed_ptx_iter != decompressed_ptx_.end()) {
|
|
// If the decompressed string is empty, which means the ptx hasn't been
|
|
// decompressed, decompress it here.
|
|
if (decompressed_ptx_iter->second.empty()) {
|
|
decompressed_ptx_iter->second = DecompressPtx(ptx);
|
|
}
|
|
return decompressed_ptx_iter->second.c_str();
|
|
}
|
|
return ptx;
|
|
}
|
|
|
|
const char *CudaPtxInMemory::original_default_text() const {
|
|
if (ptx_by_compute_capability_.empty()) {
|
|
return nullptr;
|
|
}
|
|
|
|
return ptx_by_compute_capability_.begin()->second;
|
|
}
|
|
|
|
const char *CudaPtxInMemory::text(int compute_capability_major,
|
|
int compute_capability_minor) const {
|
|
std::tuple<int, int> capability{compute_capability_major,
|
|
compute_capability_minor};
|
|
|
|
auto ptx_iter = ptx_by_compute_capability_.find(capability);
|
|
if (ptx_iter == ptx_by_compute_capability_.end()) {
|
|
return nullptr;
|
|
}
|
|
|
|
absl::MutexLock lock(&mu_);
|
|
|
|
// Check if there is an entry in decompressed ptx table.
|
|
auto decompressed_ptx_iter = decompressed_ptx_.find(ptx_iter->second);
|
|
if (decompressed_ptx_iter != decompressed_ptx_.end()) {
|
|
// If the decompressed string is empty, which means the ptx hasn't been
|
|
// decompressed, decompress it here.
|
|
if (decompressed_ptx_iter->second.empty()) {
|
|
decompressed_ptx_iter->second = DecompressPtx(ptx_iter->second);
|
|
}
|
|
return decompressed_ptx_iter->second.c_str();
|
|
}
|
|
return ptx_iter->second;
|
|
}
|
|
|
|
const char *CudaPtxInMemory::original_text(int compute_capability_major,
|
|
int compute_capability_minor) const {
|
|
std::tuple<int, int> capability{compute_capability_major,
|
|
compute_capability_minor};
|
|
|
|
auto ptx_iter = ptx_by_compute_capability_.find(capability);
|
|
if (ptx_iter == ptx_by_compute_capability_.end()) {
|
|
return nullptr;
|
|
}
|
|
|
|
return ptx_iter->second;
|
|
}
|
|
|
|
// Spec for OpenCL source text stored in the file `filename`; all state is
// held by the OnDiskKernelLoaderSpec base.
OpenCLTextOnDisk::OpenCLTextOnDisk(absl::string_view filename,
                                   absl::string_view kernelname)
    : OnDiskKernelLoaderSpec(filename, kernelname) {
}
|
|
|
|
// Spec for OpenCL source text supplied directly by the caller: `kernelname`
// goes to the KernelLoaderSpec base and text_ is initialized from `text`.
OpenCLTextInMemory::OpenCLTextInMemory(absl::string_view text,
                                       absl::string_view kernelname)
    : KernelLoaderSpec(kernelname),
      text_(text) {
}
|
|
|
|
// Spec for an OpenCL binary stored in the file `filename`; all state is held
// by the OnDiskKernelLoaderSpec base.
OpenCLBinaryOnDisk::OpenCLBinaryOnDisk(absl::string_view filename,
                                       absl::string_view kernelname)
    : OnDiskKernelLoaderSpec(filename, kernelname) {
}
|
|
|
|
// Registers an on-disk OpenCL text spec for `kernelname` located at
// `filename`. CHECKs that no OpenCL-text-on-disk spec has been added before.
// Returns `this` so that Add* calls can be chained.
MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddOpenCLTextOnDisk(
    absl::string_view filename, absl::string_view kernelname) {
  CHECK(ocl_text_on_disk_ == nullptr);
  auto *spec = new OpenCLTextOnDisk{filename, kernelname};
  ocl_text_on_disk_.reset(spec);
  return this;
}
|
|
|
|
// Registers an on-disk OpenCL binary spec for `kernelname` located at
// `filename`. CHECKs that no OpenCL-binary-on-disk spec has been added
// before. Returns `this` so that Add* calls can be chained.
MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddOpenCLBinaryOnDisk(
    absl::string_view filename, absl::string_view kernelname) {
  CHECK(ocl_binary_on_disk_ == nullptr);
  auto *spec = new OpenCLBinaryOnDisk{filename, kernelname};
  ocl_binary_on_disk_.reset(spec);
  return this;
}
|
|
|
|
// Registers an in-memory OpenCL text spec for `kernelname`. CHECKs that no
// OpenCL-text-in-memory spec has been added before. Returns `this` so that
// Add* calls can be chained.
//
// Note: despite its name, `filename` is not a path here -- it is passed as
// the `text` argument of the OpenCLTextInMemory constructor, i.e. it carries
// the OpenCL source text itself.
MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddOpenCLTextInMemory(
    absl::string_view filename, absl::string_view kernelname) {
  CHECK(ocl_text_in_memory_ == nullptr);
  auto *spec = new OpenCLTextInMemory{filename, kernelname};
  ocl_text_in_memory_.reset(spec);
  return this;
}
|
|
|
|
// Registers an on-disk CUDA PTX spec for `kernelname` located at `filename`.
// CHECKs that no CUDA-PTX-on-disk spec has been added before. Returns `this`
// so that Add* calls can be chained.
MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaPtxOnDisk(
    absl::string_view filename, absl::string_view kernelname) {
  CHECK(cuda_ptx_on_disk_ == nullptr);
  auto *spec = new CudaPtxOnDisk{filename, kernelname};
  cuda_ptx_on_disk_.reset(spec);
  return this;
}
|
|
|
|
// Registers an in-memory CUDA cubin spec for `kernelname` whose contents
// start at `bytes`. CHECKs that no CUDA-cubin-in-memory spec has been added
// before. Returns `this` so that Add* calls can be chained.
MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCubinInMemory(
    const char *bytes, absl::string_view kernelname) {
  CHECK(cuda_cubin_in_memory_ == nullptr);
  auto *spec = new CudaCubinInMemory{bytes, kernelname};
  cuda_cubin_in_memory_.reset(spec);
  return this;
}
|
|
|
|
// Registers an on-disk CUDA cubin spec for `kernelname` located at
// `filename`. CHECKs that no CUDA-cubin-on-disk spec has been added before.
// Returns `this` so that Add* calls can be chained.
MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCubinOnDisk(
    absl::string_view filename, absl::string_view kernelname) {
  CHECK(cuda_cubin_on_disk_ == nullptr);
  auto *spec = new CudaCubinOnDisk{filename, kernelname};
  cuda_cubin_on_disk_.reset(spec);
  return this;
}
|
|
|
|
// Registers a single uncompressed in-memory CUDA PTX text for `kernelname`.
// CHECKs that no CUDA-PTX-in-memory spec (compressed or not) has been added
// before. Returns `this` so that Add* calls can be chained.
MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaPtxInMemory(
    absl::string_view ptx, absl::string_view kernelname) {
  CHECK(cuda_ptx_in_memory_ == nullptr);
  auto *spec =
      new CudaPtxInMemory{ptx, kernelname, false /* ptx_compressed */};
  cuda_ptx_in_memory_.reset(spec);
  return this;
}
|
|
|
|
// Registers a single compressed in-memory CUDA PTX buffer for `kernelname`.
// It shares the cuda_ptx_in_memory_ slot with the uncompressed variant, so
// the CHECK fails if either kind has been added before. Returns `this` so
// that Add* calls can be chained.
MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCompressedPtxInMemory(
    absl::string_view ptx, absl::string_view kernelname) {
  CHECK(cuda_ptx_in_memory_ == nullptr);
  auto *spec = new CudaPtxInMemory{ptx, kernelname, true /* ptx_compressed */};
  cuda_ptx_in_memory_.reset(spec);
  return this;
}
|
|
|
|
// Registers a set of uncompressed in-memory CUDA PTX texts for `kernelname`,
// one per entry of `spec_list`. CHECKs that no CUDA-PTX-in-memory spec
// (compressed or not) has been added before. Returns `this` so that Add*
// calls can be chained.
MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaPtxInMemory(
    std::initializer_list<CudaPtxInMemory::PtxSpec> spec_list,
    absl::string_view kernelname) {
  CHECK(cuda_ptx_in_memory_ == nullptr);
  auto *spec =
      new CudaPtxInMemory{spec_list, kernelname, false /* ptx_compressed */};
  cuda_ptx_in_memory_.reset(spec);
  return this;
}
|
|
|
|
// Registers a set of compressed in-memory CUDA PTX buffers for `kernelname`,
// one per entry of `spec_list`. It shares the cuda_ptx_in_memory_ slot with
// the uncompressed variant, so the CHECK fails if either kind has been added
// before. Returns `this` so that Add* calls can be chained.
MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCompressedPtxInMemory(
    std::initializer_list<CudaPtxInMemory::PtxSpec> spec_list,
    absl::string_view kernelname) {
  CHECK(cuda_ptx_in_memory_ == nullptr);
  auto *spec =
      new CudaPtxInMemory{spec_list, kernelname, true /* ptx_compressed */};
  cuda_ptx_in_memory_.reset(spec);
  return this;
}
|
|
|
|
// Creates a spec with no loader variants registered yet; only `arity` is
// recorded (in arity_). Variants are attached afterwards via the Add* methods.
MultiKernelLoaderSpec::MultiKernelLoaderSpec(size_t arity) : arity_(arity) {}
|
|
|
|
} // namespace stream_executor
|