Fully qualifying uses of tensorflow::int64.
PiperOrigin-RevId: 321399703
Change-Id: I42732ead99e062444fa5c507f9fce10f1ace765c
This commit is contained in:
parent ee781437e8
commit 4c7d80b96a
@ -23,16 +23,18 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/dynamic_annotations.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
|
||||
using tensorflow::int64;
|
||||
|
||||
TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenConvF32(
|
||||
const void* run_options_ptr, float* out, float* lhs, float* rhs,
|
||||
int64 input_batch, int64 input_rows, int64 input_cols, int64 input_channels,
|
||||
int64 kernel_rows, int64 kernel_cols, int64 kernel_channels,
|
||||
int64 kernel_filters, int64 output_rows, int64 output_cols,
|
||||
int64 row_stride, int64 col_stride, int64 padding_top, int64 padding_bottom,
|
||||
int64 padding_left, int64 padding_right, int64 lhs_row_dilation,
|
||||
int64 lhs_col_dilation, int64 rhs_row_dilation, int64 rhs_col_dilation) {
|
||||
tensorflow::int64 input_batch, tensorflow::int64 input_rows,
|
||||
tensorflow::int64 input_cols, tensorflow::int64 input_channels,
|
||||
tensorflow::int64 kernel_rows, tensorflow::int64 kernel_cols,
|
||||
tensorflow::int64 kernel_channels, tensorflow::int64 kernel_filters,
|
||||
tensorflow::int64 output_rows, tensorflow::int64 output_cols,
|
||||
tensorflow::int64 row_stride, tensorflow::int64 col_stride,
|
||||
tensorflow::int64 padding_top, tensorflow::int64 padding_bottom,
|
||||
tensorflow::int64 padding_left, tensorflow::int64 padding_right,
|
||||
tensorflow::int64 lhs_row_dilation, tensorflow::int64 lhs_col_dilation,
|
||||
tensorflow::int64 rhs_row_dilation, tensorflow::int64 rhs_col_dilation) {
|
||||
const xla::ExecutableRunOptions* run_options =
|
||||
static_cast<const xla::ExecutableRunOptions*>(run_options_ptr);
|
||||
XLA_LIGHTWEIGHT_CHECK(run_options->intra_op_thread_pool() != nullptr);
|
||||
@ -46,13 +48,17 @@ TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenConvF32(
|
||||
|
||||
TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenConvF16(
|
||||
const void* run_options_ptr, Eigen::half* out, Eigen::half* lhs,
|
||||
Eigen::half* rhs, int64 input_batch, int64 input_rows, int64 input_cols,
|
||||
int64 input_channels, int64 kernel_rows, int64 kernel_cols,
|
||||
int64 kernel_channels, int64 kernel_filters, int64 output_rows,
|
||||
int64 output_cols, int64 row_stride, int64 col_stride, int64 padding_top,
|
||||
int64 padding_bottom, int64 padding_left, int64 padding_right,
|
||||
int64 lhs_row_dilation, int64 lhs_col_dilation, int64 rhs_row_dilation,
|
||||
int64 rhs_col_dilation) {
|
||||
Eigen::half* rhs, tensorflow::int64 input_batch,
|
||||
tensorflow::int64 input_rows, tensorflow::int64 input_cols,
|
||||
tensorflow::int64 input_channels, tensorflow::int64 kernel_rows,
|
||||
tensorflow::int64 kernel_cols, tensorflow::int64 kernel_channels,
|
||||
tensorflow::int64 kernel_filters, tensorflow::int64 output_rows,
|
||||
tensorflow::int64 output_cols, tensorflow::int64 row_stride,
|
||||
tensorflow::int64 col_stride, tensorflow::int64 padding_top,
|
||||
tensorflow::int64 padding_bottom, tensorflow::int64 padding_left,
|
||||
tensorflow::int64 padding_right, tensorflow::int64 lhs_row_dilation,
|
||||
tensorflow::int64 lhs_col_dilation, tensorflow::int64 rhs_row_dilation,
|
||||
tensorflow::int64 rhs_col_dilation) {
|
||||
const xla::ExecutableRunOptions* run_options =
|
||||
static_cast<const xla::ExecutableRunOptions*>(run_options_ptr);
|
||||
XLA_LIGHTWEIGHT_CHECK(run_options->intra_op_thread_pool() != nullptr);
|
||||
|
@ -25,21 +25,16 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/macros.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
|
||||
namespace {
|
||||
using tensorflow::int32;
|
||||
using tensorflow::int64;
|
||||
} // namespace
|
||||
|
||||
TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSort(
|
||||
int64 a, int64 b, int64 c, char** values, int32 values_count,
|
||||
int32* values_primitive_type_size_in_bytes, bool is_stable,
|
||||
char* run_options, int64* prof_counters,
|
||||
tensorflow::int64 a, tensorflow::int64 b, tensorflow::int64 c, char** values, tensorflow::int32 values_count,
|
||||
tensorflow::int32* values_primitive_type_size_in_bytes, bool is_stable,
|
||||
char* run_options, tensorflow::int64* prof_counters,
|
||||
void (*less_than)(char*, char*, char**, char**, tensorflow::int64*)) {
|
||||
// 'values' and 'values_primitive_type_size_in_bytes' are managed by the JIT
|
||||
// code, so msan can't tell they are initialized.
|
||||
TF_ANNOTATE_MEMORY_IS_INITIALIZED(values, values_count * sizeof(char*));
|
||||
TF_ANNOTATE_MEMORY_IS_INITIALIZED(values_primitive_type_size_in_bytes,
|
||||
values_count * sizeof(int32));
|
||||
values_count * sizeof(tensorflow::int32));
|
||||
|
||||
// High-level idea of the iteration/sorting logic:
|
||||
// Conceptually we have a 3-dimensional shape [a, b, c]. b corresponds to the
|
||||
@ -50,16 +45,16 @@ TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSort(
|
||||
// 'base_offset' value which points to the first element in that row, and add
|
||||
// i * c for accessing the 'i'-th element in that row.
|
||||
|
||||
int64 sort_dimension_elements = b;
|
||||
int64 num_iteration_elements = a * c;
|
||||
int64 sort_dimension_offset = c;
|
||||
tensorflow::int64 sort_dimension_elements = b;
|
||||
tensorflow::int64 num_iteration_elements = a * c;
|
||||
tensorflow::int64 sort_dimension_offset = c;
|
||||
|
||||
std::unique_ptr<int64[]> indices(new int64[sort_dimension_elements]);
|
||||
std::unique_ptr<tensorflow::int64[]> indices(new tensorflow::int64[sort_dimension_elements]);
|
||||
std::unique_ptr<char*[]> comparison_values(new char*[2 * values_count]);
|
||||
std::iota(indices.get(), indices.get() + sort_dimension_elements, 0);
|
||||
std::unique_ptr<std::string[]> reordered_values(
|
||||
new std::string[sort_dimension_elements]);
|
||||
for (int64 index = 0; index < num_iteration_elements; ++index) {
|
||||
for (tensorflow::int64 index = 0; index < num_iteration_elements; ++index) {
|
||||
// If the sort should be stable, we have to reinitialize indices to iota to
|
||||
// guarantee that we still keep the relative order in case of ties.
|
||||
if (is_stable && index > 0) {
|
||||
@ -71,14 +66,14 @@ TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSort(
|
||||
// calculating the base offset, we need to multiply the index into the 'a'
|
||||
// dimension with 'b' * 'c'.
|
||||
// 'index' / 'c' * 'c' * 'b' = ('index' - 'index' % 'c') * 'b'.
|
||||
int64 base_offset =
|
||||
tensorflow::int64 base_offset =
|
||||
index % sort_dimension_offset +
|
||||
(index - index % sort_dimension_offset) * sort_dimension_elements;
|
||||
auto compare_function = [&](int64 a, int64 b) -> bool {
|
||||
for (int32 i = 0; i < values_count; ++i) {
|
||||
int64 memory_index_lhs = (base_offset + a * sort_dimension_offset) *
|
||||
auto compare_function = [&](tensorflow::int64 a, tensorflow::int64 b) -> bool {
|
||||
for (tensorflow::int32 i = 0; i < values_count; ++i) {
|
||||
tensorflow::int64 memory_index_lhs = (base_offset + a * sort_dimension_offset) *
|
||||
values_primitive_type_size_in_bytes[i];
|
||||
int64 memory_index_rhs = (base_offset + b * sort_dimension_offset) *
|
||||
tensorflow::int64 memory_index_rhs = (base_offset + b * sort_dimension_offset) *
|
||||
values_primitive_type_size_in_bytes[i];
|
||||
comparison_values[i * 2] = values[i] + memory_index_lhs;
|
||||
comparison_values[i * 2 + 1] = values[i] + memory_index_rhs;
|
||||
@ -97,9 +92,9 @@ TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSort(
|
||||
}
|
||||
|
||||
// Reorder the values according to the order defined by 'indices'.
|
||||
for (int32 idx = 0; idx < values_count; ++idx) {
|
||||
for (int64 i = 0; i < sort_dimension_elements; ++i) {
|
||||
int64 memory_index =
|
||||
for (tensorflow::int32 idx = 0; idx < values_count; ++idx) {
|
||||
for (tensorflow::int64 i = 0; i < sort_dimension_elements; ++i) {
|
||||
tensorflow::int64 memory_index =
|
||||
(base_offset + indices[i] * sort_dimension_offset) *
|
||||
values_primitive_type_size_in_bytes[idx];
|
||||
|
||||
@ -107,8 +102,8 @@ TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSort(
|
||||
std::string(values[idx] + memory_index,
|
||||
values_primitive_type_size_in_bytes[idx]);
|
||||
}
|
||||
for (int64 i = 0; i < sort_dimension_elements; ++i) {
|
||||
int64 memory_index = (base_offset + i * sort_dimension_offset) *
|
||||
for (tensorflow::int64 i = 0; i < sort_dimension_elements; ++i) {
|
||||
tensorflow::int64 memory_index = (base_offset + i * sort_dimension_offset) *
|
||||
values_primitive_type_size_in_bytes[idx];
|
||||
memcpy(values[idx] + memory_index, reordered_values[i].c_str(),
|
||||
values_primitive_type_size_in_bytes[idx]);
|
||||
|
@ -27,9 +27,6 @@ limitations under the License.
|
||||
#include "tensorflow/core/kernels/eigen_contraction_kernel.h"
|
||||
#endif
|
||||
|
||||
using tensorflow::int32;
|
||||
using tensorflow::int64;
|
||||
|
||||
namespace {
|
||||
|
||||
bool Is16BytesAligned(void* ptr) {
|
||||
@ -37,19 +34,20 @@ bool Is16BytesAligned(void* ptr) {
|
||||
}
|
||||
|
||||
template <typename T, Eigen::AlignmentType Alignment>
|
||||
void MatMul(const void* run_options_ptr, T* out, T* lhs, T* rhs, int64 m,
|
||||
int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) {
|
||||
void MatMul(const void* run_options_ptr, T* out, T* lhs, T* rhs,
|
||||
tensorflow::int64 m, tensorflow::int64 n, tensorflow::int64 k,
|
||||
tensorflow::int32 transpose_lhs, tensorflow::int32 transpose_rhs) {
|
||||
const xla::ExecutableRunOptions* run_options =
|
||||
static_cast<const xla::ExecutableRunOptions*>(run_options_ptr);
|
||||
|
||||
int64 lhs_rows = m;
|
||||
int64 lhs_cols = k;
|
||||
tensorflow::int64 lhs_rows = m;
|
||||
tensorflow::int64 lhs_cols = k;
|
||||
if (transpose_lhs) {
|
||||
std::swap(lhs_rows, lhs_cols);
|
||||
}
|
||||
|
||||
int64 rhs_rows = k;
|
||||
int64 rhs_cols = n;
|
||||
tensorflow::int64 rhs_rows = k;
|
||||
tensorflow::int64 rhs_cols = n;
|
||||
if (transpose_rhs) {
|
||||
std::swap(rhs_rows, rhs_cols);
|
||||
}
|
||||
@ -75,8 +73,9 @@ void MatMul(const void* run_options_ptr, T* out, T* lhs, T* rhs, int64 m,
|
||||
|
||||
template <typename T>
|
||||
void MatMulDispatch(const void* run_options_ptr, T* out, T* lhs, T* rhs,
|
||||
int64 m, int64 n, int64 k, int32 transpose_lhs,
|
||||
int32 transpose_rhs) {
|
||||
tensorflow::int64 m, tensorflow::int64 n,
|
||||
tensorflow::int64 k, tensorflow::int32 transpose_lhs,
|
||||
tensorflow::int32 transpose_rhs) {
|
||||
bool all_buffers_16b_aligned =
|
||||
Is16BytesAligned(out) && Is16BytesAligned(lhs) && Is16BytesAligned(rhs);
|
||||
|
||||
@ -94,45 +93,52 @@ void MatMulDispatch(const void* run_options_ptr, T* out, T* lhs, T* rhs,
|
||||
|
||||
TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenMatMulF16(
|
||||
const void* run_options_ptr, Eigen::half* out, Eigen::half* lhs,
|
||||
Eigen::half* rhs, int64 m, int64 n, int64 k, int32 transpose_lhs,
|
||||
int32 transpose_rhs) {
|
||||
Eigen::half* rhs, tensorflow::int64 m, tensorflow::int64 n,
|
||||
tensorflow::int64 k, tensorflow::int32 transpose_lhs,
|
||||
tensorflow::int32 transpose_rhs) {
|
||||
MatMulDispatch<Eigen::half>(run_options_ptr, out, lhs, rhs, m, n, k,
|
||||
transpose_lhs, transpose_rhs);
|
||||
}
|
||||
|
||||
TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenMatMulF32(
|
||||
const void* run_options_ptr, float* out, float* lhs, float* rhs, int64 m,
|
||||
int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) {
|
||||
const void* run_options_ptr, float* out, float* lhs, float* rhs,
|
||||
tensorflow::int64 m, tensorflow::int64 n, tensorflow::int64 k,
|
||||
tensorflow::int32 transpose_lhs, tensorflow::int32 transpose_rhs) {
|
||||
MatMulDispatch<float>(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs,
|
||||
transpose_rhs);
|
||||
}
|
||||
|
||||
TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenMatMulF64(
|
||||
const void* run_options_ptr, double* out, double* lhs, double* rhs, int64 m,
|
||||
int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) {
|
||||
const void* run_options_ptr, double* out, double* lhs, double* rhs,
|
||||
tensorflow::int64 m, tensorflow::int64 n, tensorflow::int64 k,
|
||||
tensorflow::int32 transpose_lhs, tensorflow::int32 transpose_rhs) {
|
||||
MatMulDispatch<double>(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs,
|
||||
transpose_rhs);
|
||||
}
|
||||
|
||||
TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenMatMulC64(
|
||||
const void* run_options_ptr, std::complex<float>* out,
|
||||
std::complex<float>* lhs, std::complex<float>* rhs, int64 m, int64 n,
|
||||
int64 k, int32 transpose_lhs, int32 transpose_rhs) {
|
||||
std::complex<float>* lhs, std::complex<float>* rhs, tensorflow::int64 m,
|
||||
tensorflow::int64 n, tensorflow::int64 k, tensorflow::int32 transpose_lhs,
|
||||
tensorflow::int32 transpose_rhs) {
|
||||
MatMulDispatch<std::complex<float>>(run_options_ptr, out, lhs, rhs, m, n, k,
|
||||
transpose_lhs, transpose_rhs);
|
||||
}
|
||||
|
||||
TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenMatMulC128(
|
||||
const void* run_options_ptr, std::complex<double>* out,
|
||||
std::complex<double>* lhs, std::complex<double>* rhs, int64 m, int64 n,
|
||||
int64 k, int32 transpose_lhs, int32 transpose_rhs) {
|
||||
std::complex<double>* lhs, std::complex<double>* rhs, tensorflow::int64 m,
|
||||
tensorflow::int64 n, tensorflow::int64 k, tensorflow::int32 transpose_lhs,
|
||||
tensorflow::int32 transpose_rhs) {
|
||||
MatMulDispatch<std::complex<double>>(run_options_ptr, out, lhs, rhs, m, n, k,
|
||||
transpose_lhs, transpose_rhs);
|
||||
}
|
||||
|
||||
TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenMatMulS32(
|
||||
const void* run_options_ptr, int32* out, int32* lhs, int32* rhs, int64 m,
|
||||
int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) {
|
||||
MatMulDispatch<int32>(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs,
|
||||
transpose_rhs);
|
||||
const void* run_options_ptr, tensorflow::int32* out, tensorflow::int32* lhs,
|
||||
tensorflow::int32* rhs, tensorflow::int64 m, tensorflow::int64 n,
|
||||
tensorflow::int64 k, tensorflow::int32 transpose_lhs,
|
||||
tensorflow::int32 transpose_rhs) {
|
||||
MatMulDispatch<tensorflow::int32>(run_options_ptr, out, lhs, rhs, m, n, k,
|
||||
transpose_lhs, transpose_rhs);
|
||||
}
|
||||
|
@ -19,18 +19,20 @@ limitations under the License.
|
||||
#include "tensorflow/core/platform/dynamic_annotations.h"
|
||||
#include "tensorflow/core/platform/types.h"
|
||||
|
||||
using tensorflow::int64;
|
||||
|
||||
TF_ATTRIBUTE_NO_SANITIZE_MEMORY void
|
||||
__xla_cpu_runtime_EigenSingleThreadedConvF16(
|
||||
const void* run_options_ptr, Eigen::half* out, Eigen::half* lhs,
|
||||
Eigen::half* rhs, int64 input_batch, int64 input_rows, int64 input_cols,
|
||||
int64 input_channels, int64 kernel_rows, int64 kernel_cols,
|
||||
int64 kernel_channels, int64 kernel_filters, int64 output_rows,
|
||||
int64 output_cols, int64 row_stride, int64 col_stride, int64 padding_top,
|
||||
int64 padding_bottom, int64 padding_left, int64 padding_right,
|
||||
int64 lhs_row_dilation, int64 lhs_col_dilation, int64 rhs_row_dilation,
|
||||
int64 rhs_col_dilation) {
|
||||
Eigen::half* rhs, tensorflow::int64 input_batch,
|
||||
tensorflow::int64 input_rows, tensorflow::int64 input_cols,
|
||||
tensorflow::int64 input_channels, tensorflow::int64 kernel_rows,
|
||||
tensorflow::int64 kernel_cols, tensorflow::int64 kernel_channels,
|
||||
tensorflow::int64 kernel_filters, tensorflow::int64 output_rows,
|
||||
tensorflow::int64 output_cols, tensorflow::int64 row_stride,
|
||||
tensorflow::int64 col_stride, tensorflow::int64 padding_top,
|
||||
tensorflow::int64 padding_bottom, tensorflow::int64 padding_left,
|
||||
tensorflow::int64 padding_right, tensorflow::int64 lhs_row_dilation,
|
||||
tensorflow::int64 lhs_col_dilation, tensorflow::int64 rhs_row_dilation,
|
||||
tensorflow::int64 rhs_col_dilation) {
|
||||
tensorflow::xla::EigenConvImpl(
|
||||
Eigen::DefaultDevice(), out, lhs, rhs, input_batch, input_rows,
|
||||
input_cols, input_channels, kernel_rows, kernel_cols, kernel_channels,
|
||||
@ -42,12 +44,16 @@ __xla_cpu_runtime_EigenSingleThreadedConvF16(
|
||||
TF_ATTRIBUTE_NO_SANITIZE_MEMORY void
|
||||
__xla_cpu_runtime_EigenSingleThreadedConvF32(
|
||||
const void* run_options_ptr, float* out, float* lhs, float* rhs,
|
||||
int64 input_batch, int64 input_rows, int64 input_cols, int64 input_channels,
|
||||
int64 kernel_rows, int64 kernel_cols, int64 kernel_channels,
|
||||
int64 kernel_filters, int64 output_rows, int64 output_cols,
|
||||
int64 row_stride, int64 col_stride, int64 padding_top, int64 padding_bottom,
|
||||
int64 padding_left, int64 padding_right, int64 lhs_row_dilation,
|
||||
int64 lhs_col_dilation, int64 rhs_row_dilation, int64 rhs_col_dilation) {
|
||||
tensorflow::int64 input_batch, tensorflow::int64 input_rows,
|
||||
tensorflow::int64 input_cols, tensorflow::int64 input_channels,
|
||||
tensorflow::int64 kernel_rows, tensorflow::int64 kernel_cols,
|
||||
tensorflow::int64 kernel_channels, tensorflow::int64 kernel_filters,
|
||||
tensorflow::int64 output_rows, tensorflow::int64 output_cols,
|
||||
tensorflow::int64 row_stride, tensorflow::int64 col_stride,
|
||||
tensorflow::int64 padding_top, tensorflow::int64 padding_bottom,
|
||||
tensorflow::int64 padding_left, tensorflow::int64 padding_right,
|
||||
tensorflow::int64 lhs_row_dilation, tensorflow::int64 lhs_col_dilation,
|
||||
tensorflow::int64 rhs_row_dilation, tensorflow::int64 rhs_col_dilation) {
|
||||
tensorflow::xla::EigenConvImpl(
|
||||
Eigen::DefaultDevice(), out, lhs, rhs, input_batch, input_rows,
|
||||
input_cols, input_channels, kernel_rows, kernel_cols, kernel_channels,
|
||||
|
@ -23,9 +23,6 @@ limitations under the License.
|
||||
#include "tensorflow/core/kernels/eigen_contraction_kernel.h"
|
||||
#endif
|
||||
|
||||
using tensorflow::int32;
|
||||
using tensorflow::int64;
|
||||
|
||||
namespace {
|
||||
|
||||
bool Is16BytesAligned(void* ptr) {
|
||||
@ -33,16 +30,17 @@ bool Is16BytesAligned(void* ptr) {
|
||||
}
|
||||
|
||||
template <typename T, Eigen::AlignmentType Alignment>
|
||||
void MatMul(const void* run_options_ptr, T* out, T* lhs, T* rhs, int64 m,
|
||||
int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) {
|
||||
int64 lhs_rows = m;
|
||||
int64 lhs_cols = k;
|
||||
void MatMul(const void* run_options_ptr, T* out, T* lhs, T* rhs,
|
||||
tensorflow::int64 m, tensorflow::int64 n, tensorflow::int64 k,
|
||||
tensorflow::int32 transpose_lhs, tensorflow::int32 transpose_rhs) {
|
||||
tensorflow::int64 lhs_rows = m;
|
||||
tensorflow::int64 lhs_cols = k;
|
||||
if (transpose_lhs) {
|
||||
std::swap(lhs_rows, lhs_cols);
|
||||
}
|
||||
|
||||
int64 rhs_rows = k;
|
||||
int64 rhs_cols = n;
|
||||
tensorflow::int64 rhs_rows = k;
|
||||
tensorflow::int64 rhs_cols = n;
|
||||
if (transpose_rhs) {
|
||||
std::swap(rhs_rows, rhs_cols);
|
||||
}
|
||||
@ -67,8 +65,10 @@ void MatMul(const void* run_options_ptr, T* out, T* lhs, T* rhs, int64 m,
|
||||
|
||||
template <typename T>
|
||||
void SingleThreadedMatMulDispatch(const void* run_options_ptr, T* out, T* lhs,
|
||||
T* rhs, int64 m, int64 n, int64 k,
|
||||
int32 transpose_lhs, int32 transpose_rhs) {
|
||||
T* rhs, tensorflow::int64 m,
|
||||
tensorflow::int64 n, tensorflow::int64 k,
|
||||
tensorflow::int32 transpose_lhs,
|
||||
tensorflow::int32 transpose_rhs) {
|
||||
bool all_buffers_16b_aligned =
|
||||
Is16BytesAligned(out) && Is16BytesAligned(lhs) && Is16BytesAligned(rhs);
|
||||
|
||||
@ -86,28 +86,27 @@ void SingleThreadedMatMulDispatch(const void* run_options_ptr, T* out, T* lhs,
|
||||
TF_ATTRIBUTE_NO_SANITIZE_MEMORY void
|
||||
__xla_cpu_runtime_EigenSingleThreadedMatMulF16(
|
||||
const void* run_options_ptr, Eigen::half* out, Eigen::half* lhs,
|
||||
Eigen::half* rhs, int64 m, int64 n, int64 k, int32 transpose_lhs,
|
||||
int32 transpose_rhs) {
|
||||
Eigen::half* rhs, tensorflow::int64 m, tensorflow::int64 n,
|
||||
tensorflow::int64 k, tensorflow::int32 transpose_lhs,
|
||||
tensorflow::int32 transpose_rhs) {
|
||||
SingleThreadedMatMulDispatch<Eigen::half>(run_options_ptr, out, lhs, rhs, m,
|
||||
n, k, transpose_lhs, transpose_rhs);
|
||||
}
|
||||
|
||||
TF_ATTRIBUTE_NO_SANITIZE_MEMORY void
|
||||
__xla_cpu_runtime_EigenSingleThreadedMatMulF32(const void* run_options_ptr,
|
||||
float* out, float* lhs,
|
||||
float* rhs, int64 m, int64 n,
|
||||
int64 k, int32 transpose_lhs,
|
||||
int32 transpose_rhs) {
|
||||
__xla_cpu_runtime_EigenSingleThreadedMatMulF32(
|
||||
const void* run_options_ptr, float* out, float* lhs, float* rhs,
|
||||
tensorflow::int64 m, tensorflow::int64 n, tensorflow::int64 k,
|
||||
tensorflow::int32 transpose_lhs, tensorflow::int32 transpose_rhs) {
|
||||
SingleThreadedMatMulDispatch<float>(run_options_ptr, out, lhs, rhs, m, n, k,
|
||||
transpose_lhs, transpose_rhs);
|
||||
}
|
||||
|
||||
TF_ATTRIBUTE_NO_SANITIZE_MEMORY void
|
||||
__xla_cpu_runtime_EigenSingleThreadedMatMulF64(const void* run_options_ptr,
|
||||
double* out, double* lhs,
|
||||
double* rhs, int64 m, int64 n,
|
||||
int64 k, int32 transpose_lhs,
|
||||
int32 transpose_rhs) {
|
||||
__xla_cpu_runtime_EigenSingleThreadedMatMulF64(
|
||||
const void* run_options_ptr, double* out, double* lhs, double* rhs,
|
||||
tensorflow::int64 m, tensorflow::int64 n, tensorflow::int64 k,
|
||||
tensorflow::int32 transpose_lhs, tensorflow::int32 transpose_rhs) {
|
||||
SingleThreadedMatMulDispatch<double>(run_options_ptr, out, lhs, rhs, m, n, k,
|
||||
transpose_lhs, transpose_rhs);
|
||||
}
|
||||
@ -115,8 +114,9 @@ __xla_cpu_runtime_EigenSingleThreadedMatMulF64(const void* run_options_ptr,
|
||||
TF_ATTRIBUTE_NO_SANITIZE_MEMORY void
|
||||
__xla_cpu_runtime_EigenSingleThreadedMatMulC64(
|
||||
const void* run_options_ptr, std::complex<float>* out,
|
||||
std::complex<float>* lhs, std::complex<float>* rhs, int64 m, int64 n,
|
||||
int64 k, int32 transpose_lhs, int32 transpose_rhs) {
|
||||
std::complex<float>* lhs, std::complex<float>* rhs, tensorflow::int64 m,
|
||||
tensorflow::int64 n, tensorflow::int64 k, tensorflow::int32 transpose_lhs,
|
||||
tensorflow::int32 transpose_rhs) {
|
||||
SingleThreadedMatMulDispatch<std::complex<float>>(
|
||||
run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs);
|
||||
}
|
||||
@ -124,18 +124,19 @@ __xla_cpu_runtime_EigenSingleThreadedMatMulC64(
|
||||
TF_ATTRIBUTE_NO_SANITIZE_MEMORY void
|
||||
__xla_cpu_runtime_EigenSingleThreadedMatMulC128(
|
||||
const void* run_options_ptr, std::complex<double>* out,
|
||||
std::complex<double>* lhs, std::complex<double>* rhs, int64 m, int64 n,
|
||||
int64 k, int32 transpose_lhs, int32 transpose_rhs) {
|
||||
std::complex<double>* lhs, std::complex<double>* rhs, tensorflow::int64 m,
|
||||
tensorflow::int64 n, tensorflow::int64 k, tensorflow::int32 transpose_lhs,
|
||||
tensorflow::int32 transpose_rhs) {
|
||||
SingleThreadedMatMulDispatch<std::complex<double>>(
|
||||
run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs);
|
||||
}
|
||||
|
||||
TF_ATTRIBUTE_NO_SANITIZE_MEMORY void
|
||||
__xla_cpu_runtime_EigenSingleThreadedMatMulS32(const void* run_options_ptr,
|
||||
int32* out, int32* lhs,
|
||||
int32* rhs, int64 m, int64 n,
|
||||
int64 k, int32 transpose_lhs,
|
||||
int32 transpose_rhs) {
|
||||
SingleThreadedMatMulDispatch<int32>(run_options_ptr, out, lhs, rhs, m, n, k,
|
||||
transpose_lhs, transpose_rhs);
|
||||
__xla_cpu_runtime_EigenSingleThreadedMatMulS32(
|
||||
const void* run_options_ptr, tensorflow::int32* out, tensorflow::int32* lhs,
|
||||
tensorflow::int32* rhs, tensorflow::int64 m, tensorflow::int64 n,
|
||||
tensorflow::int64 k, tensorflow::int32 transpose_lhs,
|
||||
tensorflow::int32 transpose_rhs) {
|
||||
SingleThreadedMatMulDispatch<tensorflow::int32>(
|
||||
run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user