Fix 64-bit integer portability problems in TensorFlow compiler.
Removes reliance on the assumption that tensorflow::int64 is long long. This is intended to eventually enable changing the definition to int64_t from <cstdint>.

PiperOrigin-RevId: 290128329
Change-Id: I3b6b8e88c64456eedb38fd016a5cb2960b594abf
This commit is contained in:
parent
de37b1eaca
commit
9b3c87d58b
@ -74,16 +74,16 @@ void DumpStatsToStdout(const Stats& stats) {
|
|||||||
const int kBufSize = 1000;
|
const int kBufSize = 1000;
|
||||||
char buf[kBufSize];
|
char buf[kBufSize];
|
||||||
snprintf(buf, kBufSize, "Mean with %2.0f%% trimmed:", trim_ratio * 100);
|
snprintf(buf, kBufSize, "Mean with %2.0f%% trimmed:", trim_ratio * 100);
|
||||||
const string label_trimmed(buf);
|
std::string label_trimmed(buf);
|
||||||
snprintf(buf, kBufSize, "Mean of %2.0f%% best:", best_ratio * 100);
|
snprintf(buf, kBufSize, "Mean of %2.0f%% best:", best_ratio * 100);
|
||||||
const string label_best(buf);
|
std::string label_best(buf);
|
||||||
std::vector<std::pair<string, double>> groups = {
|
std::vector<std::pair<std::string, double>> groups = {
|
||||||
{"Best:", sorted_us.front()},
|
{"Best:", sorted_us.front()},
|
||||||
{"Worst:", sorted_us.back()},
|
{"Worst:", sorted_us.back()},
|
||||||
{"Median:", sorted_us[count_us / 2]},
|
{"Median:", sorted_us[count_us / 2]},
|
||||||
{"Mean:", sum_us / count_us},
|
{"Mean:", sum_us / count_us},
|
||||||
{label_trimmed, sum_us_trimmed / count_us_trimmed},
|
{std::move(label_trimmed), sum_us_trimmed / count_us_trimmed},
|
||||||
{label_best, sum_us_best / count_us_best},
|
{std::move(label_best), sum_us_best / count_us_best},
|
||||||
};
|
};
|
||||||
int max_label_size = 0;
|
int max_label_size = 0;
|
||||||
double max_us = 0;
|
double max_us = 0;
|
||||||
@ -102,7 +102,7 @@ void DumpStatsToStdout(const Stats& stats) {
|
|||||||
}
|
}
|
||||||
// Dump stats out.
|
// Dump stats out.
|
||||||
printf("Benchmark ran %zu iterations over %lld us\n", count_us,
|
printf("Benchmark ran %zu iterations over %lld us\n", count_us,
|
||||||
stats.total_us);
|
static_cast<long long>(stats.total_us)); // NOLINT
|
||||||
for (const auto& g : groups) {
|
for (const auto& g : groups) {
|
||||||
printf(" %-*s %*.3f us\n", max_label_size, g.first.c_str(), max_digits + 4,
|
printf(" %-*s %*.3f us\n", max_label_size, g.first.c_str(), max_digits + 4,
|
||||||
g.second);
|
g.second);
|
||||||
@ -114,7 +114,8 @@ void Benchmark(const Options& options, const BenchmarkFn& fn, Stats* stats) {
|
|||||||
const int64 max_us = (options.max_micros <= 0 && options.max_iters <= 0)
|
const int64 max_us = (options.max_micros <= 0 && options.max_iters <= 0)
|
||||||
? Options::kDefaultMicros
|
? Options::kDefaultMicros
|
||||||
: options.max_micros;
|
: options.max_micros;
|
||||||
printf("Running benchmark for %lld us\n", max_us);
|
// NOLINTNEXTLINE
|
||||||
|
printf("Running benchmark for %lld us\n", static_cast<long long>(max_us));
|
||||||
const int64 start_us = NowMicros();
|
const int64 start_us = NowMicros();
|
||||||
int64 iters = 0;
|
int64 iters = 0;
|
||||||
while (true) {
|
while (true) {
|
||||||
|
@ -278,8 +278,10 @@ class MatrixDiagOp : public XlaOpKernel {
|
|||||||
errors::InvalidArgument(
|
errors::InvalidArgument(
|
||||||
"The number of diagonals provided in the input does not "
|
"The number of diagonals provided in the input does not "
|
||||||
"match the lower_diag_index and upper_diag_index range."));
|
"match the lower_diag_index and upper_diag_index range."));
|
||||||
const int64 min_num_rows = max_diag_len - std::min(upper_diag_index, 0LL);
|
const int64 min_num_rows =
|
||||||
const int64 min_num_cols = max_diag_len + std::max(lower_diag_index, 0LL);
|
max_diag_len - std::min(upper_diag_index, int64{0});
|
||||||
|
const int64 min_num_cols =
|
||||||
|
max_diag_len + std::max(lower_diag_index, int64{0});
|
||||||
OP_REQUIRES(context, num_rows == -1 || num_rows >= min_num_rows,
|
OP_REQUIRES(context, num_rows == -1 || num_rows >= min_num_rows,
|
||||||
errors::InvalidArgument("The number of rows is too small."));
|
errors::InvalidArgument("The number of rows is too small."));
|
||||||
OP_REQUIRES(context, num_cols == -1 || num_cols >= min_num_cols,
|
OP_REQUIRES(context, num_cols == -1 || num_cols >= min_num_cols,
|
||||||
@ -387,8 +389,8 @@ class MatrixDiagPartOp : public XlaOpKernel {
|
|||||||
const int num_diags = upper_diag_index - lower_diag_index + 1;
|
const int num_diags = upper_diag_index - lower_diag_index + 1;
|
||||||
if (num_diags > 1) output_shape.AddDim(num_diags);
|
if (num_diags > 1) output_shape.AddDim(num_diags);
|
||||||
const int32 max_diag_len =
|
const int32 max_diag_len =
|
||||||
std::min(num_rows + std::min(upper_diag_index, 0LL),
|
std::min(num_rows + std::min(upper_diag_index, int64{0}),
|
||||||
num_cols - std::max(lower_diag_index, 0LL));
|
num_cols - std::max(lower_diag_index, int64{0}));
|
||||||
output_shape.AddDim(max_diag_len);
|
output_shape.AddDim(max_diag_len);
|
||||||
|
|
||||||
// Computes output.
|
// Computes output.
|
||||||
@ -502,8 +504,8 @@ class MatrixSetDiagOp : public XlaOpKernel {
|
|||||||
expected_diag_shape.RemoveLastDims(2);
|
expected_diag_shape.RemoveLastDims(2);
|
||||||
if (num_diags > 1) expected_diag_shape.AddDim(num_diags);
|
if (num_diags > 1) expected_diag_shape.AddDim(num_diags);
|
||||||
const int32 max_diag_len =
|
const int32 max_diag_len =
|
||||||
std::min(num_rows + std::min(upper_diag_index, 0LL),
|
std::min(num_rows + std::min(upper_diag_index, int64{0}),
|
||||||
num_cols - std::max(lower_diag_index, 0LL));
|
num_cols - std::max(lower_diag_index, int64{0}));
|
||||||
expected_diag_shape.AddDim(max_diag_len);
|
expected_diag_shape.AddDim(max_diag_len);
|
||||||
OP_REQUIRES(
|
OP_REQUIRES(
|
||||||
context, expected_diag_shape == diag_shape,
|
context, expected_diag_shape == diag_shape,
|
||||||
|
@ -125,7 +125,7 @@ XlaOp GetMatrixDiagonalViaGather(XlaOp x, int k) {
|
|||||||
|
|
||||||
// Calculate the indices of diagonal part with offset k.
|
// Calculate the indices of diagonal part with offset k.
|
||||||
const int64 diag_len =
|
const int64 diag_len =
|
||||||
std::max(std::min(m + std::min(k, 0), n - std::max(k, 0)), 0LL);
|
std::max(std::min(m + std::min(k, 0), n - std::max(k, 0)), int64{0});
|
||||||
XlaOp diag_base_indices = BroadcastInDim(Iota(builder, S32, diag_len),
|
XlaOp diag_base_indices = BroadcastInDim(Iota(builder, S32, diag_len),
|
||||||
{diag_len, num_index_dims}, {0});
|
{diag_len, num_index_dims}, {0});
|
||||||
XlaOp diag_offset =
|
XlaOp diag_offset =
|
||||||
|
@ -126,8 +126,8 @@ std::vector<std::pair<int64, int64>> MakePadding(
|
|||||||
window_dimension - input_dimension,
|
window_dimension - input_dimension,
|
||||||
0);
|
0);
|
||||||
low_high_padding.emplace_back(
|
low_high_padding.emplace_back(
|
||||||
tensorflow::MathUtil::FloorOfRatio(padding_size, 2ll),
|
tensorflow::MathUtil::FloorOfRatio(padding_size, int64{2}),
|
||||||
tensorflow::MathUtil::CeilOfRatio(padding_size, 2ll));
|
tensorflow::MathUtil::CeilOfRatio(padding_size, int64{2}));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -608,7 +608,7 @@ int NPyBfloat16_ArgMinFunc(void* data, npy_intp n, npy_intp* min_ind,
|
|||||||
|
|
||||||
// NumPy casts
|
// NumPy casts
|
||||||
|
|
||||||
template <typename T>
|
template <typename T, typename Enable = void>
|
||||||
struct TypeDescriptor {
|
struct TypeDescriptor {
|
||||||
// typedef ... T; // Representation type in memory for NumPy values of type
|
// typedef ... T; // Representation type in memory for NumPy values of type
|
||||||
// static int Dtype() { return NPY_...; } // Numpy type number for T.
|
// static int Dtype() { return NPY_...; } // Numpy type number for T.
|
||||||
@ -638,9 +638,12 @@ struct TypeDescriptor<uint32> {
|
|||||||
static int Dtype() { return NPY_UINT32; }
|
static int Dtype() { return NPY_UINT32; }
|
||||||
};
|
};
|
||||||
|
|
||||||
template <>
|
template <typename Uint64Type>
|
||||||
struct TypeDescriptor<uint64> {
|
struct TypeDescriptor<
|
||||||
typedef uint64 T;
|
Uint64Type, typename std::enable_if<std::is_integral<Uint64Type>::value &&
|
||||||
|
!std::is_signed<Uint64Type>::value &&
|
||||||
|
sizeof(Uint64Type) == 8>::type> {
|
||||||
|
typedef Uint64Type T;
|
||||||
static int Dtype() { return NPY_UINT64; }
|
static int Dtype() { return NPY_UINT64; }
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -662,9 +665,12 @@ struct TypeDescriptor<int32> {
|
|||||||
static int Dtype() { return NPY_INT32; }
|
static int Dtype() { return NPY_INT32; }
|
||||||
};
|
};
|
||||||
|
|
||||||
template <>
|
template <typename Int64Type>
|
||||||
struct TypeDescriptor<int64> {
|
struct TypeDescriptor<
|
||||||
typedef int64 T;
|
Int64Type, typename std::enable_if<std::is_integral<Int64Type>::value &&
|
||||||
|
std::is_signed<Int64Type>::value &&
|
||||||
|
sizeof(Int64Type) == 8>::type> {
|
||||||
|
typedef Int64Type T;
|
||||||
static int Dtype() { return NPY_INT64; }
|
static int Dtype() { return NPY_INT64; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -299,7 +299,7 @@ int IrEmitter::MinimumAlignmentForPrimitiveType(PrimitiveType primitive_type) {
|
|||||||
DCHECK_LE(byte_size, 16);
|
DCHECK_LE(byte_size, 16);
|
||||||
|
|
||||||
// Allocations may be 8-byte aligned if part of a small block.
|
// Allocations may be 8-byte aligned if part of a small block.
|
||||||
return std::min(8LL, byte_size);
|
return std::min(int64{8}, byte_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
int64 IrEmitter::ByteSizeOf(const Shape& shape) const {
|
int64 IrEmitter::ByteSizeOf(const Shape& shape) const {
|
||||||
|
@ -128,7 +128,7 @@ bool IsCublasGemm(const HloInstruction& hlo) {
|
|||||||
std::array<int64, 3> GetReductionTiling(
|
std::array<int64, 3> GetReductionTiling(
|
||||||
const ReductionDimensions& reduction_dimensions) {
|
const ReductionDimensions& reduction_dimensions) {
|
||||||
if (reduction_dimensions.is_row_reduction) {
|
if (reduction_dimensions.is_row_reduction) {
|
||||||
int64 tile_z = std::min(reduction_dimensions.dimensions[0], 8LL);
|
int64 tile_z = std::min(reduction_dimensions.dimensions[0], int64{8});
|
||||||
if (reduction_dimensions.dimensions[1] == 1) {
|
if (reduction_dimensions.dimensions[1] == 1) {
|
||||||
CHECK_EQ(reduction_dimensions.dimensions[0], 1);
|
CHECK_EQ(reduction_dimensions.dimensions[0], 1);
|
||||||
return {tile_z, 1, 16};
|
return {tile_z, 1, 16};
|
||||||
|
@ -86,7 +86,8 @@ LaunchDimensions CalculateLaunchDimensions(
|
|||||||
// need more registers to hold intermediate values. Reduce the number of
|
// need more registers to hold intermediate values. Reduce the number of
|
||||||
// blocks per thread to increase the number of registers available to ptxas.
|
// blocks per thread to increase the number of registers available to ptxas.
|
||||||
// Make sure we still have a multiple of 32.
|
// Make sure we still have a multiple of 32.
|
||||||
threads_per_block = RoundUpToNearest(threads_per_block / unroll_factor, 32LL);
|
threads_per_block =
|
||||||
|
RoundUpToNearest(threads_per_block / unroll_factor, int64{32});
|
||||||
if (num_elements < threads_per_block) {
|
if (num_elements < threads_per_block) {
|
||||||
threads_per_block = num_elements;
|
threads_per_block = num_elements;
|
||||||
VLOG(2) << "Update # of threads per block to the element count ("
|
VLOG(2) << "Update # of threads per block to the element count ("
|
||||||
|
@ -1769,7 +1769,7 @@ Status HloEvaluator::HandleGather(HloInstruction* gather) {
|
|||||||
// output_dim_size);
|
// output_dim_size);
|
||||||
input_index_clamped[i] =
|
input_index_clamped[i] =
|
||||||
std::min(operand_shape.dimensions(i) - output_dim_size,
|
std::min(operand_shape.dimensions(i) - output_dim_size,
|
||||||
std::max(0LL, input_gather_index[i]));
|
std::max(int64{0}, input_gather_index[i]));
|
||||||
}
|
}
|
||||||
for (int i = 0, e = input_index.size(); i < e; i++) {
|
for (int i = 0, e = input_index.size(); i < e; i++) {
|
||||||
input_index[i] = input_index_clamped[i] + input_window_index[i];
|
input_index[i] = input_index_clamped[i] + input_window_index[i];
|
||||||
|
@ -496,9 +496,9 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
|
|||||||
proto.convolution_dimension_numbers());
|
proto.convolution_dimension_numbers());
|
||||||
}
|
}
|
||||||
custom_call_instr->set_feature_group_count(
|
custom_call_instr->set_feature_group_count(
|
||||||
std::max(static_cast<int64>(proto.feature_group_count()), 1LL));
|
std::max(static_cast<int64>(proto.feature_group_count()), int64{1}));
|
||||||
custom_call_instr->set_batch_group_count(
|
custom_call_instr->set_batch_group_count(
|
||||||
std::max(static_cast<int64>(proto.batch_group_count()), 1LL));
|
std::max(static_cast<int64>(proto.batch_group_count()), int64{1}));
|
||||||
custom_call_instr->set_custom_call_has_side_effect(
|
custom_call_instr->set_custom_call_has_side_effect(
|
||||||
proto.custom_call_has_side_effect());
|
proto.custom_call_has_side_effect());
|
||||||
break;
|
break;
|
||||||
|
@ -313,7 +313,7 @@ XlaOp SolveWithInvertedDiagonalBlocks(XlaOp a, XlaOp b, XlaOp inv_diag_blocks,
|
|||||||
// (namely, X[i * block_size:] = 0), L[i, :i] @ X[:i]
|
// (namely, X[i * block_size:] = 0), L[i, :i] @ X[:i]
|
||||||
if (backward) {
|
if (backward) {
|
||||||
start = {j * block_size,
|
start = {j * block_size,
|
||||||
std::max(0LL, (num_blocks - i) * block_size)};
|
std::max(int64{0}, (num_blocks - i) * block_size)};
|
||||||
end = {k, n};
|
end = {k, n};
|
||||||
} else {
|
} else {
|
||||||
start = {j * block_size, 0};
|
start = {j * block_size, 0};
|
||||||
|
@ -1032,7 +1032,7 @@ ShapeUtil::InsertedOrDeleted1SizedDimensions(const Shape& shape_pre,
|
|||||||
// Check (modified) dimensions between unmodified_dims[i-1] and
|
// Check (modified) dimensions between unmodified_dims[i-1] and
|
||||||
// unmodified_dims[i].
|
// unmodified_dims[i].
|
||||||
auto prior_unmodified_dim_pair =
|
auto prior_unmodified_dim_pair =
|
||||||
i > 0 ? unmodified_dims[i - 1] : std::make_pair(-1LL, -1LL);
|
i > 0 ? unmodified_dims[i - 1] : std::pair<int64, int64>(-1, -1);
|
||||||
auto unmodified_dim_pair =
|
auto unmodified_dim_pair =
|
||||||
i < unmodified_dims.size()
|
i < unmodified_dims.size()
|
||||||
? unmodified_dims[i]
|
? unmodified_dims[i]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user