Internal change
PiperOrigin-RevId: 198582954
This commit is contained in:
parent
2bb9fe8d20
commit
8175595386
@ -38,7 +38,7 @@ class SimpleCostModel : public ParallelCostModel {
|
|||||||
const int64 min_cost_per_thread = 256LL << 10; // 256KB L2 Cache size.
|
const int64 min_cost_per_thread = 256LL << 10; // 256KB L2 Cache size.
|
||||||
// Return target parallel task count in [1, max_parallelism_].
|
// Return target parallel task count in [1, max_parallelism_].
|
||||||
return std::min(max_parallelism_,
|
return std::min(max_parallelism_,
|
||||||
std::max(1LL, instruction_cost / min_cost_per_thread));
|
std::max(int64{1}, instruction_cost / min_cost_per_thread));
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -63,7 +63,7 @@ class DefaultCostModel : public ParallelCostModel {
|
|||||||
int64 max_parallelism;
|
int64 max_parallelism;
|
||||||
// Calculate flops-to-bytes-ratio for 'instruction'.
|
// Calculate flops-to-bytes-ratio for 'instruction'.
|
||||||
const int64 bytes_accessed =
|
const int64 bytes_accessed =
|
||||||
std::max(1LL, cost_analysis_->bytes_accessed(*instruction));
|
std::max(int64{1}, cost_analysis_->bytes_accessed(*instruction));
|
||||||
const float flops_to_bytes_ratio =
|
const float flops_to_bytes_ratio =
|
||||||
cost_analysis_->flop_count(*instruction) /
|
cost_analysis_->flop_count(*instruction) /
|
||||||
static_cast<float>(bytes_accessed);
|
static_cast<float>(bytes_accessed);
|
||||||
@ -93,7 +93,7 @@ class DefaultCostModel : public ParallelCostModel {
|
|||||||
}
|
}
|
||||||
// Return target parallel task count in [1, max_parallelism_].
|
// Return target parallel task count in [1, max_parallelism_].
|
||||||
return std::min(max_parallelism,
|
return std::min(max_parallelism,
|
||||||
std::max(1LL, instruction_cost / min_cost_per_thread));
|
std::max(int64{1}, instruction_cost / min_cost_per_thread));
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -115,7 +115,7 @@ ShapePartitionIterator::ShapePartitionIterator(
|
|||||||
for (int i = 0; i < dimension_partition_sizes_.size(); ++i) {
|
for (int i = 0; i < dimension_partition_sizes_.size(); ++i) {
|
||||||
const int64 dim_size = shape_.dimensions(dimensions_[i]);
|
const int64 dim_size = shape_.dimensions(dimensions_[i]);
|
||||||
dimension_partition_sizes_[i] =
|
dimension_partition_sizes_[i] =
|
||||||
std::max(1LL, dim_size / dimension_partition_counts_[i]);
|
std::max(int64{1}, dim_size / dimension_partition_counts_[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate the partition strides for each dimension.
|
// Calculate the partition strides for each dimension.
|
||||||
|
@ -1965,7 +1965,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
|
|||||||
// to officially document different behavior.
|
// to officially document different behavior.
|
||||||
for (int64 i = 0; i < start.size(); ++i) {
|
for (int64 i = 0; i < start.size(); ++i) {
|
||||||
start[i] = std::min<int64>(
|
start[i] = std::min<int64>(
|
||||||
std::max(0LL, start[i]),
|
std::max(int64{0}, start[i]),
|
||||||
operand_literal.shape().dimensions(i) - result_shape.dimensions(i));
|
operand_literal.shape().dimensions(i) - result_shape.dimensions(i));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -295,7 +295,7 @@ WeightedQuantilesStream<ValueType, WeightType, CompareFn>::GetQuantileSpecs(
|
|||||||
if (eps <= std::numeric_limits<double>::epsilon()) {
|
if (eps <= std::numeric_limits<double>::epsilon()) {
|
||||||
// Exact quantile computation at the expense of RAM.
|
// Exact quantile computation at the expense of RAM.
|
||||||
max_level = 1;
|
max_level = 1;
|
||||||
block_size = std::max(max_elements, 2LL);
|
block_size = std::max(max_elements, int64{2});
|
||||||
} else {
|
} else {
|
||||||
// The bottom-most level will become full at most
|
// The bottom-most level will become full at most
|
||||||
// (max_elements / block_size) times, the level above will become full
|
// (max_elements / block_size) times, the level above will become full
|
||||||
@ -315,7 +315,7 @@ WeightedQuantilesStream<ValueType, WeightType, CompareFn>::GetQuantileSpecs(
|
|||||||
block_size = static_cast<size_t>(ceil(max_level / eps)) + 1;
|
block_size = static_cast<size_t>(ceil(max_level / eps)) + 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return std::make_tuple(max_level, std::max(block_size, 2LL));
|
return std::make_tuple(max_level, std::max(block_size, int64{2}));
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace quantiles
|
} // namespace quantiles
|
||||||
|
@ -195,7 +195,7 @@ class WeightedQuantilesSummary {
|
|||||||
// designed to be cache-friendly.
|
// designed to be cache-friendly.
|
||||||
void Compress(int64 size_hint, double min_eps = 0) {
|
void Compress(int64 size_hint, double min_eps = 0) {
|
||||||
// No-op if we're already within the size requirement.
|
// No-op if we're already within the size requirement.
|
||||||
size_hint = std::max(size_hint, 2LL);
|
size_hint = std::max(size_hint, int64{2});
|
||||||
if (entries_.size() <= size_hint) {
|
if (entries_.size() <= size_hint) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -267,7 +267,7 @@ class WeightedQuantilesSummary {
|
|||||||
if (entries_.empty()) {
|
if (entries_.empty()) {
|
||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
num_quantiles = std::max(num_quantiles, 2LL);
|
num_quantiles = std::max(num_quantiles, int64{2});
|
||||||
output.reserve(num_quantiles + 1);
|
output.reserve(num_quantiles + 1);
|
||||||
|
|
||||||
// Make successive rank queries to get boundaries.
|
// Make successive rank queries to get boundaries.
|
||||||
|
@ -770,7 +770,7 @@ int64 MinSystemMemory(int64 available_memory) {
|
|||||||
} else {
|
} else {
|
||||||
// max(300 MiB, 0.05 * available_memory)
|
// max(300 MiB, 0.05 * available_memory)
|
||||||
min_system_memory =
|
min_system_memory =
|
||||||
std::max(314572800LL, static_cast<int64>(available_memory * 0.05));
|
std::max(int64{314572800}, static_cast<int64>(available_memory * 0.05));
|
||||||
}
|
}
|
||||||
#if defined(__GNUC__) && defined(__OPTIMIZE__)
|
#if defined(__GNUC__) && defined(__OPTIMIZE__)
|
||||||
// Do nothing
|
// Do nothing
|
||||||
|
@ -40,8 +40,8 @@ Status GetWindowedOutputSizeVerboseV2(int64 input_size, int64 filter_size,
|
|||||||
case Padding::SAME:
|
case Padding::SAME:
|
||||||
*output_size = (input_size + stride - 1) / stride;
|
*output_size = (input_size + stride - 1) / stride;
|
||||||
const int64 padding_needed =
|
const int64 padding_needed =
|
||||||
std::max(0LL, (*output_size - 1) * stride + effective_filter_size -
|
std::max(int64{0}, (*output_size - 1) * stride +
|
||||||
input_size);
|
effective_filter_size - input_size);
|
||||||
// For odd values of total padding, add more padding at the 'right'
|
// For odd values of total padding, add more padding at the 'right'
|
||||||
// side of the given dimension.
|
// side of the given dimension.
|
||||||
*padding_before = padding_needed / 2;
|
*padding_before = padding_needed / 2;
|
||||||
|
@ -84,7 +84,7 @@ class CholeskyGrad : public LinearAlgebraOp<Scalar> {
|
|||||||
Variable names representing the derivative matrix have a trailing '_bar'.
|
Variable names representing the derivative matrix have a trailing '_bar'.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
const int64 block_begin = std::max(0ll, block_end - kMaxBlockSize);
|
const int64 block_begin = std::max(int64{0}, block_end - kMaxBlockSize);
|
||||||
const int64 block_size = block_end - block_begin;
|
const int64 block_size = block_end - block_begin;
|
||||||
const int64 trailing_size = kMatrixSize - block_end;
|
const int64 trailing_size = kMatrixSize - block_end;
|
||||||
|
|
||||||
|
@ -294,11 +294,11 @@ struct TransformFilterRange {
|
|||||||
|
|
||||||
// Compute number of filter shards.
|
// Compute number of filter shards.
|
||||||
const int64 residual_row =
|
const int64 residual_row =
|
||||||
std::max(0LL, args.filter_rows - base_filter_rows);
|
std::max(int64{0}, args.filter_rows - base_filter_rows);
|
||||||
const int64 shard_rows = 1 + (residual_row + 2 - 1) / 2;
|
const int64 shard_rows = 1 + (residual_row + 2 - 1) / 2;
|
||||||
|
|
||||||
const int64 residual_col =
|
const int64 residual_col =
|
||||||
std::max(0LL, args.filter_cols - base_filter_cols);
|
std::max(int64{0}, args.filter_cols - base_filter_cols);
|
||||||
const int64 shard_cols = 1 + (residual_col + 2 - 1) / 2;
|
const int64 shard_cols = 1 + (residual_col + 2 - 1) / 2;
|
||||||
|
|
||||||
// Compute strides to be used for input and output IO.
|
// Compute strides to be used for input and output IO.
|
||||||
@ -415,8 +415,9 @@ struct TransformFilters {
|
|||||||
filter_total_size + filter_transform_buffer_size + filter_out_buf_size;
|
filter_total_size + filter_transform_buffer_size + filter_out_buf_size;
|
||||||
|
|
||||||
// Remove fixed cost and divide by per-filter cost.
|
// Remove fixed cost and divide by per-filter cost.
|
||||||
const int64 num_filters_cache = std::max(
|
const int64 num_filters_cache =
|
||||||
1LL, (cache_size - filter_transform_matrix_size) / per_filter_cost);
|
std::max(int64{1},
|
||||||
|
(cache_size - filter_transform_matrix_size) / per_filter_cost);
|
||||||
const int64 num_filters_transform = std::min(out_depth, num_filters_cache);
|
const int64 num_filters_transform = std::min(out_depth, num_filters_cache);
|
||||||
|
|
||||||
// Allocate buffer for filter transform matrix:
|
// Allocate buffer for filter transform matrix:
|
||||||
@ -952,11 +953,11 @@ struct DeepConv2D<CPUDevice, T> {
|
|||||||
const int64 base_filter_rows = transform->filter_shape().rows;
|
const int64 base_filter_rows = transform->filter_shape().rows;
|
||||||
|
|
||||||
const int64 filter_residual_row =
|
const int64 filter_residual_row =
|
||||||
std::max(0LL, args.filter_rows - base_filter_rows);
|
std::max(int64{0}, args.filter_rows - base_filter_rows);
|
||||||
const int64 filter_shards_row = 1 + (filter_residual_row + 2 - 1) / 2;
|
const int64 filter_shards_row = 1 + (filter_residual_row + 2 - 1) / 2;
|
||||||
|
|
||||||
const int64 filter_residual_col =
|
const int64 filter_residual_col =
|
||||||
std::max(0LL, args.filter_cols - base_filter_rows);
|
std::max(int64{0}, args.filter_cols - base_filter_rows);
|
||||||
const int64 filter_shards_col = 1 + (filter_residual_col + 2 - 1) / 2;
|
const int64 filter_shards_col = 1 + (filter_residual_col + 2 - 1) / 2;
|
||||||
|
|
||||||
// Allocate buffer for transformed filters.
|
// Allocate buffer for transformed filters.
|
||||||
@ -1045,8 +1046,8 @@ struct DeepConv2D<CPUDevice, T> {
|
|||||||
buffer1_per_tile_size + buffer2_per_tile_size +
|
buffer1_per_tile_size + buffer2_per_tile_size +
|
||||||
packed_tile_per_tile_size + gemm_out_per_tile_size;
|
packed_tile_per_tile_size + gemm_out_per_tile_size;
|
||||||
|
|
||||||
const int64 num_tiles_cache =
|
const int64 num_tiles_cache = std::max(
|
||||||
std::max(4LL, (cache_size - total_fixed_cost) / total_per_tile_cost);
|
int64{4}, (cache_size - total_fixed_cost) / total_per_tile_cost);
|
||||||
const int64 num_tiles = std::min(num_tiles_cache, col_tiles);
|
const int64 num_tiles = std::min(num_tiles_cache, col_tiles);
|
||||||
|
|
||||||
// Allocate temporary buffer 'buffer1', which is first used for copying
|
// Allocate temporary buffer 'buffer1', which is first used for copying
|
||||||
|
@ -93,14 +93,14 @@ class DrawBoundingBoxesOp : public OpKernel {
|
|||||||
int64 color_index = bb % color_table_length;
|
int64 color_index = bb % color_table_length;
|
||||||
const int64 min_box_row =
|
const int64 min_box_row =
|
||||||
static_cast<float>(tboxes(b, bb, 0)) * (height - 1);
|
static_cast<float>(tboxes(b, bb, 0)) * (height - 1);
|
||||||
const int64 min_box_row_clamp = std::max<int64>(min_box_row, 0);
|
const int64 min_box_row_clamp = std::max<int64>(min_box_row, int64{0});
|
||||||
const int64 max_box_row =
|
const int64 max_box_row =
|
||||||
static_cast<float>(tboxes(b, bb, 2)) * (height - 1);
|
static_cast<float>(tboxes(b, bb, 2)) * (height - 1);
|
||||||
const int64 max_box_row_clamp =
|
const int64 max_box_row_clamp =
|
||||||
std::min<int64>(max_box_row, height - 1);
|
std::min<int64>(max_box_row, height - 1);
|
||||||
const int64 min_box_col =
|
const int64 min_box_col =
|
||||||
static_cast<float>(tboxes(b, bb, 1)) * (width - 1);
|
static_cast<float>(tboxes(b, bb, 1)) * (width - 1);
|
||||||
const int64 min_box_col_clamp = std::max<int64>(min_box_col, 0);
|
const int64 min_box_col_clamp = std::max<int64>(min_box_col, int64{0});
|
||||||
const int64 max_box_col =
|
const int64 max_box_col =
|
||||||
static_cast<float>(tboxes(b, bb, 3)) * (width - 1);
|
static_cast<float>(tboxes(b, bb, 3)) * (width - 1);
|
||||||
const int64 max_box_col_clamp = std::min<int64>(max_box_col, width - 1);
|
const int64 max_box_col_clamp = std::min<int64>(max_box_col, width - 1);
|
||||||
|
@ -71,7 +71,7 @@ class LRNFloatTest : public OpsTestBase {
|
|||||||
Eigen::Tensor<float, 1, Eigen::RowMajor> out_col(depth);
|
Eigen::Tensor<float, 1, Eigen::RowMajor> out_col(depth);
|
||||||
for (int64 d = 0; d < depth; ++d) {
|
for (int64 d = 0; d < depth; ++d) {
|
||||||
float denom = 0.0f;
|
float denom = 0.0f;
|
||||||
for (int64 r = std::max(0ll, d - depth_radius);
|
for (int64 r = std::max(int64{0}, d - depth_radius);
|
||||||
r < std::min(depth, d + depth_radius + 1); ++r) {
|
r < std::min(depth, d + depth_radius + 1); ++r) {
|
||||||
denom += in(i, r) * in(i, r);
|
denom += in(i, r) * in(i, r);
|
||||||
}
|
}
|
||||||
|
@ -159,7 +159,7 @@ struct MatrixBandPartFunctor<CPUDevice, Scalar> {
|
|||||||
const int64 band_start =
|
const int64 band_start =
|
||||||
num_lower_diags < 0
|
num_lower_diags < 0
|
||||||
? 0
|
? 0
|
||||||
: std::min(n, std::max(0ll, row - num_lower_diags));
|
: std::min(n, std::max(int64{0}, row - num_lower_diags));
|
||||||
const int64 band_end =
|
const int64 band_end =
|
||||||
num_upper_diags < 0
|
num_upper_diags < 0
|
||||||
? n
|
? n
|
||||||
|
@ -596,7 +596,7 @@ void SpatialAvgPool(OpKernelContext* context, Tensor* output,
|
|||||||
// so the factor 0.01 (i.e. 1/100) with a max of 10000, was chosen to limit
|
// so the factor 0.01 (i.e. 1/100) with a max of 10000, was chosen to limit
|
||||||
// the work unit cost to an operating range in which it empirically performed
|
// the work unit cost to an operating range in which it empirically performed
|
||||||
// best.
|
// best.
|
||||||
const int64 work_unit_cost = std::max(10000LL, work_unit_size / 100LL);
|
const int64 work_unit_cost = std::max(int64{10000}, work_unit_size / 100LL);
|
||||||
const DeviceBase::CpuWorkerThreads& worker_threads =
|
const DeviceBase::CpuWorkerThreads& worker_threads =
|
||||||
*(context->device()->tensorflow_cpu_worker_threads());
|
*(context->device()->tensorflow_cpu_worker_threads());
|
||||||
Shard(worker_threads.num_threads, worker_threads.workers,
|
Shard(worker_threads.num_threads, worker_threads.workers,
|
||||||
|
@ -273,8 +273,8 @@ inline void RequantizeManyInNewRangeReference(const qint32* input, int64 count,
|
|||||||
const int64 offset_intermediate = fp_value - output_offset_fp;
|
const int64 offset_intermediate = fp_value - output_offset_fp;
|
||||||
const int64 round_intermediate = offset_intermediate + rounding_delta;
|
const int64 round_intermediate = offset_intermediate + rounding_delta;
|
||||||
int64 quantized_int64 = round_intermediate >> fp_shift;
|
int64 quantized_int64 = round_intermediate >> fp_shift;
|
||||||
quantized_int64 = std::max(quantized_int64, 0LL);
|
quantized_int64 = std::max(quantized_int64, int64{0});
|
||||||
quantized_int64 = std::min(quantized_int64, 255LL);
|
quantized_int64 = std::min(quantized_int64, int64{255});
|
||||||
output[index] = static_cast<quint8>(static_cast<int32>(quantized_int64));
|
output[index] = static_cast<quint8>(static_cast<int32>(quantized_int64));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -271,7 +271,7 @@ class ResizeAreaOp : public OpKernel {
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
static EIGEN_ALWAYS_INLINE int64 Bound(int64 val, int64 limit) {
|
static EIGEN_ALWAYS_INLINE int64 Bound(int64 val, int64 limit) {
|
||||||
return std::min(limit - 1ll, std::max(0ll, val));
|
return std::min(limit - 1ll, std::max(int64{0}, val));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool align_corners_;
|
bool align_corners_;
|
||||||
|
@ -57,7 +57,7 @@ const float* GetCoeffsTable() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
inline int64 Bound(int64 val, int64 limit) {
|
inline int64 Bound(int64 val, int64 limit) {
|
||||||
return std::min(limit - 1ll, std::max(0ll, val));
|
return std::min(limit - 1ll, std::max(int64{0}, val));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct WeightsAndIndices {
|
struct WeightsAndIndices {
|
||||||
|
@ -81,7 +81,7 @@ class ResizeBicubicOpTest : public OpsTestBase {
|
|||||||
|
|
||||||
// Used in the baseline implementation
|
// Used in the baseline implementation
|
||||||
inline int64 Bound(int64 val, int64 limit) {
|
inline int64 Bound(int64 val, int64 limit) {
|
||||||
return std::min(limit - 1ll, std::max(0ll, val));
|
return std::min(limit - 1ll, std::max(int64{0}, val));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used in the baseline implementation
|
// Used in the baseline implementation
|
||||||
|
@ -125,7 +125,7 @@ class SparseFillEmptyRowsOp : public OpKernel {
|
|||||||
// Scratch here describes the number of elements in this dense row
|
// Scratch here describes the number of elements in this dense row
|
||||||
empty_row_indicator(row) = (scratch(row) == 0);
|
empty_row_indicator(row) = (scratch(row) == 0);
|
||||||
// In filled version, each row has at least one element.
|
// In filled version, each row has at least one element.
|
||||||
scratch(row) = std::max(scratch(row), 1LL);
|
scratch(row) = std::max(scratch(row), int64{1});
|
||||||
// Update scratch to represent the number of elements up to and
|
// Update scratch to represent the number of elements up to and
|
||||||
// including dense_row + 1:
|
// including dense_row + 1:
|
||||||
// scratch(0) == #{elements of row 0}
|
// scratch(0) == #{elements of row 0}
|
||||||
|
@ -51,7 +51,7 @@ void GcsThrottle::UpdateState() {
|
|||||||
// TODO(b/72643279): Switch to a monotonic clock.
|
// TODO(b/72643279): Switch to a monotonic clock.
|
||||||
int64 now = env_time_->NowSeconds();
|
int64 now = env_time_->NowSeconds();
|
||||||
uint64 delta_secs =
|
uint64 delta_secs =
|
||||||
std::max(0LL, now - static_cast<int64>(last_updated_secs_));
|
std::max(int64{0}, now - static_cast<int64>(last_updated_secs_));
|
||||||
available_tokens_ += delta_secs * config_.token_rate;
|
available_tokens_ += delta_secs * config_.token_rate;
|
||||||
available_tokens_ = std::min(available_tokens_, config_.bucket_size);
|
available_tokens_ = std::min(available_tokens_, config_.bucket_size);
|
||||||
last_updated_secs_ = now;
|
last_updated_secs_ = now;
|
||||||
|
@ -35,7 +35,7 @@ void Shard(int max_parallelism, thread::ThreadPool* workers, int64 total,
|
|||||||
workers->ParallelFor(total, cost_per_unit, work);
|
workers->ParallelFor(total, cost_per_unit, work);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
cost_per_unit = std::max(1LL, cost_per_unit);
|
cost_per_unit = std::max(int64{1}, cost_per_unit);
|
||||||
// We shard [0, total) into "num_shards" shards.
|
// We shard [0, total) into "num_shards" shards.
|
||||||
// 1 <= num_shards <= num worker threads
|
// 1 <= num_shards <= num worker threads
|
||||||
//
|
//
|
||||||
|
Loading…
Reference in New Issue
Block a user