fix typos in compiler dir
This commit is contained in:
parent
cacb484998
commit
fa5da7d547
tensorflow/compiler
jit
mlir
hlo
lite
tensorflow
transforms
translate
utils
tf2tensorrt/kernels
tf2xla/kernels
xla
g3doc
literal_comparison.ccpython/tpu_driver/client
service
copy_insertion_test.ccdynamic_padder.cc
gpu
hlo_parser.ccmlir_gpu/experimental/conv_emitter
tuple_points_to_analysis_test.cctests
@ -192,11 +192,11 @@ void AllocateAndParseFlags() {
|
||||
"XLA clusters."),
|
||||
Flag("tf_xla_check_cluster_input_numerics",
|
||||
&build_ops_flags->tf_xla_check_cluster_input_numerics,
|
||||
"If true then insert CheckNumerics nodes to to check all cluster "
|
||||
"If true then insert CheckNumerics nodes to check all cluster "
|
||||
"inputs."),
|
||||
Flag("tf_xla_check_cluster_output_numerics",
|
||||
&build_ops_flags->tf_xla_check_cluster_output_numerics,
|
||||
"If true then insert CheckNumerics nodes to to check all cluster "
|
||||
"If true then insert CheckNumerics nodes to check all cluster "
|
||||
"outputs."),
|
||||
Flag("tf_xla_disable_constant_folding",
|
||||
&build_ops_flags->tf_xla_disable_constant_folding,
|
||||
|
@ -741,7 +741,7 @@ static LogicalResult Verify(BroadcastInDimOp op) {
|
||||
if (dimIndex >= resultRank) {
|
||||
return op.emitOpError(
|
||||
llvm::formatv("broadcast_dimensions contains invalid value {0} for "
|
||||
"result result with rank {1}",
|
||||
"result with rank {1}",
|
||||
dimIndex, resultRank));
|
||||
}
|
||||
|
||||
@ -828,7 +828,7 @@ static LogicalResult Verify(DynamicBroadcastInDimOp op) {
|
||||
if (dimIndex >= resultRank) {
|
||||
return op.emitOpError(
|
||||
llvm::formatv("broadcast_dimensions contains invalid value {0} for "
|
||||
"result result with rank {1}",
|
||||
"result with rank {1}",
|
||||
dimIndex, resultRank));
|
||||
}
|
||||
|
||||
|
@ -165,7 +165,7 @@ func @broadcast_in_dim_bad_rank_decrease(%arg0: tensor<1x2x3xi32>) -> tensor<3xi
|
||||
// -----
|
||||
|
||||
func @broadcast_in_dim_dimension_values_too_large(%arg0: tensor<1x2xi32>) -> tensor<1x2x3xi32> {
|
||||
// expected-error@+1 {{broadcast_dimensions contains invalid value 9 for result result with rank 3}}
|
||||
// expected-error@+1 {{broadcast_dimensions contains invalid value 9 for result with rank 3}}
|
||||
%0 = "mhlo.broadcast_in_dim"(%arg0) {broadcast_dimensions = dense<[9, 2]> : tensor<2xi64>} : (tensor<1x2xi32>) -> tensor<1x2x3xi32>
|
||||
return %0 : tensor<1x2x3xi32>
|
||||
}
|
||||
@ -1029,7 +1029,7 @@ func @sort(%input0: tensor<16x16xf32>, %input1: tensor<16x16xi32>) {
|
||||
// -----
|
||||
|
||||
func @sort_no_operands() {
|
||||
// expected-error @+1 {{expected named operation to have atleast 1 result}}
|
||||
// expected-error @+1 {{expected named operation to have at least 1 result}}
|
||||
%0:0 = "mhlo.sort"() ( {
|
||||
^bb0(%arg1: tensor<f32>, %arg2: tensor<f32>, %arg3: tensor<i32>, %arg4: tensor<i32>):
|
||||
%7 = "mhlo.compare"(%arg1, %arg2) {comparison_direction = "GT"} : (tensor<f32>, tensor<f32>) -> tensor<i1>
|
||||
|
@ -201,7 +201,7 @@ static void EmitOperatorBuilders(const std::vector<Record *> &defs,
|
||||
} else {
|
||||
os << " tflite::BuiltinOptions_NONE, /*builtin_options=*/0,\n";
|
||||
}
|
||||
// Only builtin ops' builders are auto-generated. custom_options are only
|
||||
// Only built-in ops' builders are auto-generated. custom_options are only
|
||||
// used by custom or flex ops and those ops are handled manually.
|
||||
os << " /*custom_options=*/0, "
|
||||
<< "tflite::CustomOptionsFormat_FLEXBUFFERS,\n"
|
||||
@ -219,7 +219,7 @@ static inline std::string GetOperatorName(const Record &def) {
|
||||
return name.upper();
|
||||
}
|
||||
|
||||
// Emits a function that returns builtin operator code for each TFLite op.
|
||||
// Emits a function that returns built-in operator code for each TFLite op.
|
||||
//
|
||||
// The signature of the function is:
|
||||
//
|
||||
@ -489,16 +489,16 @@ static bool RuntimeVerifierWriterMain(raw_ostream &os, RecordKeeper &records) {
|
||||
|
||||
for (int i = 0, e = op.getNumOperands(); i < e; ++i) {
|
||||
auto &value = op.getOperand(i);
|
||||
// Skip from from first variadic operands for now. Else getOperand index
|
||||
// used below doesn't match.
|
||||
// Skip from first variadic operands for now. Else getOperand index used
|
||||
// below doesn't match.
|
||||
if (value.isVariableLength()) break;
|
||||
if (!value.name.empty())
|
||||
verify_ctx.addSubst(value.name, formatv("op->getOperand({0})", i));
|
||||
}
|
||||
for (int i = 0, e = op.getNumResults(); i < e; ++i) {
|
||||
auto &value = op.getResult(i);
|
||||
// Skip from from first variadic results for now. Else getResult index
|
||||
// used below doesn't match.
|
||||
// Skip from first variadic results for now. Else getResult index used
|
||||
// below doesn't match.
|
||||
if (value.isVariableLength()) break;
|
||||
if (!value.name.empty())
|
||||
verify_ctx.addSubst(value.name, formatv("op->getResult({0})", i));
|
||||
|
@ -202,7 +202,7 @@ bool VerifyMulOpShapeConstraints(MulOp op) {
|
||||
auto element_type = getElementTypeOrSelf(op.output().getType());
|
||||
|
||||
// Allows QI8 and QUI8 inputs up to five dimension broadcasting unless the
|
||||
// output type is not QI16. If the output type is Q16, allows onlt the same
|
||||
// output type is not QI16. If the output type is Q16, allows only the same
|
||||
// shape operands.
|
||||
if (IsQI8Type(element_type) || IsQUI8Type(element_type)) {
|
||||
if (IsQI16Type(getElementTypeOrSelf(op.lhs().getType()))) {
|
||||
@ -859,9 +859,9 @@ static void BuildGatherOp(OpBuilder *builder, OperationState &result,
|
||||
axis_i += params_rank;
|
||||
}
|
||||
|
||||
// params must be atleast rank axis + 1
|
||||
// params must be at least rank axis + 1
|
||||
if (params_rank < axis_i + 1) {
|
||||
emitError(result.location, "params must be atleast rank axis + 1");
|
||||
emitError(result.location, "params must be at least rank axis + 1");
|
||||
}
|
||||
|
||||
if (indices_rank == 0) {
|
||||
@ -1324,7 +1324,7 @@ TFL::ConstOp NarrowDownInt64InputValuesForOp(Operation *input_op,
|
||||
return builder->create<TFL::ConstOp>(loc, new_value_i32_attr);
|
||||
}
|
||||
|
||||
// This will cast donw int64 values for TFL slice op.
|
||||
// This will cast down int64 values for TFL slice op.
|
||||
// This will require the begin & size are constants.
|
||||
struct CastDonwInt64BeginEndToInt32 : public OpRewritePattern<TFL::SliceOp> {
|
||||
using OpRewritePattern<TFL::SliceOp>::OpRewritePattern;
|
||||
@ -1484,7 +1484,7 @@ LogicalResult UnpackOp::inferReturnTypes(
|
||||
|
||||
if (input_type.hasStaticShape() && input_type.getNumElements() <= 0) {
|
||||
return emitOptionalError(
|
||||
loc, "number of elements in input shoule be larger than 0");
|
||||
loc, "number of elements in input should be larger than 0");
|
||||
}
|
||||
|
||||
const int64_t rank = input_type.getRank();
|
||||
|
@ -83,7 +83,7 @@ LogicalResult QuantizeContext::Handle(
|
||||
auto spec = target_spec_.GetKernelSpec(op.logical_kernel(), signature);
|
||||
if (!spec.hasValue()) {
|
||||
op.emitWarning(
|
||||
"Couldn't find kernel from the registeration for quantization.");
|
||||
"Couldn't find kernel from the registration for quantization.");
|
||||
return success();
|
||||
}
|
||||
switch (spec->type) {
|
||||
@ -189,7 +189,7 @@ void QuantizeContext::DumpStates(QuantizeRegionOp current_op) {
|
||||
// - use the first one in the collection,
|
||||
// - use the single input if it is ready, or,
|
||||
// - use the single output if it is ready, or,
|
||||
// - use use the first ready one in the collection.
|
||||
// - use the first ready one in the collection.
|
||||
QuantParams QuantizeContext::GetQuantParamsForSameScaleConstraint(
|
||||
Operation *op) {
|
||||
// Two vectors to collect non-empty operand and result states.
|
||||
|
@ -61,7 +61,7 @@ struct RequantizeState {
|
||||
QuantParams params;
|
||||
};
|
||||
|
||||
// This class manages all the intermedaite quantization states.
|
||||
// This class manages all the intermediate quantization states.
|
||||
class QuantizeContext {
|
||||
public:
|
||||
QuantizeContext(FuncOp func, const DeviceTarget &spec);
|
||||
@ -70,7 +70,7 @@ class QuantizeContext {
|
||||
std::vector<quant::QuantizeRegionOp> GetAllOps();
|
||||
|
||||
// For each quant region op, propagates its quantization parameters according
|
||||
// to the kernel specification and also returns the adjcent quant region ops
|
||||
// to the kernel specification and also returns the adjacent quant region ops
|
||||
// which get the new quantization parameters propagated.
|
||||
LogicalResult Handle(quant::QuantizeRegionOp op,
|
||||
llvm::SmallVectorImpl<Operation *> *new_items,
|
||||
@ -118,10 +118,10 @@ class QuantizeContext {
|
||||
// - use the first one in the collection,
|
||||
// - use the single input if it is ready, or,
|
||||
// - use the single output if it is ready, or,
|
||||
// - use use the first ready one in the collection.
|
||||
// - use the first ready one in the collection.
|
||||
QuantParams GetQuantParamsForSameScaleConstraint(Operation *op);
|
||||
|
||||
// Propagate `params` to all the quantizable port of the `op`. The adjcent
|
||||
// Propagate `params` to all the quantizable ports of the `op`. The adjacent
|
||||
// ops, which have the parameters propagated to, are collected by `new_items`,
|
||||
// so they can be added to the working queue. `changed` is set to true if
|
||||
// there are any new elements being added to `new_items`.
|
||||
|
@ -575,7 +575,7 @@ void QuantizationDriver::RequantizeValue(Value value, RequantizeState *state,
|
||||
// - use the first one in the collection,
|
||||
// - use the single input if it is ready, or,
|
||||
// - use the single output if it is ready, or,
|
||||
// - use use the first ready one in the collection.
|
||||
// - use the first ready one in the collection.
|
||||
QuantParams QuantizationDriver::GetQuantParamsForSameScaleConstraint(
|
||||
Operation *op) {
|
||||
// Two vectors to collect non-empty operand and result states.
|
||||
@ -653,7 +653,7 @@ void QuantizationDriver::PreprocessConstantOps() {
|
||||
if (biases.find(operand_num) == biases.end() &&
|
||||
!llvm::dyn_cast<mlir::SameScalesOpInterface>(user) &&
|
||||
!llvm::dyn_cast<quant::QuantizeCastOp>(user)) {
|
||||
// Needs to scan the content to get the quantiztion parameters if there
|
||||
// Needs to scan the content to get the quantization parameters if there
|
||||
// are no quantization parameters (FakeQuant ops).
|
||||
// For this case, the weight isn't duplicated.
|
||||
weights_.insert(cst);
|
||||
@ -780,7 +780,7 @@ bool QuantizationDriver::PropagateParams() {
|
||||
// Use the final state to set all the operands' parameters.
|
||||
for (int i = 0, e = op->getNumOperands(); i != e; ++i) {
|
||||
if (auto type = op->getOperand(i).getType().dyn_cast<ShapedType>()) {
|
||||
// Without this check, it will accidently propagate the quantization
|
||||
// Without this check, it will accidentally propagate the quantization
|
||||
// information by the shared non-float tensors.
|
||||
if (type.getElementType().isa<FloatType>())
|
||||
changed |= SetOperandParams(op, i, params);
|
||||
@ -790,7 +790,7 @@ bool QuantizationDriver::PropagateParams() {
|
||||
// Use the final state to set all the results' parameters.
|
||||
for (int res = 0, e = op->getNumResults(); res != e; ++res)
|
||||
if (auto type = op->getResult(res).getType().dyn_cast<ShapedType>()) {
|
||||
// Without this check, it will accidently propagate the quantization
|
||||
// Without this check, it will accidentally propagate the quantization
|
||||
// information by the shared non-float-tensors.
|
||||
if (type.getElementType().isa<FloatType>())
|
||||
changed |= SetResultParams(op, res, params);
|
||||
|
@ -430,7 +430,7 @@ TypeAttr RescaleQuantizedType(Type input, Attribute factor);
|
||||
// if it is using signed int symmetric quantization.
|
||||
//
|
||||
// Note that this method may broadcast min and max to match the dimension length
|
||||
// of `input_type`, if the the `quant_dim` is valid. On the other hand, the
|
||||
// of `input_type`, if the `quant_dim` is valid. On the other hand, the
|
||||
// symmetry of min and max is not adjusted by this method. The QAT workflow
|
||||
// should set min/max correctly (and use `narrow_range`=true, `is_signed`=true)
|
||||
// if symmetric quantization is required.
|
||||
|
@ -118,7 +118,7 @@ func @Int64SliceBeginSize(%arg0: tensor<4x128x32xf32>) -> tensor<1x128x32xf32> {
|
||||
// Make sure that second output of the tf.while is not incorrectly inferred as
|
||||
// pass through just because the corresponding input is not used in either
|
||||
// condition or body. The tensor<f32> result of the loop can be either %arg1
|
||||
// (if the body never executes, or 22.0 if the body executes atleast once).
|
||||
// (if the body never executes, or 22.0 if the body executes at least once).
|
||||
func @WhileCanonicalizeBug(%arg0: tensor<i32>, %arg1: tensor<f32>) -> tensor<f32> {
|
||||
%0:2 = "tfl.while"(%arg0, %arg1) ( {
|
||||
^bb0(%arg2: tensor<i32>, %arg3: tensor<f32>):
|
||||
|
@ -113,8 +113,8 @@ class PrepareQuantizePass
|
||||
}
|
||||
|
||||
// Get the min and max values from the quantization specification for the
|
||||
// current function function and argument index. Uses default values if
|
||||
// the function is specified in the `quantize_allowlist`.
|
||||
// current function and argument index. Uses default values if the function
|
||||
// is specified in the `quantize_allowlist`.
|
||||
std::pair<llvm::Optional<double>, llvm::Optional<double>>
|
||||
GetMinMaxValuesForArgument(llvm::StringRef func_name, int index) {
|
||||
if (func_name == quant_specs_.target_func) {
|
||||
@ -124,8 +124,8 @@ class PrepareQuantizePass
|
||||
}
|
||||
}
|
||||
|
||||
// Apply some sanity check and report some warnings for those don't follow
|
||||
// the best quantization practise. This also fixes some simple violations.
|
||||
// Apply some sanity check and report some warnings for those who don't follow
|
||||
// the best quantization practice. This also fixes some simple violations.
|
||||
void SanityCheckAndAdjustment(FuncOp func);
|
||||
|
||||
// Whether the func contains Quantize ops. This is used to determine whether
|
||||
@ -252,13 +252,13 @@ void PrepareQuantizePass::SanityCheckAndAdjustment(FuncOp func) {
|
||||
// Check for (Quant (Dequant $in), $qA) "qdq" pairs that couldn't be
|
||||
// eliminated at this point. This only occurs for the pattern
|
||||
// (Quant (Dequant (Quant $in, $qB)), $qA) $qB != $qA
|
||||
// where the qdq pair denotes a non-trivial requantiziion of an
|
||||
// alreadyquantized value. Since this makes little sense (directly quantizing
|
||||
// (Quant $in, $qA) would introduce less quantization noise) the likley cause
|
||||
// where the qdq pair denotes a non-trivial requantization of an
|
||||
// already quantized value. Since this makes little sense (directly quantizing
|
||||
// (Quant $in, $qA) would introduce less quantization noise) the likely cause
|
||||
// is a minor error in constructing the original network model that
|
||||
// introduced back-to-back Fake Quantization operations. Hence: emit a
|
||||
// warning. N.b. at this point we're (temporarily) in the quantization dialect
|
||||
// (presuambly enalbe re-use in xla etc) quant::*QuantizeCastOp weŕe matching
|
||||
// (presumably enable re-use in xla etc) quant::*QuantizeCastOp we're matching
|
||||
// here.
|
||||
//
|
||||
func.walk([&](quant::QuantizeCastOp q_op) {
|
||||
@ -271,9 +271,9 @@ void PrepareQuantizePass::SanityCheckAndAdjustment(FuncOp func) {
|
||||
auto dq_arg = dq_op.getOperand();
|
||||
|
||||
if (!dq_arg.hasOneUse()) {
|
||||
// The initial quanization is used sompleace else ... so it might be
|
||||
// The initial quantization is used someplace else ... so it might be
|
||||
// reasonable for it to be requantized for another purpose.
|
||||
// TODO: ideally would want to still check whether requanization narrows
|
||||
// TODO: ideally would want to still check whether requantization narrows
|
||||
// rather than widens the representation
|
||||
return;
|
||||
}
|
||||
|
@ -64,7 +64,7 @@ LogicalResult ConvertNMSPaddedFunc::VerifySignature() {
|
||||
if (func_.getNumArguments() < 5) {
|
||||
return func_.emitError()
|
||||
<< "Invalid number of arguments to "
|
||||
"non_max_suppression_padded_v2 (need atleast 5): "
|
||||
"non_max_suppression_padded_v2 (need at least 5): "
|
||||
<< func_.getNumArguments();
|
||||
}
|
||||
if (func_.getType().getNumResults() != 2) {
|
||||
|
@ -43,7 +43,7 @@ namespace tf_executor {
|
||||
namespace {
|
||||
|
||||
// IslandType is an enum representing if an island is the island (parent)
|
||||
// merging another island or is the island (child) being being merged.
|
||||
// merging another island or is the island (child) being merged.
|
||||
enum IslandType { kParentIsland, kChildIsland };
|
||||
|
||||
// IslandResult is a helper struct holding an islands result and associated
|
||||
|
@ -333,7 +333,7 @@ LogicalResult RegionControlFlowToFunctional::ConvertWhileOp(
|
||||
WhileRegionOp while_region) {
|
||||
// For While, the arguments of the calls in the body and cond regions match
|
||||
// if they are region arguments with the same region argument numbers. If the
|
||||
// 2 calls have the same value (an extern value) used an an argument, we
|
||||
// 2 calls have the same value (an extern value) used as an argument, we
|
||||
// cannot do a trivial transformation because post transform, we will need to
|
||||
// pass this extern value as an argument to the function, so we cannot use the
|
||||
// existing function as is.
|
||||
|
@ -120,7 +120,7 @@ namespace {
|
||||
// return %read
|
||||
// }
|
||||
//
|
||||
// will be be transformed to:
|
||||
// will be transformed to:
|
||||
//
|
||||
// func @cluster_with_loop() {
|
||||
// %0 = "tf.VarHandleOp"() ...
|
||||
@ -245,7 +245,7 @@ class RegionResourceHoister {
|
||||
// Returns all resources accessed by the regions attached to the op.
|
||||
auto& GetResources() { return resources_; }
|
||||
|
||||
// Returns if the given value is a resouce that needs lifting.
|
||||
// Returns if the given value is a resource that needs lifting.
|
||||
bool Contains(Value resource) const {
|
||||
return resources_.find(resource) != resources_.end();
|
||||
}
|
||||
@ -379,7 +379,7 @@ LogicalResult RegionResourceHoister::Analyze() {
|
||||
// If the user is not in one of the regions, we are not interested in it.
|
||||
// Since all the sub-regions within this region (i.e., regions attached to
|
||||
// op's in this region) have themselves gone through lifting, all resource
|
||||
// users are expected to be operations in this region and and not embedded
|
||||
// users are expected to be operations in this region and not embedded
|
||||
// within other sub-regions attached to op's in this region. So the check
|
||||
// for whether a user is in one of the regions attached to this op is
|
||||
// straightforward.
|
||||
@ -531,7 +531,7 @@ void RegionResourceHoister::ReplaceOpWithNewOp() {
|
||||
new_result_types.insert(new_result_types.end(), extra_result_types.begin(),
|
||||
extra_result_types.end());
|
||||
OpBuilder builder(op_);
|
||||
// Clone ths old operation but with new result types.
|
||||
// Clone this old operation but with new result types.
|
||||
Operation* new_op = Operation::create(
|
||||
op_->getLoc(), op_->getName(), new_result_types, op_->getOperands(),
|
||||
op_->getAttrs(), op_->getSuccessors(), op_->getNumRegions());
|
||||
@ -808,7 +808,7 @@ LogicalResult LiftArgRetResourcesForFunction(
|
||||
// value to be written.
|
||||
|
||||
// Now create read values that will be used to replace each resource that
|
||||
// is read in the function body. These read vaulues are just the same argument
|
||||
// is read in the function body. These read values are just the same argument
|
||||
// with type replaced.
|
||||
llvm::SmallVector<Value, 4> skipped_args;
|
||||
for (auto& it : hoister.GetResources()) {
|
||||
|
@ -39,7 +39,7 @@ extern llvm::cl::opt<bool> prune_unused_nodes;
|
||||
extern llvm::cl::opt<bool> convert_legacy_fed_inputs;
|
||||
extern llvm::cl::opt<bool> graph_as_function;
|
||||
extern llvm::cl::opt<bool> upgrade_legacy;
|
||||
// TODO(jpienaar): Temporary flag, flip default and and remove.
|
||||
// TODO(jpienaar): Temporary flag, flip default and remove.
|
||||
extern llvm::cl::opt<bool> enable_shape_inference;
|
||||
|
||||
#endif // TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSLATE_TF_MLIR_TRANSLATE_CL_H_
|
||||
|
@ -50,7 +50,7 @@ string MangleTensor(const TensorProto& tensor);
|
||||
// Demangle a string mangled with MangleTensor.
|
||||
Status DemangleTensor(absl::string_view str, TensorProto* proto);
|
||||
|
||||
// Return a DataType mangled as as string.
|
||||
// Return a DataType mangled as a string.
|
||||
string MangleDataType(const DataType& dtype);
|
||||
// Demangle a string mangled with MangleDataType.
|
||||
Status DemangleDataType(absl::string_view str, DataType* proto);
|
||||
|
@ -60,7 +60,7 @@ static llvm::cl::opt<bool> import_saved_model_object_graph(
|
||||
static llvm::cl::opt<bool> import_saved_model_signature_defs(
|
||||
"savedmodel-signaturedefs-to-mlir",
|
||||
llvm::cl::desc(
|
||||
"Import a saved model's SignatureDefs to to their MLIR representation"),
|
||||
"Import a saved model's SignatureDefs to their MLIR representation"),
|
||||
llvm::cl::value_desc("dir"));
|
||||
|
||||
// NOLINTNEXTLINE
|
||||
|
@ -554,7 +554,7 @@ void TRTEngineOp::ComputeAsync(OpKernelContext* ctx,
|
||||
// * Logic in TF 2.0:
|
||||
// - During conversion: similar to 1.x.
|
||||
// - During inference: calibration_data will still be empty, but cache will
|
||||
// contain the the calibrated engine, so it won't trigger calibration.
|
||||
// contain the calibrated engine, so it won't trigger calibration.
|
||||
//
|
||||
// TODO(laigd): consider the following alternatives:
|
||||
// 1. Serialize the state (calibration or inference) using
|
||||
|
@ -309,7 +309,7 @@ void XlaWhileOp::Compile(XlaOpKernelContext* ctx) {
|
||||
// 2. The op inputs at these indices are compile time constants.
|
||||
//
|
||||
// These compile time consts do not appear as _Args in the cond/body functions
|
||||
// and are replaced by kConstant nodes instead. As as result, the compiled
|
||||
// and are replaced by kConstant nodes instead. As a result, the compiled
|
||||
// body function does not have matching input and output shape. We fix this
|
||||
// by rewriting the body computation (see body_wrapper below) to output
|
||||
// just the non compile-time-const values and later pad up the while output
|
||||
@ -531,7 +531,7 @@ void XlaWhileOp::Compile(XlaOpKernelContext* ctx) {
|
||||
|
||||
// Set dynamic dimension size to 0 for element value. Inside the while
|
||||
// loop, TensorlistSetItem will properly set the element shape's
|
||||
// dynamic diemnsion.
|
||||
// dynamic dimension.
|
||||
for (int64 dim = 1; dim < shape.dimensions_size(); ++dim) {
|
||||
int32 dim_size = shape.dimensions(dim);
|
||||
if (shape.is_dynamic_dimension(dim)) {
|
||||
|
@ -375,7 +375,7 @@ The `operand` is broadcast to the shape described by `out_dim_size`.
|
||||
`broadcast_dimensions` maps the dimensions of `operand` to the dimensions of the
|
||||
target shape, i.e. the i'th dimension of the operand is mapped to the
|
||||
broadcast_dimension\[i\]'th dimension of the output shape. The dimensions of
|
||||
`operand` must have size 1 or be the same size as the dimension in in the output
|
||||
`operand` must have size 1 or be the same size as the dimension in the output
|
||||
shape they are mapped to. The remaining dimensions are filled with dimensions of
|
||||
size 1. Degenerate-dimension broadcasting then broadcasts along these degenerate
|
||||
dimensions to reach the output shape. The semantics are described in detail on
|
||||
@ -1330,7 +1330,7 @@ array with the same shape. It is allowed for `operand` to be a scalar (rank 0).
|
||||
The XLA FFT operation implements the forward and inverse Fourier Transforms for
|
||||
real and complex inputs/outputs. Multidimensional FFTs on up to 3 axes are
|
||||
supported, except on TPU, where only a single axis is supported (please file a
|
||||
github issue if you require higher order).
|
||||
GitHub issue if you require higher order).
|
||||
|
||||
See also
|
||||
[`XlaBuilder::Fft`](https://www.tensorflow.org/code/tensorflow/compiler/xla/client/xla_builder.h).
|
||||
|
@ -397,7 +397,7 @@ class NearComparator {
|
||||
(error_.relaxed_nans && !IsNan(expected) && IsNan(actual))) {
|
||||
num_nan_mismatches_++;
|
||||
// A nan mismatch is considered to have infinite error. rel_error is
|
||||
// used for sorting a std::set of the top mismatchs, and a nan value
|
||||
// used for sorting a std::set of the top mismatches, and a nan value
|
||||
// here will result in undefined behavior because nan's do not satisfy
|
||||
// the strict weak ordering requirement of std containers.
|
||||
abs_error = std::numeric_limits<float>::infinity();
|
||||
@ -625,7 +625,7 @@ class NearComparator {
|
||||
// Callback to invoke on miscompare.
|
||||
MiscompareCallback miscompare_callback_;
|
||||
|
||||
// Number of element element mismatches encountered so far.
|
||||
// Number of element mismatches encountered so far.
|
||||
int64 num_mismatches_ = 0;
|
||||
|
||||
// Number of elements with a nan mismatch.
|
||||
|
@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
// Before you start, make sure libtpu.so, libtpu.h and and libtpu_client.c are
|
||||
// in the same working directory.
|
||||
// Before you start, make sure libtpu.so, libtpu.h and libtpu_client.c are in
|
||||
// the same working directory.
|
||||
//
|
||||
// To compile: gcc -o libtpu_client libtpu_client.c -ldl
|
||||
// To run: sudo ./libtpu_client
|
||||
|
@ -1468,7 +1468,7 @@ TEST_F(WhileCopyInsertionTest, InitPointsToNonDistinctUsedByTwoWhileLoops) {
|
||||
auto loop_init = builder.AddInstruction(
|
||||
HloInstruction::CreateTuple({iter_param, data_param, data_param}));
|
||||
|
||||
// Two while loops shares the same loop init tuple.
|
||||
// Two while loops share the same loop init tuple.
|
||||
auto while_hlo1 = builder.AddInstruction(HloInstruction::CreateWhile(
|
||||
loop_state_shape, condition1, body1, loop_init));
|
||||
auto while_hlo2 = builder.AddInstruction(HloInstruction::CreateWhile(
|
||||
|
@ -303,7 +303,7 @@ HloInstruction* PadWithScalar(HloInstruction* inst, int64 dim,
|
||||
// [1,2,2,3,4,4] and subtract it with 1:
|
||||
// [0,1,1,2,3,3]
|
||||
//
|
||||
// 4.Use the the result of cumsum as gather indicies to rearrange the original
|
||||
// 4.Use the result of cumsum as gather indices to rearrange the original
|
||||
// data. Feed the original input [a,b,c,d,P,P] and indices into gather.
|
||||
//
|
||||
// operand [a,b,c,d,P,P], indices [0,1,1,2,3,3]
|
||||
@ -668,7 +668,7 @@ Status RewriteDynamicReshapeCombineInput(
|
||||
gather->shape(), gather, output_dynamic_size, output_dim));
|
||||
auto users = reshape->users();
|
||||
for (auto* user : users) {
|
||||
// Avoid cycles by not replacing the staic reshape and get_dimension_size.
|
||||
// Avoid cycles by not replacing the static reshape and get_dimension_size.
|
||||
if (user != reshape_static && user != output_dynamic_size) {
|
||||
TF_RETURN_IF_ERROR(reshape->ReplaceUseWith(user, gather));
|
||||
}
|
||||
@ -750,7 +750,7 @@ HloInstruction* RewriteInputWithDynamicPadding(
|
||||
auto* padding_dim = padding_configs.mutable_dimensions(input_spatial_dim);
|
||||
const int64 dilated_window_size = window_util::DilatedBound(
|
||||
window_dim->size(), window_dim->window_dilation());
|
||||
// Chosoe dilated window size as low padding and static padding_high +
|
||||
// Choose dilated window size as low padding and static padding_high +
|
||||
// padding_low as high padding to make sure the following dynamic slice is
|
||||
// valid.
|
||||
//
|
||||
@ -1513,8 +1513,8 @@ StatusOr<bool> DynamicPadder::Run(HloModule* module) {
|
||||
// it. We do this because we have two different APIs to express a dynamic
|
||||
// dimension:
|
||||
//
|
||||
// 1. Dynamic dimension as specificed directly in the shape -- Needed for
|
||||
// Pytorch.
|
||||
// 1. Dynamic dimension as specified directly in the shape -- Needed for
|
||||
// PyTorch.
|
||||
//
|
||||
// 2. Dynamic dimension using dynamic parameter binding object. This
|
||||
// is needed for tensorflow.
|
||||
|
@ -125,8 +125,8 @@ Status IrEmitter::EmitConstants(const HloComputation& computation,
|
||||
// merely preserves their names (like available_externally), we also need
|
||||
// to ensure that they stick around even if they're "unused".
|
||||
//
|
||||
// We may have to be more more clever here in the future if we notice that
|
||||
// we're keeping around too many globals because of their linkage.
|
||||
// We may have to be more clever here in the future if we notice that we're
|
||||
// keeping around too many globals because of their linkage.
|
||||
unsigned global_address_space = llvm_ir::GetGlobalMemoryAddressSpace(
|
||||
*ir_emitter_context_->llvm_module());
|
||||
|
||||
|
@ -96,7 +96,7 @@ string GetLibdeviceDir(const HloModuleConfig& hlo_module_config) {
|
||||
"uses routines from libdevice.",
|
||||
hlo_module_config);
|
||||
|
||||
// GetCudaRootCandidates always includes ".", but but if everything fails, we
|
||||
// GetCudaRootCandidates always includes ".", but if everything fails, we
|
||||
// return it anyway. Better than returning the empty string.
|
||||
return ".";
|
||||
}
|
||||
@ -396,7 +396,7 @@ std::vector<uint8> NVPTXCompiler::CompileGpuAsmOrGetCachedResult(
|
||||
"--xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found "
|
||||
"to use the GPU driver for compiling ptx instead. However "
|
||||
"this option is discouraged and can lead to increased "
|
||||
"memory concumptions and other subtle runtime issues.";
|
||||
"memory consumptions and other subtle runtime issues.";
|
||||
}
|
||||
// Missing ptxas is expected in some environments where CUDA SDK
|
||||
// binaries are not available. We don't want to spam logs with
|
||||
|
@ -2527,7 +2527,7 @@ bool HloParserImpl::SetValueInLiteral(LocTy loc, std::complex<double> value,
|
||||
literal);
|
||||
default:
|
||||
LOG(FATAL) << PrimitiveType_Name(shape.element_type())
|
||||
<< " is not a complex type type";
|
||||
<< " is not a complex type";
|
||||
}
|
||||
}
|
||||
|
||||
@ -2549,7 +2549,7 @@ bool HloParserImpl::SetValueInLiteralHelper(LocTy loc, ParsedElemT value,
|
||||
|
||||
// Check that the index is in range and assign into the literal
|
||||
if (index >= ShapeUtil::ElementsIn(literal->shape())) {
|
||||
return Error(loc, StrCat("trys to set value ", StringifyValue(value),
|
||||
return Error(loc, StrCat("tries to set value ", StringifyValue(value),
|
||||
" to a literal in shape ",
|
||||
ShapeUtil::HumanString(literal->shape()),
|
||||
" at linear index ", index,
|
||||
@ -3560,7 +3560,7 @@ bool HloParserImpl::ParseWindow(Window* window, bool expect_outer_curlies) {
|
||||
}
|
||||
|
||||
// This is the inverse of HloInstruction::ConvolutionDimensionNumbersToString.
|
||||
// Thestring looks like "dim_labels=0bf_0io->0bf".
|
||||
// The string looks like "dim_labels=0bf_0io->0bf".
|
||||
bool HloParserImpl::ParseConvolutionDimensionNumbers(
|
||||
ConvolutionDimensionNumbers* dnums) {
|
||||
if (lexer_.GetKind() != TokKind::kDimLabels) {
|
||||
|
@ -82,7 +82,7 @@ void SetBoundForSimpleLoop(mlir::AffineForOp loop, mlir::AffineExpr new_bound,
|
||||
// * TileLoop always puts the tiling "stepping" logic into AffineExprs.
|
||||
// With that all index calculation is done in AffineExprs and easier to
|
||||
// analyze in a single place.
|
||||
// * TileLoop doesn't plan to use use max() and min() to resolve the issue when
|
||||
// * TileLoop doesn't plan to use max() and min() to resolve the issue when
|
||||
// N % X != 0. max() and min() are not representable in AffineExprs.
|
||||
// TODO(timshen): support the case where N % X != 0.
|
||||
//
|
||||
|
@ -596,7 +596,7 @@ TEST_F(TuplePointsToAnalysisTest, TupleWithBitcast) {
|
||||
|
||||
TEST_F(TuplePointsToAnalysisTest, PointsToTupleConstantElements) {
|
||||
// Construct a tuple constant and kCopy it. Verify the points-to set of the
|
||||
// copy correctly correctly points into the nested elements of the constant.
|
||||
// copy correctly points into the nested elements of the constant.
|
||||
auto builder = HloComputation::Builder(TestName());
|
||||
Literal elements[] = {LiteralUtil::CreateR2<float>({{1.0}, {2.0}}),
|
||||
LiteralUtil::CreateR1<float>({2.0, 42})};
|
||||
|
@ -183,7 +183,7 @@ class Exhaustive32BitOrLessUnaryTest
|
||||
return end - begin;
|
||||
}
|
||||
|
||||
// Generates all the input values for the test. The the range of the bit
|
||||
// Generates all the input values for the test. The range of the bit
|
||||
// representation of the input values is described by the test parameter as
|
||||
// a pair of int64 representing the starting bit pattern and the ending
|
||||
// pattern. Each bit representation is first truncated to the integral type of
|
||||
|
@ -35,8 +35,8 @@ PLATFORM_DEFINE_ID(kDummyTestId);
|
||||
constexpr char kDummyTriple[] = "dummy-triple";
|
||||
constexpr char kDummyLayout[] = "e";
|
||||
|
||||
// This class is is a dummy implementation of GpuCompiler and is targeted for
|
||||
// unit test only
|
||||
// This class is a dummy implementation of GpuCompiler and is targeted for unit
|
||||
// test only
|
||||
class GpuDummyCompiler : public GpuCompiler {
|
||||
public:
|
||||
GpuDummyCompiler() : GpuCompiler(kDummyTestId, kDummyTriple, kDummyLayout) {}
|
||||
|
Loading…
Reference in New Issue
Block a user