minor spelling tweaks
parent b41fbcbf85
commit e664420b79
@@ -509,10 +509,10 @@ RecursiveCompilabilityChecker::OperationFilter CreateOperationFilter(
 auto it = uncompilable_nodes->find(function_identifier);
 if (it == uncompilable_nodes->end()) {
 std::vector<RecursiveCompilabilityChecker::UncompilableNodeInfo>
-uncompileable_node_info{std::move(node_info)};
+uncompilable_node_info{std::move(node_info)};
 uncompilable_nodes->emplace(
 std::move(function_identifier),
-std::make_pair(function, std::move(uncompileable_node_info)));
+std::make_pair(function, std::move(uncompilable_node_info)));
 } else {
 it->second.second.emplace_back(std::move(node_info));
 }
@@ -96,7 +96,7 @@ limitations under the License.
 // Symbolic > NonSymbolic. The lattice has height = 2 so two iterations are
 // sufficient to converge.
 //
-// We first do an optimisitc analysis and, if it does not converge, we then fall
+// We first do an optimistic analysis and, if it does not converge, we then fall
 // back to a pessimistic analysis. The optimistic analysis assigns the same
 // symbolic predicate to all the merge nodes whose preceding enter nodes have
 // the same frame name on the first iteration. On the second iteration, if all
@@ -1255,7 +1255,7 @@ Status DeadnessAnalysisImpl::GetFrameBasedTopologicalOrder(
 } else if (IsRootExit(node)) {
 ++num_exits_for_frame[cf.frame_name];
 }
-// Edge NextIteration->Merge is counted before starting the traveral to
+// Edge NextIteration->Merge is counted before starting the traversal to
 // break the backedges.
 if (IsMerge(node)) {
 for (const Edge* e : node->in_edges()) {
@@ -1458,7 +1458,7 @@ Status DeadnessAnalysisImpl::PopulateFrame(absl::Span<Node* const> topo,
 for (Node* n : topo) {
 // The nodes added to should_revisit in the previous loop need to be
-// revisited now. Reprocesing these initial nodes may add *their* consumers
+// revisited now. Reprocessing these initial nodes may add *their* consumers
 // to should_revisit, and these newly added nodes will also be processed by
 // this very same loop. Since we're traversing the graph in topological
 // order (producers before consumers) and HandleNode(n) can only ever add
@@ -95,7 +95,7 @@ extern const char* const kXlaNumResourceArgsAttr;
 extern const char* const kXlaHasReferenceVarsAttr;
 // Sorts each node's control inputs by their names. This guarantees that for two
-// structually equivalent GraphDefs, we get the same traversal ordering on
+// structurally equivalent GraphDefs, we get the same traversal ordering on
 // node's control input fields.
 // TODO(hpucha): Move the utilities to a more appropriate place.
 void SortControlInputs(GraphDef* gdef);
@@ -72,7 +72,7 @@ extern const char kXlaLiftedArgOutsideCompilationAttrName[];
 // Attribute indicating that this is an IdentityN node receiving inputs for a
 // outside compilation Placeholder node (the original outside compilation node
-// is moved out of TPU comutation, and we left a Placeholder node there).
+// is moved out of TPU computation, and we left a Placeholder node there).
 // Attribute value will be a string, which is the outside compilation cluster
 // name for the outside compilation Placeholder node.
 extern const char kXlaOutsideCompilationInputsAttrName[];
@@ -941,7 +941,7 @@ TEST_F(ExtractOutsideCompilationForFunctionTest,
 // "const0"
 // "identity0" = "const0" (outside compilation cluster "0")
 // "identity1" = "const0" "^identity0" (outside compilation cluster "1",
-// control depdent on cluster "0")
+// control dependent on cluster "0")
 // "identity2" = "identity1"
 FunctionDefLibrary fdl;
 {
@@ -123,7 +123,7 @@ class GraphCycles {
 absl::Span<const int32> Successors(int32 node) const;
 absl::Span<const int32> Predecessors(int32 node) const;
-// Return a copy of the sucessors set. This is needed for code using the
+// Return a copy of the successors set. This is needed for code using the
 // collection while modifying the GraphCycles.
 std::vector<int32> SuccessorsCopy(int32 node) const;
 // Return a copy of the predecessors set. This is needed for code using the
@@ -1366,7 +1366,7 @@ Status MarkForCompilationPassImpl::Run() {
 void MarkForCompilationPassImpl::DumpPostClusteringGraphs() {
 DumpGraphToFile("mark_for_compilation", *graph_, flib_def_);
-// We also dump out an annoated version of the TF graph where the nodes
+// We also dump out an annotated version of the TF graph where the nodes
 // names are prefixed with the cluster names. This can help visualizing the
 // clustering decisions on TensorBoard.
 Graph new_graph(graph_->op_registry());
@@ -187,7 +187,7 @@ impl::NodeMatcherProperties Op(string op);
 // Matches a node with assigned device `assigned_device`.
 impl::NodeMatcherProperties AssignedDevice(string assigned_device);
-// Matches a node with a boolean typed attrbute named `name` and with value
+// Matches a node with a boolean typed attribute named `name` and with value
 // `value`.
 template <typename ValueTy>
 impl::NodeMatcherProperties Attr(const string& name, ValueTy value) {
@@ -125,7 +125,7 @@ TEST(NodeMatchers, CheckControlDependence) {
 "is any node");
 }
-TEST(NodeMatchers, ConstVaulue) {
+TEST(NodeMatchers, ConstValue) {
 Scope root = Scope::NewRootScope().ExitOnError();
 Output placeholder =
 ops::Placeholder(root.WithOpName("placeholder"), DT_FLOAT);
@@ -110,7 +110,7 @@ Merges the outputs from the PartitionedCall node and the _XlaRun node.
 Unlike the TensorFlow Merge op, which requires inputs of some types to be
 placed on the host, the _XlaMerge op can merge inputs of all types when
 placed on the device. This prevents the need for copy operations, in
-particluar when an XLA cluster has int32 outputs. The _XlaMerge up does not
+particular when an XLA cluster has int32 outputs. The _XlaMerge up does not
 have a value_index output that identifies the chosen input.
 )");
@@ -262,7 +262,7 @@ void XlaDeviceContext::CopyDeviceTensorToCPU(const Tensor* device_tensor,
 << xla_tensor->shaped_buffer().ToString();
 // For devices don't allow sync on completion, the device execution is
 // deferred. We check the execution stream status here to avoid wrong
-// results from a failed stream being propogated to following
+// results from a failed stream being propagated to following
 // host-side ops.
 if (!device_allows_sync_on_completion) {
 done_status.Update(xla_tensor->RefreshStatusOfStreams());
@@ -222,7 +222,7 @@ Status CreateXlaKernel(FunctionLibraryRuntime* flr, const NodeDef& node_def,
 // using xla::ComputationDataHandle, which is just a symbolic handle that
 // xla::ComputationBuilder assigns. How does this handle gets assigned for
 // constant arguments? Even constant arguments get an _Arg node in the graph
-// instatiated for Function compilation. The tf2xla kernel for constant _Arg
+// instantiated for Function compilation. The tf2xla kernel for constant _Arg
 // nodes takes the constant value, converts it to XlaLiteral, and feeds it
 // to xla::ComputationBuilder.ConstantLiteral, which returns the handle. This
 // constant XlaLiteral is included in the HLO graph, and subsequently, in
@@ -84,7 +84,7 @@ VariableInfo::~VariableInfo() {
 }
 }
-// Returns a vector of VaribleInfo instances for the resource variable inputs to
+// Returns a vector of VariableInfo instances for the resource variable inputs to
 // the kernel with context `ctx`. The input indices for the resource variable
 // inputs are in `variable_indices`.
 static Status GetVariableInfosFromCtxInputs(
@@ -416,7 +416,7 @@ bool RemoveRedundantStatsOps(mlir::FuncOp func,
 if (res->hasOneUse()) {
 if (auto next_stats = llvm::dyn_cast<quant::StatisticsOp>(
 *res->getUsers().begin())) {
-// quantization parameters can be propgated to next_stats
+// quantization parameters can be propagated to next_stats
 redundant_stats_ops.insert(next_stats);
 // add next_stats to the work list so propagation can
 // continue.
@@ -342,14 +342,14 @@ ElementsAttr Quantize(Attribute real_value, Type tensor_type);
 // parameters in this type is based on the min and max element of the
 // attribute. When the elements in the `attr` are not in floating-point, or
 // the value range isn't straddling zero, an empty type is returned. The min/max
-// are ajusted to be symmetric if `symmetric` flag is set to True. And
+// are adjusted to be symmetric if `symmetric` flag is set to True. And
 // `symmetric` can only be set to true when it is signed and narrow_range.
 Type GetUniformQuantizedTypeForWeight(ElementsAttr attr, bool symmetric,
 unsigned num_bits, bool is_sign,
 bool narrow_range);
 // Returns the per channel quantized type for an element attribute.
-// `quant_dim` defines the quantization axis. The channel min/max are ajusted
+// `quant_dim` defines the quantization axis. The channel min/max are adjusted
 // to be symmetric if `symmetric` flag is set to True. And `symmetric` can only
 // be set to true when it is signed and narrow_range.
 Type GetUniformQuantizedPerAxisTypeForWeight(ElementsAttr attr, int quant_dim,
@@ -413,13 +413,13 @@ void PreprocessTopoSortGraph(
 }
 operation_to_in_degrees->try_emplace(&op, input_ops.size());
 for (auto* input_op : input_ops) {
-auto preceeding_op_it = operation_to_outputs->find(input_op);
-if (preceeding_op_it == operation_to_outputs->end()) {
+auto preceding_op_it = operation_to_outputs->find(input_op);
+if (preceding_op_it == operation_to_outputs->end()) {
 auto result = operation_to_outputs->try_emplace(
 input_op, llvm::DenseSet<Operation*>());
-preceeding_op_it = result.first;
+preceding_op_it = result.first;
 }
-preceeding_op_it->second.insert(&op);
+preceding_op_it->second.insert(&op);
 }
 }
 }
@@ -394,14 +394,14 @@ struct FuseBinaryOpToFollowingAffineOp : public OpRewritePattern<AffineOpType> {
 // w * (x ' c) + b => (w ' c) x + b
 // so we have to update the weight.
 bool is_mul = llvm::isa<MulOp>(binary_op);
-auto new_fitler =
+auto new_filter =
 filter_cst.mapValues(filter_type.getElementType(), [&](APFloat it) {
 return (is_mul ? it * cst_value : it / cst_value).bitcastToAPInt();
 });
 // We recreate the constant op in case it is shared by the other ops. This
 // might increase the model size.
 auto new_filter_op = rewriter.create<ConstOp>(
-fc_op.getLoc(), filter->getType(), new_fitler);
+fc_op.getLoc(), filter->getType(), new_filter);
 fc_op.setOperand(0, binary_op->getOperand(0));
 if (fc_op.filter() != filter) {
 // This filter goes through quantize and dequantize ops. Then we just
@@ -132,8 +132,8 @@ class FoldIfOp : public OpRewritePattern<TF::IfOp> {
 // Erases functions from the given candidates that are not referenced by any of
 // the ops in the module.
-static void EraseDeadFuncs(const FuncSet& candiate_funcs, ModuleOp module) {
-if (candiate_funcs.empty()) return;
+static void EraseDeadFuncs(const FuncSet& candidate_funcs, ModuleOp module) {
+if (candidate_funcs.empty()) return;
 SymbolTable manager(module);
@@ -149,7 +149,7 @@ static void EraseDeadFuncs(const FuncSet& candiate_funcs, ModuleOp module) {
 }
 });
-for (FuncOp func : candiate_funcs) {
+for (FuncOp func : candidate_funcs) {
 if (!in_use_funcs.count(func)) manager.erase(func);
 }
 }
@@ -132,7 +132,7 @@ struct InsertTFLQuantOpsAfterTFFakeQuantOp
 int quant_dim = -1;
 if (PerAxis) {
-// This is a special case that the quant_dim is the last dimentions.
+// This is a special case that the quant_dim is the last dimensions.
 quant_dim = res->getType().template cast<ShapedType>().getRank() - 1;
 }
 // Use the min/max from the operands and the num_bits and narrow_range
@@ -98,7 +98,7 @@ Value* SliceRankedTensor(OpBuilder* builder, Value* input,
 ArrayRef<int64_t> size_values,
 mlir::Location location) {
 // If the size of the tensor to be sliced from the input overflows
-// the input tensor's dimenions, return 0-valued tensor of the requested
+// the input tensor's dimensions, return 0-valued tensor of the requested
 // shape.
 ArrayRef<int64_t> input_shape = GetRankedTensorShape(input);
 for (int i = 0; i < input_shape.size(); i++) {
@@ -122,7 +122,7 @@ void ResourceAliasAnalysis::AnalyzeFunction(FuncOp func_op) {
 std::get<1>(operand_and_result));
 }
 } else if (auto replicate = llvm::dyn_cast<tf_device::ReplicateOp>(op)) {
-// The nested block for RepliateOp is handled separately in side-effect
+// The nested block for ReplicateOp is handled separately in side-effect
 // analysis. Inside that block, we can still treat its block arguments as
 // different resources.
 for (auto arg : replicate.GetBody().getArguments()) {
@@ -305,7 +305,7 @@ void SideEffectAnalysis::AnalyzeRegion(
 // region, and tracking resource accesses in per_resource_access_info_.
 // Returns whether an access to `resource` can skip control edges from
-// prevoius accesses to unknown resources, due to that earlier accesses to
+// previous accesses to unknown resources, due to that earlier accesses to
 // `resource` already indirectly tracked previous accesses to uknown
 // resources. `read_only` specifies the type of access of the current op being
 // considered.
@@ -105,7 +105,7 @@ class SideEffectAnalysis {
 void ConsumeChildAnalyses(
 llvm::SmallVector<SideEffectAnalysis, 4>&& children);
-// Updates control_predecessors_ for `op` that is being visted, on the given
+// Updates control_predecessors_ for `op` that is being visited, on the given
 // `resource_id`.
 void AddPredecessorsForAccess(int64_t resource_id, Operation* op,
 bool read_only);
@@ -124,7 +124,7 @@ class SideEffectAnalysis {
 sorted_control_successors_;
 // Internal per-resource data structure when we build the dependencies.
-struct PerResourceAcessInfo {
+struct PerResourceAccessInfo {
 // Last op that writes the resource before the current op being analyzed.
 Operation* last_write = nullptr;
 // Read ops since last_write before the current op being analyzed.
@@ -134,7 +134,7 @@ class SideEffectAnalysis {
 bool tracked_last_unknown_read = false;
 bool tracked_last_unknown_write = false;
 };
-llvm::SmallDenseMap<int64_t, PerResourceAcessInfo, 8>
+llvm::SmallDenseMap<int64_t, PerResourceAccessInfo, 8>
 per_resource_access_info_;
 };
@@ -1317,7 +1317,7 @@ Operations are applied to the input(s) according to the following rules:
 Considering the batch matrix multiplication equation again
 (`bij,bjk->bik`), the contracted axis label is `j`.
-(e) Expand Diagonal: If the output subcripts contain repeated (explicit) axis
+(e) Expand Diagonal: If the output subscripts contain repeated (explicit) axis
 labels, the opposite operation of (a) is applied. For example, in the
 equation `i->iii`, and input shape `[3]`, the output of shape `[3, 3, 3]`
 are all zeros, except for the (generalized) diagonal which is populated
@@ -1325,7 +1325,7 @@ Operations are applied to the input(s) according to the following rules:
 Note: This operation is not supported by `np.einsum` or `tf.einsum`; it is
 provided to enable computing the symbolic gradient of `tf.einsum`.
-The output subcripts must contain only labels appearing in at least one of the
+The output subscripts must contain only labels appearing in at least one of the
 input subscripts. Furthermore, all dimensions mapping to the same axis label
 must be equal.
@@ -1337,7 +1337,7 @@ according to standard NumPy broadcasting
 The broadcasted dimensions are placed in the corresponding location of the
 ellipsis in the output subscript. If the broadcasted dimensions are non-empty
-and the output subcripts do not contain ellipsis, then an InvalidArgument error
+and the output subscripts do not contain ellipsis, then an InvalidArgument error
 is raised.
 @compatibility(numpy)
@@ -84,7 +84,7 @@ DenseIntElementsAttr GetBiasAddGradReductionIndices(int64_t rank,
 tensorflow::TensorFormat format;
 if (!FormatFromString(data_format.getValue().str(), &format)) return {};
-// Reudce along all dimensions except the feature dimension.
+// Reduce along all dimensions except the feature dimension.
 int64_t feature_dim = GetTensorFeatureDimIndex(rank, format);
 llvm::SmallVector<int64_t, 4> dims_to_reduce(rank - 1);
 std::iota(dims_to_reduce.begin(), dims_to_reduce.begin() + feature_dim, 0);
@@ -45,7 +45,7 @@ struct ExecutorToControlDialectConversion
 // Replace all uses of value `v` with a list of new values. Because number of
 // new values might be greater than 1, users of `v` might be replaced with their
-// clones in case of non-resizble operands list.
+// clones in case of non-resizable operands list.
 void ReplaceAllUsesOfValueWithValues(Value *v,
 Operation::operand_range new_values) {
 int new_values_size = std::distance(new_values.begin(), new_values.end());
@@ -197,7 +197,7 @@ class Exporter {
 // Each NextIteration node in the original graph is converted to a pair of
 // source and sink operations in the MLIR, and we use the following two maps
-// to pair and convet them back to a single NextIteration node. We choose to
+// to pair and convert them back to a single NextIteration node. We choose to
 // the "name" attribute, which is from the unique node name, to find out the
 // pairs: When scanning the operations in the block, the source operations
 // are inserted to the name_to_inst_ first, and the other "sink" operation
@@ -2283,7 +2283,7 @@ class StructuredValueLinearizer {
 // Returns the list of index paths to each leaf of the StructuredValue,
 // in a linearized order matching `tf.nest.flatten`.
 //
-// If an error ocurred during the linearization process, an error message with
+// If an error occurred during the linearization process, an error message with
 // `error_context` prepended will be included in the returned status.
 StatusOr<llvm::ArrayRef<mlir::ArrayAttr>> GetLeafIndexPaths(
 llvm::StringRef error_context) const;
@@ -759,7 +759,7 @@ def HLO_UnaryEinsumOp: HLO_Op<"unary_einsum", [NoSideEffect]> {
 let hasCanonicalizer = 1;
-// UnarayEinsumOp is unconditionally canonicalized to the binary EinsumOp so
+// UnaryEinsumOp is unconditionally canonicalized to the binary EinsumOp so
 // the HLO converter shouldn't be invoked.
 let hasCustomHLOConverter = 1;
 }
@@ -38,7 +38,7 @@ def DynamicSliceToSlice: Pat<(HLO_DynamicSliceOp HLO_Tensor:$input,
 (BuildSliceLimits $starting_indices, $slice_sizes),
 (BuildSliceStrides $input))>;
-def UnaryToBianryEinsumEq : NativeCodeCall<
+def UnaryToBinaryEinsumEq : NativeCodeCall<
 "$_builder.getStringAttr(\",\" + $0.getValue().str())">;
 // Convert UnaryEinsumOp to EinsumOp with two operands with redundant first
@@ -46,4 +46,4 @@ def UnaryToBianryEinsumEq : NativeCodeCall<
 def UnaryEinsumToEinsum : Pat<
 (HLO_UnaryEinsumOp $operand, $equation),
 (HLO_EinsumOp (HLO_ConstOp (GetScalarOfType<1> $operand)),
-$operand, (UnaryToBianryEinsumEq $equation))>;
+$operand, (UnaryToBinaryEinsumEq $equation))>;
@@ -448,7 +448,7 @@ static DenseIntElementsAttr TFSliceSizes2HLOSliceSizes(
 // `element_types`, create two block arguments, one for lhs and one for rhs, and
 // generates xla_hlo.compare op to compare them with the given `direction`.
 //
-// Note that this right now only does comparsion on the first pair of block
+// Note that this right now only does comparision on the first pair of block
 // arguments.
 static void BuildSortComparisonBody(llvm::ArrayRef<Type> element_types,
 StringRef direction, Region *body,
@@ -2149,7 +2149,7 @@ class ConvertTopKV2Op : public OpRewritePattern<TF::TopKV2Op> {
 // Converts tf.Unpack to a series of XLA HLO slice ops.
 //
 // Each slice takes one element along the dimension to unpack and takes the full
-// range for all other dimenions. Each slice is then reshaped to drop the
+// range for all other dimensions. Each slice is then reshaped to drop the
 // dimension to unpack (which is always of size 1).
 // TODO(antiagainst): consider changing this into a TF internal lowering pass.
 class ConvertUnpackOp : public OpRewritePattern<TF::UnpackOp> {
@@ -107,8 +107,8 @@ def : Pat<(HLO_AbsOp HLO_ComplexTensor:$val),
 (NullDenseIntElementsAttr))),
 (HLO_ConstOp (ConstantSplat<"0"> $real)))>;
-// Expononetial can be lowered to an exponential on the real component and a
-// sum of sinusoids of the imageinary component, which equates to a normal
+// Exponential can be lowered to an exponential on the real component and a
+// sum of sinusoids of the imaginary component, which equates to a normal
 // exponential operator multiplied by Euler's formula.
 //
 // Exp(a + ib) = Exp(a) * Exp(ib) = Exp(a) * (Cos(b) + iSin(b))
@@ -157,7 +157,7 @@ inline Operation* MapLhloOpToStdScalarOp<xla_lhlo::CompareOp>(
 if (element_type.isa<IntegerType>()) {
 Optional<CmpIPredicate> predicate =
 getIntCmpPredicate(lhlo_op.comparison_direction());
-assert(predicate.hasValue() && "expected valid comparision direction");
+assert(predicate.hasValue() && "expected valid comparison direction");
 return b.create<ScalarIOp<CompareOp>>(lhlo_op.getLoc(),
 predicate.getValue(), lhs, rhs);
 }
@@ -114,7 +114,7 @@ def square_cases(align=None):
 [6, 7, 8, 9, 1],
 [2, 3, 4, 5, 6]]])
 tests = dict()
-# tests[d_lower, d_upper] = (compact_diagonals, padded_diagnals)
+# tests[d_lower, d_upper] = (compact_diagonals, padded_diagonals)
 tests[-1, -1] = (np.array([[6, 4, 1, 7],
 [5, 2, 8, 5]]),
 np.array([[[0, 0, 0, 0, 0],
@@ -192,7 +192,7 @@ def tall_cases(align=None):
 [7, 8, 9],
 [9, 8, 7]]])
 tests = dict()
-# tests[d_lower, d_upper] = (compact_diagonals, padded_diagnals)
+# tests[d_lower, d_upper] = (compact_diagonals, padded_diagonals)
 tests[0, 0] = (np.array([[1, 5, 9],
 [3, 2, 6]]),
 np.array([[[1, 0, 0],
@@ -276,7 +276,7 @@ def fat_cases(align=None):
 [8, 9, 1, 2],
 [3, 4, 5, 6]]])
 tests = dict()
-# tests[d_lower, d_upper] = (compact_diagonals, padded_diagnals)
+# tests[d_lower, d_upper] = (compact_diagonals, padded_diagonals)
 tests[0, 0] = (np.array([[1, 6, 2],
 [4, 9, 5]]),
 np.array([[[1, 0, 0, 0],
@@ -49,7 +49,7 @@ class QuantizedOpsTest(xla_test.XLATestCase):
 self.assertAllEqual(value, expected)
-class DeuantizedOpsTest(xla_test.XLATestCase):
+class DequantizedOpsTest(xla_test.XLATestCase):
 def pack_uint8_r2_to_uint32(self, test_input):
 num_rows, num_columns = test_input.get_shape().as_list()
@@ -3423,7 +3423,7 @@ int main(int argc, char** argv) {
 tensorflow::Flag(
 "tf_xla_random_seed", &tensorflow::tf_xla_random_seed,
 "Random seed to use for XLA tests. <= 0 means choose a seed "
-"nondetermistically."),
+"nondeterministically."),
 // TODO(phawkins): it might make more sense to run each test up to a
 // configurable time bound.
 tensorflow::Flag("tf_xla_test_repetitions",
@@ -161,7 +161,7 @@ Status GetEngineInfo(const Graph* g,
 const int node_id = node->id();
 const string& node_name = node->name();
-// Create input connections. Sort edges first to make determnistic since
+// Create input connections. Sort edges first to make deterministic since
 // in_edges is a set of pointers.
 std::vector<const Edge*> in_edges(node->in_edges().begin(),
 node->in_edges().end());
@@ -186,7 +186,7 @@ Status GetEngineInfo(const Graph* g,
 // If it doesn't have any edges, TF will prune it out.
 //
 // Note that the segmenter already ensure that the constant data input
-// is valid and suppported by the engine.
+// is valid and supported by the engine.
 if (!added_const_nodes.insert(input_node).second) {
 // Already added before.
 continue;
@@ -209,7 +209,7 @@ Status GetEngineInfo(const Graph* g,
 node_id, edge->dst_input(), /*input_edge=*/true, port);
 }
 }
-// Create output connections. Sort edges first to make determnistic since
+// Create output connections. Sort edges first to make deterministic since
 // out_edges is a set of pointers.
 std::vector<const Edge*> out_edges(node->out_edges().begin(),
 node->out_edges().end());
@@ -2511,7 +2511,7 @@ Status ConvertStridedSliceHelper(OpConverterParams* params,
 return Status::OK();
 } else if (pad_dims.size() == 1) {
 // Only one dim is modified but we have to have 2, mark a second dim which
-// will have padding of 0. The dim we add is chosen to avoid an unecessary
+// will have padding of 0. The dim we add is chosen to avoid an unnecessary
 // transpose.
 if (pad_dims[0] != 2) {
 pad_dims.push_back(2);
@@ -141,9 +141,9 @@ Status ConvertSegmentToGraphDef(
 // Converts given subgraph to a TRT engine saved in 'engine'. Returns ok iff
 // 'builder' successfully build the engine. If the result is not ok, 'engine'
 // will be set to nullptr
-// Once returned, 'builder' is not needed any more and can be safely detroyed.
+// Once returned, 'builder' is not needed any more and can be safely destroyed.
 //
-// - convert_successfully: indicates whether the converson to TensorRT network
+// - convert_successfully: indicates whether the conversion to TensorRT network
 // is successful. This is different than successfully building the engine:
 // building can still fail afterwards.
 Status ConvertGraphDefToEngine(
@@ -521,7 +521,7 @@ TEST_F(ValidatorTest, ConvertToTensorOrWeights) {
 "Scalar input tensor is not supported since the first dimension "
 "is treated as batch dimension by TRT");
 }
-// Convert non-Const. We test the case where the non-batch dimemsion is
+// Convert non-Const. We test the case where the non-batch dimension is
 // unknown as well, to make sure the validator allows that.
 for (const int32 non_batch_dim : {-1, 2}) {
 const int32 batch_size = 12;
@@ -973,7 +973,7 @@ TEST_F(ConverterTest, GetWeightRange) {
 TEST_F(ConverterTest, ProvideQuantizationRange) {
 FakeITensor fake_tensor;
-// Assymetric range
+// Asymmetric range
 converter_->ProvideQuantizationRange(&fake_tensor, 0.0f, 6.0f);
 EXPECT_EQ(6.0f, quantization_ranges()[&fake_tensor]);
 converter_->ProvideQuantizationRange(&fake_tensor, 1.0f, 6.0f);
@@ -125,7 +125,7 @@ class TRTEngineOp : public AsyncOpKernel {
 // Verify that the input shapes are consistent and can be handled by this op.
 Status VerifyInputShapes(const std::vector<TensorShape>& shapes);
-// Return engine batch in cached_engne_batch_sizes_ which is closest to input
+// Return engine batch in cached_engine_batch_sizes_ which is closest to input
 // batch.
 Status GetEngineInputShapes(
 const CacheType& cache,
@@ -112,7 +112,7 @@ TEST_F(TRTEngineResourceOpsTest, Basic) {
 EXPECT_TRUE(
 errors::IsNotFound(rm->Lookup(container, resource_name, &resource)));
-// Create the resouce using an empty file with InitializeTRTResource.
+// Create the resource using an empty file with InitializeTRTResource.
 Reset();
 Env* env = Env::Default();
 const string filename = io::JoinPath(testing::TmpDir(), "trt_engine_file");
@@ -466,7 +466,7 @@ Status SegmentGraph(const Graph* tf_graph,
 // grow from the output-side of the network towards the inputs.
 //
 // In general this is not guaranteed to produce a globally optimal
-// segmentation. For exaample, consider graph with node {A, B, C, D} and edges
+// segmentation. For example, consider graph with node {A, B, C, D} and edges
 // {A->B, A->C, B->D, C->D), where A, B, D are trt compatible but C is not, so
 // in theory we can choose to contract either A, B or B, D but not both, but
 // here it always choose to contract B, D.
@@ -34,7 +34,7 @@ namespace tensorrt {
 // TRTs pull model for calibration. When TRT implements a means for
 // a push calibration This class should be updated accordingly
-// IInt8EntropyCalibrator2 is prefferred for TRT 5.1+.
+// IInt8EntropyCalibrator2 is preferred for TRT 5.1+.
 #if NV_TENSORRT_MAJOR > 5 || (NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR >= 1)
 struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator2 {
 #else
@@ -572,7 +572,7 @@ Status Conditional::ExtractBodies(Graph* graph) {
 if (visited.at(n->id())) continue;
 visited[n->id()] = true;
-// Verify output edges and record control edges exitting scope.
+// Verify output edges and record control edges exiting scope.
 for (const Edge* e : n->out_edges()) {
 Node* dst = e->dst();
 if (IsMerge(dst)) continue;
@@ -602,7 +602,7 @@ Status Conditional::ExtractBodies(Graph* graph) {
 }
 }
-// Copying incomming edges to dst node. Iterate over a copy of the edges
+// Copying incoming edges to dst node. Iterate over a copy of the edges
 // as they could be mutated during iteration.
 std::vector<const Edge*> in_edges(n->in_edges().begin(),
 n->in_edges().end());
@@ -719,7 +719,7 @@ Status Conditional::ExtractBodies(Graph* graph) {
 ++index;
 // Connect the input to the merge_ with the retval, except if it is a
-// Swich node, which is handled separately.
+// Switch node, which is handled separately.
 for (auto e : m->in_edges()) {
 if (e->IsControlEdge()) continue;
 int branch_index = static_cast<int>(find_branch(e));
@@ -1139,7 +1139,7 @@ StateMap::CondId FunctionalizeCond::StateAlongEdge(const Edge* e) {
 // node. If we don't record this into CondState, branches might have
 // incorrect CondState (e.g. if the branch only has a Const data node).
 // We set it to kNeither because there is no way to tell whether it's
-// for true branch or false branch. This node's desendents might have
+// for true branch or false branch. This node's descendents might have
 // other incoming edges with defined BranchType, and we correctly handle
 // merging kNeither with other defined BranchType in StateAlongEdge().
 state[predicate] = BranchType::kNeither;
@@ -213,7 +213,7 @@ class FunctionalizeCond {
 // This populates the state_map_.
 Status DetermineStates(std::vector<Node*> rev_topo_order);
-// Determine the CondState for a given node using the incomming edges
+// Determine the CondState for a given node using the incoming edges
 // to the node. Note: it is expected that this node's CondState is only
 // determined once its input's CondState is.
 Status DetermineCondState(Node* dst) {
@@ -22,7 +22,7 @@ namespace tensorflow {
 namespace {
-// This TensorFlow op supports the Assert primitve.
+// This TensorFlow op supports the Assert primitive.
 class AssertOp : public XlaOpKernel {
 public:
 explicit AssertOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {}
@@ -329,7 +329,7 @@ class MaxPoolGradOp : public XlaOpKernel {
 (padding_ == VALID) ? xla::Padding::kValid : xla::Padding::kSame;
 // Create a MaxPool operation to check the expected resulting shape, and
-// then throw away the operation because we don't actually neeed it here.
+// then throw away the operation because we don't actually need it here.
 TensorShape expected_out_shape;
 auto pooling =
 xla::MaxPool(ctx->Input(0), ksize_, stride_, xla_padding,
@@ -37,7 +37,7 @@ class XlaConvOp : public XlaOpKernel {
 context, context->GetAttr("precision_config", &precision_config_attr));
 OP_REQUIRES(context,
 precision_config_.ParsePartialFromString(precision_config_attr),
-errors::InvalidArgument("Error parsing precison config."));
+errors::InvalidArgument("Error parsing precision config."));
 }
 void Compile(XlaOpKernelContext* context) override {
@@ -32,7 +32,7 @@ class XlaSvdOp : public XlaOpKernel {
 ctx->GetAttr("precision_config", &precision_config_attr));
 OP_REQUIRES(ctx,
 precision_config_.ParsePartialFromString(precision_config_attr),
-errors::InvalidArgument("Error parsing precison config."));
+errors::InvalidArgument("Error parsing precision config."));
 if (precision_config_.operand_precision_size() == 0) {
 precision_config_.mutable_operand_precision()->Add(
 xla::PrecisionConfig::HIGHEST);
@@ -84,7 +84,7 @@ lower: a boolean specifies whether the calculation is done with the lower
 max_iter: maximum number of sweep update, i.e., the whole lower triangular
 part or upper triangular part based on parameter lower. Heuristically, it has
-been argued that approximatly logN sweeps are needed in practice (Ref: Golub &
+been argued that approximately logN sweeps are needed in practice (Ref: Golub &
 van Loan "Matrix Computation").
 epsilon: the tolerance ratio.
@@ -116,7 +116,7 @@ a: the input tensor.
 max_iter: maximum number of sweep update, i.e., the whole lower triangular
 part or upper triangular part based on parameter lower. Heuristically, it has
-been argued that approximatly log(min (M, N)) sweeps are needed in practice
+been argued that approximately log(min (M, N)) sweeps are needed in practice
 (Ref: Golub & van Loan "Matrix Computation").
 epsilon: the tolerance ratio.
@@ -610,7 +610,7 @@ REGISTER_OP("XlaDequantize")
 .SetShapeFn(shape_inference::UnknownShape)
 .Doc(R"doc(
 Takes the packed uint32 input and unpacks the input to uint8 to do
-Dequantization on deivce.
+Dequantization on device.
 input: Input tensors whose types is uint32, shape is [d0, ..., dn].
 output: Output tensors whose types is bloat16. If transpose_output is true,
@@ -644,7 +644,7 @@ REGISTER_OP("XlaEinsum")
 .Doc(R"doc(
 An op which supports basic einsum op with 2 inputs and 1 output.
-This op has better TPU performnce since it doesn't have explicitly reshape and
+This op has better TPU performance since it doesn't have explicitly reshape and
 transpose operations as tf.einsum does.
 )doc");
@@ -51,7 +51,7 @@ xla::Shape TensorShapeToXLAShape(xla::PrimitiveType type,
 // In case the input shape is a tuple, the minor-to-major values will be in the
 // order of the tuple elements within the tuple shape.
 // If a shape (or a subshape of a tuple shape) has missing layout, a rank long
-// sequence of -1 values will be emittted.
+// sequence of -1 values will be emitted.
 xla::StatusOr<std::vector<int>> GetShapeLayoutVector(const xla::Shape& shape);
 // Given the input shape and a linearized sequence of the minor-to-major values
@@ -52,7 +52,7 @@ message Variable {
 TensorShapeProto shape = 3;
 DataType type = 4;
-// Flag for variables that are never assigned. Assigments to a read-only
+// Flag for variables that are never assigned. Assignments to a read-only
 // variable or unassigned variables that are not read-only are invalid.
 bool readonly = 5;
 }
@@ -213,13 +213,13 @@ class XlaOpKernelContext {
 return dynamic_dimension_is_minus_one_;
 }
-// Reads the current value of the resouce variable referred to by input
+// Reads the current value of the resource variable referred to by input
 // `index`. If `shape` is not nullptr, sets `*shape` to the shape of the
 // variable. Returns an error if the variable has not been initialized, or if
 // its type does not match `type`.
 Status ReadVariableInput(int index, DataType type, TensorShape* shape,
 xla::XlaOp* value);
-// Reads the current value of the resouce variable referred to by input
+// Reads the current value of the resource variable referred to by input
 // `name`.
 Status ReadVariableInput(absl::string_view name, DataType type,
 TensorShape* shape, xla::XlaOp* value);
@@ -73,7 +73,7 @@ void BuildComparatorAndComparisons(ComparatorsTest* test,
 }
 }
-// Concantenate the comparison results.
+// Concatenate the comparison results.
 ConcatInDim(test->builder(), all_comparisons, 0);
 // If we use less-than comparisons, we expect the comparison to result in true
@@ -316,7 +316,7 @@ Status ValidateEinsumNumericDimensions(absl::Span<const int64> x_config,
 namespace {
 // Helper method to remove dimensions from a shape and dot dimension numbers
-// used to implment implicit broadcasting.
+// used to implement implicit broadcasting.
 template <typename C>
 void DeleteDimsFromContainer(absl::Span<const int64> to_delete, Shape* shape,
 C* batch_dims, C* contracting_dims) {
@@ -473,7 +473,7 @@ xla::XlaOp Einsum(xla::XlaOp x, absl::Span<const int64> x_config, xla::XlaOp y,
 transpose_dims[output_transpose_dims[i]] = i;
 }
-// Remove ones that where broadcated from the x and the y shape and adjust
+// Remove ones that where broadcasted from the x and the y shape and adjust
 // the dimension numbers that are more minor than those dimensions.
 DeleteDimsFromContainer(lhs_delete_dims, &x_shape,
 dnums.mutable_lhs_batch_dimensions(),
@@ -132,7 +132,7 @@ xla::XlaOp Einsum(
 // the input.
 xla::XlaOp EinsumDiagonal(XlaOp x, absl::Span<const int64> config);
-// Same as above but supporting numeric labels on dimensins. So "ab,cb->ac"
+// Same as above but supporting numeric labels on dimensions. So "ab,cb->ac"
 // becomes:
 // x_config = {0, 1}
 // y_config = {2, 1}
@@ -39,7 +39,7 @@ XlaOp AvgPoolDivideByCountWithGeneralPadding(
 std::vector<int64> window_ksize(num_spatial_dims);
 std::vector<int64> window_stride(num_spatial_dims);
 CHECK_EQ(data_format.num_spatial_dims(), num_spatial_dims)
-<< "Invalid number of spatial dimentions in data format specification";
+<< "Invalid number of spatial dimensions in data format specification";
 for (int i = 0; i < num_spatial_dims; ++i) {
 int dim = data_format.spatial_dimension(i);
 input_dim_sizes[i] = input_shape[dim];
@@ -95,7 +95,7 @@ PaddingConfig MakeSpatialPaddingConfig(
 padding_config.add_dimensions();
 }
 CHECK_EQ(data_format.num_spatial_dims(), num_spatial_dims)
-<< "Invalid number of spatial dimentions in data format specification";
+<< "Invalid number of spatial dimensions in data format specification";
 for (int i = 0; i < num_spatial_dims; ++i) {
 int dim = data_format.spatial_dimension(i);
 auto padding_dimension = padding_config.mutable_dimensions(dim);
@@ -178,7 +178,7 @@ std::vector<std::pair<int64, int64>> MakeSpatialPadding(
 std::vector<int64> kernel_size_spatial_dimensions;
 std::vector<int64> stride_spatial_dimensions;
 CHECK_EQ(data_format.num_spatial_dims(), num_spatial_dims)
-<< "Invalid number of spatial dimentions in data format specification";
+<< "Invalid number of spatial dimensions in data format specification";
 for (int i = 0; i < num_spatial_dims; ++i) {
 int dim = data_format.spatial_dimension(i);
 input_spatial_dimensions.push_back(input_size[dim]);
@@ -154,29 +154,29 @@ XlaOp TorchGather(XlaOp input, XlaOp index, int64 dim, bool sparse) {
 return TorchIndexSelect(input, index, 0);
 }
 if (!sparse) {
-std::vector<int64> index_broacast_dims;
-std::vector<int64> input_broacast_dims;
+std::vector<int64> index_broadcast_dims;
+std::vector<int64> input_broadcast_dims;
 std::vector<int64> sizes;
 for (int64 i = 0; i < index_shape.rank(); ++i) {
 if (i < dim) {
-input_broacast_dims.push_back(i);
-index_broacast_dims.push_back(i);
+input_broadcast_dims.push_back(i);
+index_broadcast_dims.push_back(i);
 } else if (i == dim) {
 sizes.push_back(input_shape.dimensions(i));
-input_broacast_dims.push_back(i);
-index_broacast_dims.push_back(i + 1);
+input_broadcast_dims.push_back(i);
+index_broadcast_dims.push_back(i + 1);
 } else {
-input_broacast_dims.push_back(i + 1);
-index_broacast_dims.push_back(i + 1);
+input_broadcast_dims.push_back(i + 1);
+index_broadcast_dims.push_back(i + 1);
 }
 sizes.push_back(index_shape.dimensions(i));
 }
 auto mask = Eq(
-BroadcastInDim(index, sizes, index_broacast_dims),
+BroadcastInDim(index, sizes, index_broadcast_dims),
 Iota(builder, ShapeUtil::MakeShape(index_shape.element_type(), sizes),
 dim));
 auto masked_input = Select(
-mask, BroadcastInDim(input, sizes, input_broacast_dims),
+mask, BroadcastInDim(input, sizes, input_broadcast_dims),
 Zeros(builder,
 ShapeUtil::MakeShape(input_shape.element_type(), sizes)));
 return Reduce(masked_input, Zero(builder, input_shape.element_type()),
@@ -214,25 +214,25 @@ XlaOp TorchScatterDense(XlaOp input, XlaOp index, XlaOp src, int64 dim,
 return builder->ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
 TF_ASSIGN_OR_RETURN(Shape index_shape, builder->GetShape(index));
 TF_ASSIGN_OR_RETURN(Shape input_shape, builder->GetShape(input));
-std::vector<int64> index_broacast_dims;
+std::vector<int64> index_broadcast_dims;
 std::vector<int64> sizes;
 for (int64 i = 0; i < index_shape.rank(); ++i) {
 if (i < dim) {
-index_broacast_dims.push_back(i);
+index_broadcast_dims.push_back(i);
 } else {
 if (i == dim) {
 sizes.push_back(input_shape.dimensions(i));
 }
-index_broacast_dims.push_back(i + 1);
+index_broadcast_dims.push_back(i + 1);
 }
 sizes.push_back(index_shape.dimensions(i));
 }
 auto mask =
-Eq(BroadcastInDim(index, sizes, index_broacast_dims),
+Eq(BroadcastInDim(index, sizes, index_broadcast_dims),
 Iota(builder,
 ShapeUtil::MakeShape(index_shape.element_type(), sizes), dim));
 auto masked_src =
-Select(mask, BroadcastInDim(src, sizes, index_broacast_dims),
+Select(mask, BroadcastInDim(src, sizes, index_broadcast_dims),
 Zeros(builder,
 ShapeUtil::MakeShape(input_shape.element_type(), sizes)));
@@ -98,7 +98,7 @@ std::vector<std::unique_ptr<GlobalData>> MakeFakeArgumentsOrDie(
 const XlaComputation& computation, Client* client,
 DebugOptions* debug_opts /*=nullptr*/) {
 CHECK(computation.proto().has_host_program_shape())
-<< "Computation should have progran shape.";
+<< "Computation should have program shape.";
 auto program_shape = computation.proto().host_program_shape();
 std::vector<std::unique_ptr<GlobalData>> results;
@@ -329,15 +329,15 @@ StatusOr<int> LocalClient::ReplicaNumberToDeviceOrdinal(int replica_number) {
 }
 StatusOr<TransferToServerResponse> LocalClient::TransferToLocalServer(
-const ::xla::BorrowingLiteral& literal, int device_oridinal) {
+const ::xla::BorrowingLiteral& literal, int device_ordinal) {
 const ::xla::Shape& shape = literal.shape();
 TF_ASSIGN_OR_RETURN(
 ::xla::ScopedShapedBuffer shaped_buffer,
 backend().transfer_manager()->AllocateScopedShapedBuffer(
-shape, backend().memory_allocator(), device_oridinal));
+shape, backend().memory_allocator(), device_ordinal));
 TF_ASSIGN_OR_RETURN(auto stream,
-mutable_backend()->BorrowStream(device_oridinal));
+mutable_backend()->BorrowStream(device_ordinal));
 TF_RETURN_IF_ERROR(backend().transfer_manager()->TransferLiteralToDevice(
 stream.get(), literal, shaped_buffer));
 std::vector<::xla::ScopedShapedBuffer> replicated_buffer;
@@ -122,7 +122,7 @@ class LocalClient : public Client {
 // Transfer the BorrowingLiteral to the device with the given ordinal.
 StatusOr<TransferToServerResponse> TransferToLocalServer(
-const ::xla::BorrowingLiteral& literal, int device_oridinal);
+const ::xla::BorrowingLiteral& literal, int device_ordinal);
 // Copy the data from the device contained in the given ShapedBuffer and
 // return as a Literal.
@@ -232,7 +232,7 @@ class XlaBuilder {
 // added operation.
 //
 // `remove_dynamic_dimensions` tells the builder whether to remove the
-// dyanmic dimensions information in all ops.
+// dynamic dimensions information in all ops.
 //
 // TODO(b/121223198): Delete `remove_dynamic_dimensions` and keeps the
 // dynamic dimensions information when XLA backend can handle dynamic
@@ -1194,7 +1194,7 @@ XlaOp Broadcast(XlaOp operand, absl::Span<const int64> broadcast_sizes);
 //
 // For example, say operand = {1, 2}, i.e., a 1D tensor in shape s32[2]; the
 // output shape is s32[2,2]:
-// - Specifying {1} as brodcast_dimension will generate output
+// - Specifying {1} as broadcast_dimension will generate output
 // {{1, 2},
 // {1, 2}}
 // - On the other hand, specifying {0} as broadcast_dimension
@@ -1469,7 +1469,7 @@ XlaOp TriangularSolve(XlaOp a, XlaOp b, bool left_side, bool lower,
 // two minor dimensions equal.
 // If `lower` is true, the data from the lower triangle is used; if false, the
 // upper triangle is used. The input data in the other triangle of the input
-// does not affect the output. Returns the output in the same lower/uppper
+// does not affect the output. Returns the output in the same lower/upper
 // triangle. The data returned in the other output triangle is arbitrary and
 // implementation-defined.
 //
@@ -292,7 +292,7 @@ TEST_F(XlaBuilderTest, BinopHasInDimAndDegenerateBroadcast) {
 TF_ASSERT_OK_AND_ASSIGN(auto module, BuildHloModule(&b));
 // The binary operation has in-dim broadcast and degenerate broadcast, should
-// first do the in-dim broadcast then convert the degnerate broadcast into a
+// first do the in-dim broadcast then convert the degenerate broadcast into a
 // reshape and a broadcast.
 //
 // Expected:
@@ -328,7 +328,7 @@ static void AllocateFlags() {
 "use multi-threaded Eigen mode."),
 tensorflow::Flag("xla_gpu_cuda_data_dir",
 flag_values->mutable_xla_gpu_cuda_data_dir(),
-"If non-empty, speficies a local directory containing "
+"If non-empty, specifies a local directory containing "
 "ptxas and nvvm libdevice files; otherwise we use "
 "those from runfile directories."),
 tensorflow::Flag("xla_gpu_ftz",
@@ -347,7 +347,7 @@ static void AllocateFlags() {
 flag_values->xla_gpu_max_kernel_unroll_factor(),
 "Specify the maximum kernel unroll factor for the GPU backend."),
 tensorflow::Flag("xla_gpu_ptx_file", setter_for_xla_gpu_ptx_file, "",
-"If non-empty, speficies a file containing ptx to use. "
+"If non-empty, specifies a file containing ptx to use. "
 "The filename prefix must have the same pattern as PTX "
 "dumped by XLA. This allows to match one specific "
 "module. General workflow. Get the generated module "
@@ -52,7 +52,7 @@ DebugOptions DefaultDebugOptionsIgnoringFlags();
 // By default all passes have infinite fuel. You can restrict how much fuel a
 // pass has by specifying XLA_FLAGS=--xla_fuel=PASS1=NUM1,PASS2=NUM2,...
 //
-// If a user specifes --xla_fuel=PASS=NUM but ConsumeFuel(PASS) is not called
+// If a user specifies --xla_fuel=PASS=NUM but ConsumeFuel(PASS) is not called
 // before the program exits, we'll print a warning.
 //
 // We recommend as a convention you use a pass's name for the `pass` argument,
@@ -91,7 +91,7 @@ bool ConsumeFuel(absl::string_view pass,
 // startup.
 //
 // You may call this function twice in the same thread to reset its fuel pool
-// back to the intitial state.
+// back to the initial state.
 void ResetThreadLocalFuel();
 } // namespace xla
@@ -21,7 +21,7 @@ limitations under the License.
 namespace xla {
 // Create a default ExecutionOptions proto; this proto has its debug options
-// popupated to the default values taken from flags.
+// populated to the default values taken from flags.
 ExecutionOptions CreateDefaultExecutionOptions();
 } // namespace xla
@@ -94,7 +94,7 @@ The participating cores can be configured by:
 in the same order of 1, 2, 3. Then, another AllToAll will be applied within
 replicas 4, 5, 0, and the concatenation order is also 4, 5, 0. If
 `replica_groups` is empty, all replicas belong to one group, in the
-concatenation order of their appearence.
+concatenation order of their appearance.
 Prerequisites:
@@ -248,7 +248,7 @@ Status MutableLiteralBase::CopySliceFromInternal(
 TF_RET_CHECK(src_base.size() == copy_size.size());
 // Scan the source from minor, stepping in copy size blocks, then within
-// the index enumaration functor, do a strided copy advancing source index
+// the index enumeration functor, do a strided copy advancing source index
 // by one (walking through the minor dimension), and destination index by
 // proper stride size at the matching dimension.
 DimensionVector src_indexes(src_base.size(), 0);
@@ -810,7 +810,7 @@ class Literal : public MutableLiteralBase {
 Literal(const Shape& shape, bool allocate_arrays);
 Literal& operator=(Literal&& other);
-// Similar to CopyFrom, but with move semantincs. The subshape of this literal
+// Similar to CopyFrom, but with move semantics. The subshape of this literal
 // rooted at 'dest_shape_index' must be *equal* to the shape 'src_literal'
 // (layouts and shapes must match), but need not be arrays. The memory
 // allocated in this literal for the subshape at dest_shape_index is
@@ -883,7 +883,7 @@ class BorrowingLiteral : public LiteralBase {
 BorrowingLiteral() : LiteralBase() {}
 // 'src_buf_ptr' is not owned by this class and must outlive the
-// lifetime of this class. It points to an appropirately sized buffer with
+// lifetime of this class. It points to an appropriately sized buffer with
 // data interpretered as indicated by 'shape'.
 // This constructor is only used for array shapes.
 BorrowingLiteral(const char* src_buf_ptr, const Shape& shape);
@@ -433,7 +433,7 @@ class NearComparator {
 }
 } else if (IsInf(expected) || IsInf(actual)) {
 // If either the expected or actual value is infinity but not both,
-// then both absolute and relative error are regarded as inifity.
+// then both absolute and relative error are regarded as infinity.
 CHECK(!CompareEqual(expected, actual, {linear_index}));
 abs_error = std::numeric_limits<float>::infinity();
 rel_error = std::numeric_limits<float>::infinity();
@@ -1134,7 +1134,7 @@ TEST_F(LiteralUtilTest, CopyFromDifferentShapes) {
 TEST_F(LiteralUtilTest, F16) {
 // Verify that the internal data views are consistent and that they
 // are in little endian format
-// TODO - modify if we make the data format machine endianess dependent
+// TODO - modify if we make the data format machine endianness dependent
 Literal m1 = Literal::CreateFromShape(ShapeUtil::MakeShape(F16, {2, 2}));
 const char* d1 = reinterpret_cast<const char*>(m1.data<half>().data());
 EXPECT_EQ(d1[0], 0);
@@ -30,7 +30,7 @@ limitations under the License.
 // - <single-quote><characters string not containing nul or
 // single-quote><single_quote> in which case the effective value is the
 // string with the single-quotes removed
-// - <double-quote><character string not containing nul or unesecaped
+// - <double-quote><character string not containing nul or unescaped
 // double-quote><double_quote> in which case the effective value if the
 // string with the double-quotes removed, and escaped sequences of
 // <backslash><char> replaced by <char>.
@@ -73,14 +73,14 @@ static const char kTestFlagString[] =
 "--single_quoted='single quoted \\\\ \n \"' "
 "--double_quoted=\"double quoted \\\\ \n '\\\"\" ";
-// Test that the environent variable is parsed correctly.
+// Test that the environment variable is parsed correctly.
 TEST(ParseFlagsFromEnv, Basic) {
 // Prepare environment.
 setenv("TF_XLA_FLAGS", kTestFlagString, true /*overwrite*/);
 TestParseFlagsFromEnv("(flags in environment variable)");
 }
-// Test that a file named by the environent variable is parsed correctly.
+// Test that a file named by the environment variable is parsed correctly.
 TEST(ParseFlagsFromEnv, File) {
 // environment variables where tmp dir may be specified.
 static const char* kTempVars[] = {"TEST_TMPDIR", "TMP"};
@@ -154,7 +154,7 @@ int main(int argc, char* argv[]) {
 xla::int32 int_flag = 1;
 const std::vector<tensorflow::Flag> flag_list = {
 tensorflow::Flag("recursing", &recursing,
-"Whether the binary is being invoked recusively."),
+"Whether the binary is being invoked recursively."),
 tensorflow::Flag("int_flag", &int_flag, "An integer flag to test with"),
 };
 xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list);
@@ -551,7 +551,7 @@ PyLocalBuffer::DestructureTuple() {
 absl::MutexLock lock(&mu_);
 if (!on_host_shape_.IsTuple()) {
 return InvalidArgument(
-"Attemped to destructure a PyLocalBuffer that did not have a tuple "
+"Attempted to destructure a PyLocalBuffer that did not have a tuple "
 "shape; shape: %s",
 ShapeUtil::HumanString(on_host_shape_));
 }
@@ -345,7 +345,7 @@ PyTpuBuffer::DestructureTuple() {
 tensorflow::profiler::TraceMe traceme("PyTpuBuffer::DestructureTuple");
 if (!on_host_shape_.IsTuple()) {
 return InvalidArgument(
-"Attemped to destructure a PyTpuBuffer that did not have a tuple "
+"Attempted to destructure a PyTpuBuffer that did not have a tuple "
 "shape; shape: %s",
 ShapeUtil::HumanString(on_host_shape_));
 }
@@ -37,7 +37,7 @@
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/platform/logging.h"
-// This API is EXPERIMENTAL and under active developement. It is subject to
+// This API is EXPERIMENTAL and under active development. It is subject to
 // change without notice.
 namespace tpu_driver {
@@ -1526,7 +1526,7 @@ class ComputationBuilder(object):
 ConvWithGeneralPadding.
 feature_group_count: number of feature groups for grouped convolution.
 batch_group_count: number of batch groups for grouped convolution.
-Returns: a XlaOp representing the ConvGenralDilated operation.
+Returns: a XlaOp representing the ConvGeneralDilated operation.
 """
 if dimension_numbers is None:
 dimension_numbers = self._GetConvDimensionNumbers(len(window_strides))
@@ -25,10 +25,10 @@ import numpy as _np # Avoids becoming a part of public Tensorflow API.
 from tensorflow.compiler.xla import xla_data_pb2
 from tensorflow.python.framework import dtypes
-# Records corresponsence between a XLA primitive type and Python/Numpy types.
+# Records correspondence between a XLA primitive type and Python/Numpy types.
 #
 # primitive_type: value of type xla_data_pb2.PrimitiveType
-# numpy_dtype: corresponsing Numpy "dtype" (like np.float32)
+# numpy_dtype: corresponding Numpy "dtype" (like np.float32)
 # literal_field_name: name of the field in the LiteralProto message elements
 # of this type go into.
 # literal_field_type: type of the field named 'literal_field_name'.
@ -673,7 +673,7 @@ Status AlgebraicSimplifierVisitor::HandleBitcast(HloInstruction* bitcast) {
|
||||
bitcast, HloInstruction::CreateBitcast(bitcast->shape(), op));
|
||||
}
|
||||
// All bitcasts can be eliminated (assuming layout constraints are
|
||||
// satisified).
|
||||
// satisfied).
|
||||
ReplaceInstructionIfSameShape(bitcast, bitcast->mutable_operand(0));
|
||||
return Status::OK();
|
||||
}
|
||||
@ -692,7 +692,7 @@ Status AlgebraicSimplifierVisitor::HandleCopy(HloInstruction* copy) {
|
||||
return ReplaceWithNewInstruction(
|
||||
copy, HloInstruction::CreateUnary(copy->shape(), HloOpcode::kCopy, op));
|
||||
}
|
||||
// All copies can be eliminated (assuming layout constraints are satisified).
|
||||
// All copies can be eliminated (assuming layout constraints are satisfied).
|
||||
if (ReplaceInstructionIfSameShape(copy, copy->mutable_operand(0))) {
|
||||
return Status::OK();
|
||||
}
|
||||
@ -2735,7 +2735,7 @@ Status AlgebraicSimplifierVisitor::HandlePower(HloInstruction* power) {
|
||||
|
||||
// Don't perform this optimization if either of the exponents is complex; this
|
||||
// identity is true only for real-valued exponents. In addition, we cowardly
|
||||
// refuse to do this transformation if the two expontents have different
|
||||
// refuse to do this transformation if the two exponents have different
|
||||
// element types.
|
||||
if (lhs->opcode() == HloOpcode::kPower &&
|
||||
!ShapeUtil::ElementIsComplex(lhs->operand(1)->shape()) &&
|
||||
|
@ -4756,7 +4756,7 @@ TEST_P(DotStrengthReductionTest, DotStrengthReduction) {
|
||||
const bool computation_should_be_modified =
|
||||
dot_should_be_transformed || (transpose_lhs && transpose_rhs);
|
||||
EXPECT_EQ(changed, computation_should_be_modified);
|
||||
// The second pass of algebriac simplifer will remove dots without
|
||||
// The second pass of algebraic simplifier will remove dots without
|
||||
// non-contracting dimensions or contracting dimensions.
|
||||
TF_ASSERT_OK_AND_ASSIGN(changed, simplifier.Run(module.get()));
|
||||
EXPECT_EQ(changed, computation_should_be_modified);
|
||||
|
@ -38,7 +38,7 @@ namespace {
|
||||
|
||||
class BatchNormExpanderTest : public HloTestBase {
|
||||
protected:
|
||||
// BatchNorm should have a dynamic sized dividor for mean operations.
|
||||
// BatchNorm should have a dynamic sized divider for mean operations.
|
||||
int64 CountGetDimensionSize(const HloModule& module) {
|
||||
int64 count = 0;
|
||||
for (HloComputation* comp : module.computations()) {
|
||||
|
@ -1608,7 +1608,7 @@ StatusOr<std::unique_ptr<BufferAssignment>> BufferAssigner::CreateAssignment(
|
||||
/*is_thread_local=*/false, &buffers_to_assign_sequentially,
|
||||
assignment.get()));
|
||||
// Assign buffers with sequential ordering, if any. If all global
|
||||
// computations are sequential, we can run heap simuation on the whole
|
||||
// computations are sequential, we can run heap simulation on the whole
|
||||
// module, which reduces memory usage.
|
||||
const bool run_whole_module_heap_simulation =
|
||||
buffers_to_assign_sequentially.size() == global_computations.size();
|
||||
|
@ -770,7 +770,7 @@ TEST_F(BufferAssignmentTest, PresetAssignments) {
|
||||
}
|
||||
|
||||
TEST_F(BufferAssignmentTest, PresetAssignmentsWhile) {
|
||||
// Tests preset assignments when there is no 1-to-1 corrspondance between
|
||||
// Tests preset assignments when there is no 1-to-1 correspondence between
|
||||
// HloValue and HloBuffer (i.e., a while loop).
|
||||
auto module = CreateNewVerifiedModule();
|
||||
Shape f32vec10_color1 =
|
||||
|
@ -160,7 +160,7 @@ class BufferValue {
|
||||
BufferValue(HloInstruction* instruction, const ShapeIndex& index, Id id);
|
||||
|
||||
private:
|
||||
// The definining instruction and index are not stored here; they can be found
|
||||
// The defining instruction and index are not stored here; they can be found
|
||||
// in the LogicalBuffer and HloValue subclasses. This class exists only to
|
||||
// support migrations from TuplePointsToAnalysis to HloDataflowAnalysis, by
|
||||
// allowing abstract use of LogicalBuffer or HloValue. After those migrations
|
||||
|
@ -27,7 +27,7 @@ namespace {
|
||||
|
||||
// Traverses the callee computation, inlining cloned nodes into the caller
|
||||
// computation and connecting them to producers/consumers appropriately.
|
||||
// When the traversal has completed, the provided call instruction is entriely
|
||||
// When the traversal has completed, the provided call instruction is entirely
|
||||
// replaced in the caller's graph.
|
||||
class SubcomputationInsertionVisitor : public DfsHloVisitorWithDefault {
|
||||
public:
|
||||
|
@ -93,7 +93,7 @@ std::pair<XlaOp, XlaOp> CholeskyUnblocked(
|
||||
Zeros(body_builder,
|
||||
ShapeUtil::MakeShape(a_shape.element_type(), matrix_dims));
|
||||
// L * L.T, This matrix has of a lot of multiplying with zero
|
||||
// (namely, L[:, j:] = 0) and redudant computation, but it is faster
|
||||
// (namely, L[:, j:] = 0) and redundant computation, but it is faster
|
||||
// than slice.
|
||||
auto l_square = BatchDot(body_l, false, body_l, true, precision);
|
||||
|
||||
|
@ -32,7 +32,7 @@ namespace xla {
|
||||
|
||||
enum class ReductionKind { SUM, PRODUCT, MIN, MAX };
|
||||
|
||||
// Atempts to match computation to one of the possible cases in ReductionKind.
|
||||
// Attempts to match computation to one of the possible cases in ReductionKind.
|
||||
absl::optional<ReductionKind> MatchReductionComputation(
|
||||
const HloComputation* computation);
|
||||
|
||||
|
@ -47,7 +47,7 @@ namespace xla {
|
||||
// The following types are used for ahead of time compilation.
|
||||
|
||||
// Contains the object file data created as a result of ahead-of-time
|
||||
// compuation.
|
||||
// computation.
|
||||
using ObjectFileData = std::vector<char>;
|
||||
|
||||
// Abstract superclass describing the result of an ahead-of-time compilation.
|
||||
|
@ -71,7 +71,7 @@ class ComputationPlacer {
|
||||
|
||||
// Returns the device id assigned to the given replica and computation
|
||||
// instance for [replica_count x computation_count] setup. The returned device
|
||||
// id must match the assignement from PlaceReplicatedComputation().
|
||||
// id must match the assignment from PlaceReplicatedComputation().
|
||||
virtual StatusOr<int> DeviceId(int replica, int computation,
|
||||
int replica_count, int computation_count);
|
||||
|
||||
|
@ -189,7 +189,7 @@ StatusOr<bool> TryRemoveUnusedConditionalOperands(
|
||||
}
|
||||
for (HloInstruction* user : param->users()) {
|
||||
// If the user is not a get tuple element, assume it is unsafe to remove
|
||||
// elemnts from the tuple.
|
||||
// elements from the tuple.
|
||||
if (user->opcode() != HloOpcode::kGetTupleElement) {
|
||||
return false;
|
||||
}
|
||||
|
@ -393,7 +393,7 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) {
|
||||
const int64 depthwise_multiplier =
|
||||
filter->shape().dimensions(kernel_output_feature_dim) / group_count;
|
||||
// Split the kernel output feature dimension into group count and
|
||||
// depthwise mutlipler.
|
||||
// depthwise mutilipler.
|
||||
for (int64 i = 0; i < filter->shape().rank(); ++i) {
|
||||
if (i == kernel_output_feature_dim) {
|
||||
new_filter_dimension.push_back(group_count);
|
||||
@ -439,7 +439,7 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) {
|
||||
new_dim->set_window_dilation(1);
|
||||
new_dim->set_base_dilation(1);
|
||||
|
||||
// Split the output feature dimension into and output featrue of group
|
||||
// Split the output feature dimension into and output feature of group
|
||||
// count and depthwise multipler as an output spatial dimension.
|
||||
std::vector<int64> new_output_dimension;
|
||||
new_output_dimension.reserve(convolution->shape().rank() + 1);
|
||||
|
@ -1177,7 +1177,7 @@ TEST_F(WhileCopyInsertionTest, InitPointsToNonDistinct) {
|
||||
|
||||
InsertCopies(module_.get());
|
||||
|
||||
// The entry computation requires two copies to resolve the non-disinctness of
|
||||
// The entry computation requires two copies to resolve the non-distinctness of
|
||||
// two init elements and the constant passed in as one of the init
|
||||
// elements. Either element can be copied for the distinctness issue.
|
||||
EXPECT_EQ(CountCopies(*module_->entry_computation()), 2);
|
||||
@ -1996,7 +1996,7 @@ void BM_ParallelWhiles(int num_iters, int num_whiles) {
|
||||
tensorflow::testing::StopTiming();
|
||||
|
||||
// Each body receives of copy of two of the parameters (the corresponding
|
||||
// elements in the body are modifed), and there is one copy in each body.
|
||||
// elements in the body are modified), and there is one copy in each body.
|
||||
ASSERT_EQ(CountCopies(module), 3 * num_whiles);
|
||||
}
|
||||
}
|
||||
|
@ -350,7 +350,7 @@ Status CpuCompiler::RunHloPassesAfterLayoutAssn(
|
||||
// duplicate or NOPs, so remove them with algebraic simplification and CSE.
|
||||
{
|
||||
auto& pass = pipeline.AddPass<HloPassFix<HloPassPipeline>>(
|
||||
"simplification after layout assignement");
|
||||
"simplification after layout assignment");
|
||||
pass.AddInvariantChecker<HloVerifier>(
|
||||
/*layout_sensitive=*/true,
|
||||
/*allow_mixed_precision=*/false,
|
||||
|
@ -327,7 +327,7 @@ StatusOr<ExecutionOutput> CpuExecutable::ExecuteAsyncOnStream(
|
||||
//
|
||||
// Logically we want this lambda to capture `buffers` by move, ultimately our
|
||||
// functor needs to be wrapped in an std::function, and that requires its
|
||||
// functor to be copyable. Thus we perpitrate the hack of capturing buffers
|
||||
// functor to be copyable. Thus we perpetrate the hack of capturing buffers
|
||||
// "by shared pointer".
|
||||
//
|
||||
// We also need to change the types of some of the variables we capture:
|
||||
|
@ -28,7 +28,7 @@ namespace cpu {
|
||||
|
||||
// We want to change the layout of constant arrays to be column major when all
|
||||
// of their users are dot operations that can be made faster with the flipped
|
||||
// layout. To avoid going quadriatic over the # of instructions, we cache this
|
||||
// layout. To avoid going quadratic over the # of instructions, we cache this
|
||||
// property in should_make_rhs_col_major -- it maps a constant to true if all of
|
||||
// the users of said constant are dot operations that can be sped up. This
|
||||
// cache is populated lazily as we encounter dot operations traversing the
|
||||
|
@ -84,7 +84,7 @@ enum class DotImplementationStrategy {
|
||||
// supported.
|
||||
kTiledLlvmIrGemv,
|
||||
|
||||
// The dot operation is lowered into LLVM IR that implemetns a tiled
|
||||
// The dot operation is lowered into LLVM IR that implements a tiled
|
||||
// Matrix*Matrix operation. No fusions are supported. The two inputs
|
||||
// and the output have to be row major.
|
||||
kTiledLlvmIrGemm,
|
||||
|
@ -63,7 +63,7 @@ enum class DotImplementationStrategy {
|
||||
// supported.
|
||||
kTiledLlvmIrGemv,
|
||||
|
||||
// The dot operation is lowered into LLVM IR that implemetns a tiled
|
||||
// The dot operation is lowered into LLVM IR that implements a tiled
|
||||
// Matrix*Matrix operation. No fusions are supported. The two inputs
|
||||
// and the output have to be row major.
|
||||
kTiledLlvmIrGemm,
|
||||
|
@ -1159,7 +1159,7 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) {
|
||||
/*instruction=*/*convolution, /*operands=*/{lhs, rhs},
|
||||
/*supported_types=*/{F16, F32, F64, C64, C128}));
|
||||
|
||||
// TODO(tonywy): Add PotentiallyImplementedAsMKLCovolution to support
|
||||
// TODO(tonywy): Add PotentiallyImplementedAsMKLConvolution to support
|
||||
// different data layouts.
|
||||
if (PotentiallyImplementedAsEigenConvolution(*convolution,
|
||||
target_machine_features_)) {
|
||||
|
@ -294,7 +294,7 @@ class IrEmitter : public DfsHloVisitorWithDefault,
|
||||
absl::string_view name);
|
||||
|
||||
// Emits a call to a "global" function (e.g. to the computation nested within
|
||||
// a kWhile or a kCall). Buffer assignment unabiguously assignes buffers to
|
||||
// a kWhile or a kCall). Buffer assignment unabiguously assigns buffers to
|
||||
// the parameters and return values for these computations so there is no need
|
||||
// to explicitly pass parameters or return results.
|
||||
void EmitGlobalCall(const HloComputation& callee, absl::string_view name);
|
||||
@ -366,7 +366,7 @@ class IrEmitter : public DfsHloVisitorWithDefault,
|
||||
// without generating IR with illegal (e.g. excessively large or
|
||||
// non-power-of-two) vector types. We do this by introducing a layer of
|
||||
// abstraction: we introduce a high level vector-like concept called a
|
||||
// "sharded vector" that models data paralleism, and is mapped to a sequence
|
||||
// "sharded vector" that models data parallelism, and is mapped to a sequence
|
||||
// scalar and vector llvm::Value s.
|
||||
//
|
||||
// For example, we can represent 29 f32 elements by a sharded vector mapped to
|
||||
|
@ -185,7 +185,7 @@ llvm::Value* GenerateVF32Exp(llvm::IRBuilder<>* b, llvm::Value* input,
|
||||
// value of n clamped to [-127, 127]. In the case where n' = 127, `a` can grow
|
||||
// up to as large as 88.8 - 127 * log(2) which is about 0.7703. Even though
|
||||
// this value of `a` is outside our previously specified range, e^a will still
|
||||
// only have a relative error of approximetely 2^-16 at worse. In practice
|
||||
// only have a relative error of approximately 2^-16 at worse. In practice
|
||||
// this seems to work well enough; it passes our exhaustive tests, breaking
|
||||
// only one result, and by one ulp (we return exp(88.7228394) = max-float but
|
||||
// we should return inf).
|
||||
|
Some files were not shown because too many files have changed in this diff.