minor spelling tweaks

Kazuaki Ishizaki 2020-03-16 18:49:54 +09:00
parent 092ae742c3
commit 51d76d6f72
31 changed files with 60 additions and 60 deletions

View File

@ -38,7 +38,7 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteDelegate* delegate,
}
// There is no easy way to pass a parameter into the TfLiteDelegate's
-// 'prepare' function, so we keep a global map for testing purpused.
+// 'prepare' function, so we keep a global map for testing purposes.
// To avoid collisions use: GetPrepareFunction<__LINE__>().
std::map<int, std::vector<int>>* GetGlobalOpLists() {
static auto* op_list = new std::map<int, std::vector<int>>;
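The comment above describes a workaround for a callback that has no user-data parameter: key a global map by `__LINE__` through a function template so every call site gets its own slot. A minimal self-contained sketch of that idea (illustrative names, not the actual TFLite test helper):

```c++
#include <map>
#include <vector>

// Global registry shared by all tests in the translation unit.
std::map<int, std::vector<int>>* GetGlobalOpLists() {
  static auto* op_list = new std::map<int, std::vector<int>>;
  return op_list;
}

// Each KEY instantiates a distinct function, so GetOpList<__LINE__>() avoids
// collisions between tests that live on different lines of the same file.
template <int KEY>
std::vector<int>* GetOpList() {
  return &(*GetGlobalOpLists())[KEY];
}
```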

View File

@ -113,7 +113,7 @@ const TfLiteGpuDelegateOptionsV2 kDefaultOptions =
TfLiteGpuDelegateOptionsV2Default();
```
-Similar for `NewTfLiteMetalDelgate()`:
+Similar for `NewTfLiteMetalDelegate()`:
```c++
const TfLiteMetalDelegateOptions kDefaultOptions = {

View File

@ -124,9 +124,9 @@ class ProfilingCommandQueue : public CLCommandQueue {
double GetQueueExecutionTimeMs() const;
// Difference from GetQueueExecutionTimeMs is that this number doesn't include
-// time between kernels(kernels launchs or preparing) on GPU. Usually, this
+// time between kernels(kernels launches or preparing) on GPU. Usually, this
// time should be 5-10% better than GetQueueExecutionTimeMs, because 5-10%
-// spend on something else(maybe kernels launchs or preparing)
+// spend on something else(maybe kernels launches or preparing)
double GetSumOfEventsTimeMs() const;
// This label will be used for all subsequent dispatches.
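The two queries above differ in whether the gaps between kernels are counted. The per-kernel figures typically come from OpenCL event profiling; a rough sketch of summing them with the standard `clGetEventProfilingInfo` call (not the actual ProfilingCommandQueue code; it assumes the queue was created with `CL_QUEUE_PROFILING_ENABLE` and that one event per kernel launch was retained):

```c++
#include <CL/cl.h>
#include <vector>

// Adds up pure GPU execution time of the profiled kernel launches, which by
// construction excludes the time spent between kernels on the queue.
double SumOfEventsTimeMs(const std::vector<cl_event>& events) {
  double total_ns = 0.0;
  for (cl_event event : events) {
    cl_ulong start = 0, end = 0;
    clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(start),
                            &start, nullptr);
    clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(end),
                            &end, nullptr);
    total_ns += static_cast<double>(end - start);  // values are nanoseconds
  }
  return total_ns * 1e-6;  // milliseconds
}
```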

View File

@ -64,7 +64,7 @@ class CLProgram {
// Return the cl_device_id associated with the program object.
// This can be the device associated with context on which the program object
-// has been created or can be device that was specified when a progam object
+// has been created or can be device that was specified when a program object
// was created using clCreateProgramWithBinary.
cl_device_id GetDeviceId() const { return device_id_; }

View File

@ -46,7 +46,7 @@ Status CreateEglSyncFromClEvent(cl_event event, EGLDisplay display,
bool IsEglSyncFromClEventSupported();
// Creates CL event from EGL sync.
-// Created event could only be comsumed by AcquiredGlObject::Acquire call as
+// Created event could only be consumed by AcquiredGlObject::Acquire call as
// a 'wait_event'.
Status CreateClEventFromEglSync(cl_context context, const EglSync& egl_sync,
CLEvent* event);

View File

@ -47,7 +47,7 @@ struct CLNode {
// for every operation.
std::vector<int2> ranges;
-// Mostly for debug purposess.
+// Mostly for debug purposes.
std::string name;
CLNode() = default;
@ -129,8 +129,8 @@ class InferenceContext {
CalculationsPrecision precision_;
TensorStorageType storage_type_;
-// Directly mapped nodes from graph, but some of them "inactiv" due
-// to fusion (inactiv = fused).
+// Directly mapped nodes from graph, but some of them "inactive" due
+// to fusion (inactive = fused).
// Memory is allocated only once, in ConvertOperations, and is not modified
// anywhere.
std::vector<CLNode> nodes_;

View File

@ -29,7 +29,7 @@ namespace {
// vec mat mult) on 4 parts to create more threads
// tid.y thread process every 4-th element in vec vec dot
// Good results for ~1024 x 1024 sizes, for other can be written more
-// otimized shaders
+// optimized shaders
std::string GetFullyConnectedKernelCode(
const OperationDef& op_def, const LinearStorage& biases,

View File

@ -25,7 +25,7 @@ namespace gpu {
namespace cl {
namespace {
-std::string GetMaxUnoolingKernelCode(
+std::string GetMaxUnpoolingKernelCode(
const OperationDef& op_def, const CLDevice& device,
const std::vector<ElementwiseOperation*>& linked_operations) {
TensorCodeGenerator src("src_data",
@ -102,7 +102,7 @@ std::string GetMaxUnoolingKernelCode(
return c;
}
-std::string GetMaxUnooling3DKernelCode(
+std::string GetMaxUnpooling3DKernelCode(
const OperationDef& op_def, const CLDevice& device,
const std::vector<ElementwiseOperation*>& linked_operations) {
TensorCodeGenerator src(
@ -219,7 +219,7 @@ MaxUnpooling& MaxUnpooling::operator=(MaxUnpooling&& kernel) {
}
Status MaxUnpooling::Compile(const CreationContext& creation_context) {
-const auto code = GetMaxUnoolingKernelCode(
+const auto code = GetMaxUnpoolingKernelCode(
definition_, *creation_context.device, linked_operations_);
return creation_context.cache->GetOrCreateCLKernel(
code, "main_function", *creation_context.context,
@ -292,7 +292,7 @@ MaxUnpooling3D& MaxUnpooling3D::operator=(MaxUnpooling3D&& kernel) {
}
Status MaxUnpooling3D::Compile(const CreationContext& creation_context) {
-const auto code = GetMaxUnooling3DKernelCode(
+const auto code = GetMaxUnpooling3DKernelCode(
definition_, *creation_context.device, linked_operations_);
return creation_context.cache->GetOrCreateCLKernel(
code, "main_function", *creation_context.context,

View File

@ -95,7 +95,7 @@ std::string GetStridedSliceCode(
return c;
}
-bool Is4Alighed(const SliceAttributes& attr) {
+bool Is4Aligned(const SliceAttributes& attr) {
return attr.strides.c == 1 && attr.starts.c % 4 == 0;
}
@ -129,7 +129,7 @@ int4 GetOffset(const SliceAttributes& attr, int src_width, int src_height,
offset.z = src_channels + attr.ends.c;
}
}
-if (Is4Alighed(attr)) {
+if (Is4Aligned(attr)) {
offset.z /= 4;
}
if (attr.strides.b > 0) {
@ -167,7 +167,7 @@ StridedSlice& StridedSlice::operator=(StridedSlice&& operation) {
}
Status StridedSlice::Compile(const CreationContext& creation_context) {
-const auto code = GetStridedSliceCode(definition_, Is4Alighed(attributes_),
+const auto code = GetStridedSliceCode(definition_, Is4Aligned(attributes_),
linked_operations_);
return creation_context.cache->GetOrCreateCLKernel(
code, "main_function", *creation_context.context,

View File

@ -640,7 +640,7 @@ extern PFN_clCreateFromEGLImageKHR clCreateFromEGLImageKHR;
extern PFN_clEnqueueAcquireEGLObjectsKHR clEnqueueAcquireEGLObjectsKHR;
extern PFN_clEnqueueReleaseEGLObjectsKHR clEnqueueReleaseEGLObjectsKHR;
-// For convinient image creation
+// For convenient image creation
// It uses clCreateImage if it available (clCreateImage available since cl 1.2)
// otherwise it will use legacy clCreateImage2D
cl_mem CreateImage2DLegacy(cl_context context, cl_mem_flags flags,
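The helper above chooses between the OpenCL 1.2 entry point and the legacy 2D call. A hedged sketch of that fallback, with an explicit function pointer standing in for the dynamically loaded `clCreateImage` (the wrapper's real loading mechanism and full parameter list are not shown in this hunk):

```c++
#include <CL/cl.h>

// Stand-in for the dynamically resolved 1.2 entry point; null on 1.1 drivers.
using PFN_clCreateImage = cl_mem (*)(cl_context, cl_mem_flags,
                                     const cl_image_format*,
                                     const cl_image_desc*, void*, cl_int*);
extern PFN_clCreateImage clCreateImage_ptr;

cl_mem CreateImage2DLegacy(cl_context context, cl_mem_flags flags,
                           const cl_image_format* format,
                           const cl_image_desc* desc, void* host_ptr,
                           cl_int* error) {
  if (clCreateImage_ptr != nullptr) {
    // OpenCL 1.2+ path: the descriptor is passed through as-is.
    return clCreateImage_ptr(context, flags, format, desc, host_ptr, error);
  }
  // OpenCL 1.1 path: clCreateImage2D takes the 2D fields individually.
  return clCreateImage2D(context, flags, format, desc->image_width,
                         desc->image_height, desc->image_row_pitch, host_ptr,
                         error);
}
```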

View File

@ -30,7 +30,7 @@ enum class CalculationsPrecision { F32, F32_F16, F16 };
// F32_F16 - as F16, but some operations (Convolution,
// DepthWiseConvolution, FullyConnected, ConvolutionTransposed)
// have accumulator in F32 and usually it calculates 4 mads in F16, sum them,
-// than converts this partial sum to F32 and add to acumulator.
+// than converts this partial sum to F32 and add to accumulator.
DataType DeduceDataTypeFromPrecision(CalculationsPrecision precision);

View File

@ -475,7 +475,7 @@ Status AllocateTensorMemory(const CLContext& context, const CLDevice& device,
case TensorStorageType::SINGLE_TEXTURE_2D: {
if (slices != 1) {
return InvalidArgumentError(absl::StrCat(
-"SINGLE_TEXTURE_2D support only cnannels in range [1-4], but ",
+"SINGLE_TEXTURE_2D support only channels in range [1-4], but ",
shape.c, "was provided"));
}
cl_image_desc desc;

View File

@ -82,7 +82,7 @@ enum class MemoryStrategy {
Status BestGreedy(const std::vector<TensorUsageRecord<size_t>>& usage_records,
ObjectsAssignment<size_t>* assignment);
-// Calculates the assignement of shared objects to given tensors, including
+// Calculates the assignment of shared objects to given tensors, including
// objects' sizes. Below there are specializations for different types, that
// support more memory strategies.
// If reallocation_graph is provided, assignment of shared objects support
@ -130,7 +130,7 @@ Status AssignObjectsToTensors(
MemoryStrategy strategy, ObjectsAssignment<uint3>* assignment,
const UsageGraph* reallocation_graph);
-// Calculates the assignement of tensors to offsets, considering those tensors
+// Calculates the assignment of tensors to offsets, considering those tensors
// are going to be allocated in one continuous memory block.
Status AssignOffsetsToTensors(
const std::vector<TensorUsageRecord<size_t>>& usage_records,

View File

@ -67,7 +67,7 @@ Status GreedyBySizeAssignment(
assignment->offsets.resize(num_tensors);
assignment->total_size = 0;
-// Ordered records are to be sorted by size of corrseponding tensor.
+// Ordered records are to be sorted by size of corresponding tensor.
std::vector<TensorUsageWithIndex<size_t>> ordered_records;
for (size_t i = 0; i < num_tensors; ++i) {
ordered_records.emplace_back(&usage_records[i], i);
@ -133,7 +133,7 @@ Status GreedyBySizeAssignment(
// - We have tensor usage records of all intermideate tensors as an input. Each
// record consists of tensor size, first and last tasks, that use it. Let's call
// [first_task..last_task] a tensor usage interval;
-// - Distance between two usage intervals is the absoulte difference between
+// - Distance between two usage intervals is the absolute difference between
// closest tasks in their intervals. If two usage intervals don't intersect,
// than the distance between them is positive;
// - Calculate positional maximums vector, e.g. the vector of lower bounds on
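The block comment above (repeated in the header below) outlines the greedy-by-size strategy. A compact, self-contained illustration of the core placement loop, leaving out the gap-reuse refinement and alignment handling of the real implementation:

```c++
#include <algorithm>
#include <cstddef>
#include <vector>

// One intermediate tensor: its size and the [first_task, last_task] interval
// during which it must stay resident.
struct UsageRecord {
  size_t size;
  size_t first_task;
  size_t last_task;
};

// Visits tensors in non-increasing size order and places each one at the
// lowest offset that does not overlap an already placed tensor whose usage
// interval intersects its own.
std::vector<size_t> GreedyBySizeOffsets(const std::vector<UsageRecord>& recs,
                                        size_t* total_size) {
  std::vector<size_t> order(recs.size());
  for (size_t i = 0; i < order.size(); ++i) order[i] = i;
  std::sort(order.begin(), order.end(),
            [&](size_t a, size_t b) { return recs[a].size > recs[b].size; });

  std::vector<size_t> offsets(recs.size(), 0);
  std::vector<bool> placed(recs.size(), false);
  *total_size = 0;
  for (size_t id : order) {
    size_t offset = 0;
    bool conflict = true;
    while (conflict) {
      conflict = false;
      for (size_t other = 0; other < recs.size(); ++other) {
        if (!placed[other]) continue;
        const bool lifetimes_overlap =
            recs[other].first_task <= recs[id].last_task &&
            recs[id].first_task <= recs[other].last_task;
        const bool memory_overlaps =
            offsets[other] < offset + recs[id].size &&
            offset < offsets[other] + recs[other].size;
        if (lifetimes_overlap && memory_overlaps) {
          offset = offsets[other] + recs[other].size;  // bump past and retry
          conflict = true;
        }
      }
    }
    offsets[id] = offset;
    placed[id] = true;
    *total_size = std::max(*total_size, offset + recs[id].size);
  }
  return offsets;
}
```

Placing the largest tensors first lets them define the arena layout while the smaller ones fill in around them, which is what keeps the total size close to the lower bound given by the positional maximums.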

View File

@ -36,7 +36,7 @@ namespace gpu {
// gap;
// - If such a gap has been found, current tensor should be allocated into this
// gap. Otherwise we can allocate it after the rightmost tensor, which usage
-// interval intersects with usage inteval of current tensor. So we assign
+// interval intersects with usage interval of current tensor. So we assign
// corresponding offset to current tensor and the tensor becomes assigned.
Status GreedyBySizeAssignment(
const std::vector<TensorUsageRecord<size_t>>& usage_records,
@ -47,7 +47,7 @@ Status GreedyBySizeAssignment(
// - We have tensor usage records of all intermideate tensors as an input. Each
// record consists of tensor size, first and last tasks, that use it. Let's call
// [first_task..last_task] a tensor usage interval;
-// - Distance between two usage intervals is the absoulte difference between
+// - Distance between two usage intervals is the absolute difference between
// closest tasks in their intervals. If two usage intervals don't intersect,
// than the distance between them is positive;
// - Calculate positional maximums vector, e.g. the vector of lower bounds on

View File

@ -46,7 +46,7 @@ bool CompareBySize(const TensorUsageWithIndex<size_t>& first,
const TensorUsageWithIndex<size_t>& second);
// TaskProfile is a vector with information about all intermediate tensors, that
-// should exist in memory during the executon of the task. Elements of the
+// should exist in memory during the execution of the task. Elements of the
// vector must be sorted in non-increasing order of corresponding tensors sizes.
using TaskProfile = std::vector<TensorUsageWithIndex<size_t>>;

View File

@ -29,7 +29,7 @@ namespace gpu {
namespace testing {
// Runs Tensorflow Lite model using Tensorflow Lite with a delegate and
-// an appropriate operations resolver. If delegate is nullptr, infererence will
+// an appropriate operations resolver. If delegate is nullptr, inference will
// be done only on CPU.
Status InterpreterInvokeWithOpResolver(const ::tflite::Model* model,
TfLiteDelegate* delegate,
@ -38,7 +38,7 @@ Status InterpreterInvokeWithOpResolver(const ::tflite::Model* model,
std::vector<TensorFloat32>* outputs);
// Runs Tensorflow Lite model using Tensorflow Lite with a delegate and
-// builtin operations resolver. If delegate is nullptr, infererence will
+// builtin operations resolver. If delegate is nullptr, inference will
// be done only on CPU.
Status InterpreterInvoke(const ::tflite::Model* model, TfLiteDelegate* delegate,
const std::vector<TensorFloat32>& inputs,

View File

@ -126,7 +126,7 @@ std::vector<int> GetDivisorsForRange(int number, int range) {
std::vector<int> GetPossibleSizes(int number,
WorkGroupSizeAlignment z_alignment) {
if (z_alignment == WorkGroupSizeAlignment::PRECISE) {
-// we will use for potential sizes, sizes that cover grid preciselly
+// we will use for potential sizes, sizes that cover grid precisely
// work group size * k (k is integer) == grid_size
return GetDivisors(number);
} else {
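To make the PRECISE case above concrete: a work-group size covers the grid exactly only when it divides the grid size, so the candidate sizes are simply the divisors. A small sketch of such a helper (an assumed stand-in, not necessarily the real GetDivisors):

```c++
#include <vector>

// All k with number % k == 0, i.e. every work-group size along one axis that
// tiles a grid of `number` elements with no remainder.
std::vector<int> GetDivisors(int number) {
  std::vector<int> divisors;
  for (int k = 1; k <= number; ++k) {
    if (number % k == 0) divisors.push_back(k);
  }
  return divisors;
}

// Example: GetDivisors(24) == {1, 2, 3, 4, 6, 8, 12, 24}; any of these
// satisfies work_group_size * k == grid_size for an integer k.
```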

View File

@ -79,7 +79,7 @@ typedef struct {
// each time inference engine needs to make a decision, it uses
// ordered priorities to do so.
// For example:
-// MAX_PRECISION at priority1 would not allow to decrease presision,
+// MAX_PRECISION at priority1 would not allow to decrease precision,
// but moving it to priority2 or priority3 would result in F16 calculation.
//
// Priority is defined in TfLiteGpuInferencePriority.
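For context on the ordered priorities discussed above, this is roughly how they are set when creating the GPU delegate through the public C API; the field and constant names below follow the TfLiteGpuDelegateOptionsV2 header and should be checked against the version in use:

```c++
#include "tensorflow/lite/delegates/gpu/delegate.h"

// Put MIN_LATENCY ahead of MAX_PRECISION so the delegate is allowed to run
// in F16: the engine consults priority1 first whenever it must trade off.
TfLiteDelegate* MakeLatencyFirstGpuDelegate() {
  TfLiteGpuDelegateOptionsV2 options = TfLiteGpuDelegateOptionsV2Default();
  options.inference_priority1 = TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY;
  options.inference_priority2 = TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION;
  options.inference_priority3 = TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE;
  return TfLiteGpuDelegateV2Create(&options);
}
```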

View File

@ -60,7 +60,7 @@ class VariableAccessor : public InlineRewrite {
// Returns const variables that need to be inlined in the a shader's code.
std::string GetConstDeclarations() const;
-// Returns shared varaible declarations that need to be inlined.
+// Returns shared variable declarations that need to be inlined.
std::string GetSharedVariableDeclarations() const;
// Returns uniform parameter declarations that need to be inlined.

View File

@ -131,7 +131,7 @@ Status GetEglError() {
case EGL_CONTEXT_LOST:
return InternalError(
"A power management event has occurred. The application must destroy "
-"all contexts and reinitialise OpenGL ES state and objects to "
+"all contexts and reinitialize OpenGL ES state and objects to "
"continue rendering.");
}
return UnknownError("EGL error: " + std::to_string(error));

View File

@ -77,7 +77,7 @@ class GlSync {
// Waits until GPU is done with processing.
Status GlSyncWait();
-// Waits until all comands are flushed and then performs active waiting by
+// Waits until all commands are flushed and then performs active waiting by
// spinning a thread and checking sync status. It leads to shorter wait time
// (up to tens of ms) but consumes more CPU.
Status GlActiveSyncWait();
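As an illustration of the flush-then-spin behavior described above (a generic GLES sketch using standard fence-sync calls, not the library's actual GlActiveSyncWait):

```c++
#include <GLES3/gl3.h>

// Inserts a fence, flushes so the GPU sees it, then busy-polls the fence with
// a zero timeout instead of blocking in the driver: shorter wait time for
// short workloads at the cost of a spinning CPU thread.
bool ActiveSyncWait() {
  GLsync sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
  if (!sync) return false;
  glFlush();
  GLenum status;
  do {
    status = glClientWaitSync(sync, 0, /*timeout=*/0);
  } while (status == GL_TIMEOUT_EXPIRED);
  glDeleteSync(sync);
  return status == GL_ALREADY_SIGNALED || status == GL_CONDITION_SATISFIED;
}
```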

View File

@ -74,7 +74,7 @@ TEST(AddTest, InputTensorAndScalar) {
Pointwise(FloatNear(1e-6), {-1.9, 0.3, 0.8, 0.9, 1.2, 2.1}));
}
-TEST(AddTest, InputTensorWithConstandBroadcast) {
+TEST(AddTest, InputTensorWithConstantBroadcast) {
TensorRef<BHWC> input;
input.type = DataType::FLOAT32;
input.ref = 0;

View File

@ -33,8 +33,8 @@ namespace {
// (b/117291356).
// Describes the ideal convolution for the specific operation case
-// Case here means specific "kernel + strides" conbination for specific
-// operatoins type, not sizes of input and output tensors, they can be any.
+// Case here means specific "kernel + strides" combination for specific
+// operations type, not sizes of input and output tensors, they can be any.
struct IdealByCase {
bool ParamsAccepted(OperationType in_op_type, HW in_kernel,
HW in_strides) const {

View File

@ -129,7 +129,7 @@ uint32_t BufferUseCount(ValueId id,
}
// Examines if the second operation can be linked to the first one. Linking may
-// be skipped in the situation when conflic may happen: if first operation's
+// be skipped in the situation when conflict may happen: if first operation's
// output is used by more than 1 other operation.
bool CanFuseOperations(const ComputeTaskDescriptorPtr first,
const ComputeTaskDescriptorPtr second,
@ -444,9 +444,9 @@ ComputeTaskDescriptorPtr NonLinkableStub(int operation_id, ValueId input_id,
}
ComputeTaskDescriptorPtr FuseChain(const FusionSequence& chain) {
-auto fused_desciptor = std::make_shared<ComputeTaskDescriptor>();
+auto fused_descriptor = std::make_shared<ComputeTaskDescriptor>();
// The id of fused descriptor is the id of the first descriptor in the list.
-fused_desciptor->id = chain.front()->id;
+fused_descriptor->id = chain.front()->id;
FusionSequence sequence;
if (chain.front()->is_linkable) {
// The first task is linkable so it contains only linkable code. Insert
@ -503,7 +503,7 @@ ComputeTaskDescriptorPtr FuseChain(const FusionSequence& chain) {
buffer.declaration + name + "[[buffer(" + index + ")]],\n";
call_arguments += ", buffer" + index;
input_index++;
-fused_desciptor->input_buffers.push_back({buffer.id, ""});
+fused_descriptor->input_buffers.push_back({buffer.id, ""});
}
}
// We have an output id that is the input for the next task.
@ -517,7 +517,7 @@ ComputeTaskDescriptorPtr FuseChain(const FusionSequence& chain) {
buffer.declaration + name + "[[buffer(" + index + ")]],\n";
call_arguments += ", buffer" + index;
immutable_index++;
-fused_desciptor->immutable_buffers.push_back(buffer);
+fused_descriptor->immutable_buffers.push_back(buffer);
}
for (auto buffer : desc->uniform_buffers) {
@ -527,7 +527,7 @@ ComputeTaskDescriptorPtr FuseChain(const FusionSequence& chain) {
buffer.declaration + name + "[[buffer(" + index + ")]],\n";
call_arguments += ", buffer" + index;
uniform_index++;
-fused_desciptor->uniform_buffers.push_back({"", buffer.data_function});
+fused_descriptor->uniform_buffers.push_back({"", buffer.data_function});
}
if (desc->is_linkable) {
@ -539,7 +539,7 @@ ComputeTaskDescriptorPtr FuseChain(const FusionSequence& chain) {
}
ComputeTaskDescriptorPtr non_linkable = sequence.front();
-fused_desciptor->shader_source =
+fused_descriptor->shader_source =
absl::Substitute(non_linkable->shader_source, function_code,
buffer_declarations, call_code);
std::vector<ValueId> alias;
@ -547,13 +547,13 @@ ComputeTaskDescriptorPtr FuseChain(const FusionSequence& chain) {
for (int i = 0; i < chain.size() - 1; i++) {
alias.push_back(chain[i]->output_buffer.id);
}
-fused_desciptor->output_buffer = {
+fused_descriptor->output_buffer = {
fused_id, "", non_linkable->output_buffer.dimensions_function, alias};
-fused_desciptor->resize_function = non_linkable->resize_function;
+fused_descriptor->resize_function = non_linkable->resize_function;
for (const auto& desc : sequence) {
-fused_desciptor->description += desc->description + "_";
+fused_descriptor->description += desc->description + "_";
}
-return fused_desciptor;
+return fused_descriptor;
}
} // namespace

View File

@ -35,7 +35,7 @@ limitations under the License.
/// 2. Model compilation. Global list of ComputeTaskDescriptors is transformed
/// into the sorted list of sets of descriptors. A set can be transformed
/// later into a single GPU task.
-/// 3. GPU compute tasks generation. Shader code generation happes here.
+/// 3. GPU compute tasks generation. Shader code generation happens here.
/// 4. Intermediate resource allocation.
/// Inference.
@interface TFLInferenceContext : NSObject
@ -72,11 +72,11 @@ limitations under the License.
/// Inserts all GPU compute tasks into the command encoder.
/// @param inputOutputBuffers Must be created and passed into the method with pairs ID:buffer
/// @param encoderBlock User-defined block to take control over command encoder. Can be nil.
-/// The block can be used, for example, for fine-graned benchmarking where end encoding
+/// The block can be used, for example, for fine-grained benchmarking where end encoding
/// is performed and command buffer is committed with completion block. A new command
/// buffer must be created and new command encoder must be returned by the block.
/// The block is called after every dispatch encoding.
-/// @discussion No GPU sychronization functions are used inside. All GPU resources must be created
+/// @discussion No GPU synchronization functions are used inside. All GPU resources must be created
/// with the same device which has been used in compileModelWithDevice() method.
- (void)encodeWithEncoder:(id<MTLComputeCommandEncoder>)commandEncoder
inputOutputBuffers:(const std::map<::tflite::gpu::ValueId, id<MTLBuffer>>&)inputOutputBuffers

View File

@ -90,7 +90,7 @@ using ::tflite::gpu::metal::SingleOpModel;
XCTAssertTrue(status.ok(), @"%s", status.error_message().c_str());
}
-- (void)testInputTensorWithConstandBroadcast {
+- (void)testInputTensorWithConstantBroadcast {
TensorRef<BHWC> input;
input.type = DataType::FLOAT32;
input.ref = 0;

View File

@ -24,7 +24,7 @@ const constexpr char* NnapiAccelerationTestParams::kAccelerationTestConfig =
#
# The test_id is test_suite_name / test_name, this differs from the
# name used by the build because of the / separator instead of .
-# Parametrised tests names are composed by the base test name / test / ordinal
+# Parameterized tests names are composed by the base test name / test / ordinal
# the ordinal is the position in the list of parameters generated by the
# cardinal product of all the different parameter sets
@ -39,7 +39,7 @@ const constexpr char* NnapiAccelerationTestParams::kAccelerationTestConfig =
## Test Arguments
#
-# The test can be parametrised with the minimum Android SDK version
+# The test can be parameterized with the minimum Android SDK version
# to apply the acceleration validation for.
# If omitted will use 27

View File

@ -155,7 +155,7 @@ bool IsScalarInputSupported(int builtin_code) {
}
}
-// Check if the operation requires explict conversion from int8 to uint8 values.
+// Check if the operation requires explicit conversion from int8 to uint8 values.
bool NeedInt8Conversion(const TfLiteContext* context, int builtin_code,
const TfLiteNode* node) {
const int input_id = node->inputs->data[0];
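For context on the conversion checked above: a signed int8 quantized tensor can be re-encoded as uint8 without changing the represented real values, because real = scale * (q - zero_point) is preserved when both the values and the zero point are shifted by +128. A standalone sketch of that shift (not the delegate's actual conversion path):

```c++
#include <cstdint>
#include <vector>

// Re-encodes int8 quantized data as uint8; the represented real values are
// unchanged because the zero point moves by the same +128 offset.
void ConvertInt8ToUint8(const std::vector<int8_t>& src, int32_t src_zero_point,
                        std::vector<uint8_t>* dst, int32_t* dst_zero_point) {
  dst->resize(src.size());
  for (size_t i = 0; i < src.size(); ++i) {
    (*dst)[i] = static_cast<uint8_t>(static_cast<int32_t>(src[i]) + 128);
  }
  *dst_zero_point = src_zero_point + 128;
}
```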

View File

@ -172,7 +172,7 @@ class StatefulNnApiDelegate : public TfLiteDelegate {
bool disallow_nnapi_cpu;
// Tensor to ANeuralNetworksMemory mapping.
std::vector<MemoryRegistration> tensor_memory_map;
-// Constains a non zero value if any NNAPI method call
+// Contains a non zero value if any NNAPI method call
// operation returned a non zero result code.
int nnapi_errno;
// Cache of kernels already built in StatefulNnApiDelegate::DoPrepare

View File

@ -4811,17 +4811,17 @@ class PadV2OpConstModel : public PadOpModel<T1> {
};
// Test case where paddings is a non-const tensor.
-template <typename RegularInputOuput>
-class PadV2OpDynamicModel : public PadOpModel<RegularInputOuput> {
+template <typename RegularInputOutput>
+class PadV2OpDynamicModel : public PadOpModel<RegularInputOutput> {
public:
PadV2OpDynamicModel(const TensorData& input,
std::initializer_list<int> paddings_shape,
-RegularInputOuput constant_values,
+RegularInputOutput constant_values,
const TensorData& output) {
this->input_ = this->AddInput(input);
this->paddings_ = this->AddInput(TensorType_INT32);
this->constant_values_ = this->AddConstInput(
-GetTensorType<RegularInputOuput>(), {constant_values}, {1});
+GetTensorType<RegularInputOutput>(), {constant_values}, {1});
this->output_ = this->AddOutput(output);
this->SetBuiltinOp(BuiltinOperator_PADV2, BuiltinOptions_PadV2Options,