minor spelling tweaks

commit 51d76d6f72
parent 092ae742c3
@@ -38,7 +38,7 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteDelegate* delegate,
 }
 
 // There is no easy way to pass a parameter into the TfLiteDelegate's
-// 'prepare' function, so we keep a global map for testing purpused.
+// 'prepare' function, so we keep a global map for testing purposed.
 // To avoid collisions use: GetPrepareFunction<__LINE__>().
 std::map<int, std::vector<int>>* GetGlobalOpLists() {
 static auto* op_list = new std::map<int, std::vector<int>>;

@@ -113,7 +113,7 @@ const TfLiteGpuDelegateOptionsV2 kDefaultOptions =
 TfLiteGpuDelegateOptionsV2Default();
 ```
 
-Similar for `NewTfLiteMetalDelgate()`:
+Similar for `NewTfLiteMetalDelegate()`:
 
 ```c++
 const TfLiteMetalDelegateOptions kDefaultOptions = {

@@ -124,9 +124,9 @@ class ProfilingCommandQueue : public CLCommandQueue {
 double GetQueueExecutionTimeMs() const;
 
 // Difference from GetQueueExecutionTimeMs is that this number doesn't include
-// time between kernels(kernels launchs or preparing) on GPU. Usually, this
+// time between kernels(kernels launches or preparing) on GPU. Usually, this
 // time should be 5-10% better than GetQueueExecutionTimeMs, because 5-10%
-// spend on something else(maybe kernels launchs or preparing)
+// spend on something else(maybe kernels launches or preparing)
 double GetSumOfEventsTimeMs() const;
 
 // This label will be used for all subsequent dispatches.

@@ -64,7 +64,7 @@ class CLProgram {
 
 // Return the cl_device_id associated with the program object.
 // This can be the device associated with context on which the program object
-// has been created or can be device that was specified when a progam object
+// has been created or can be device that was specified when a program object
 // was created using clCreateProgramWithBinary.
 cl_device_id GetDeviceId() const { return device_id_; }
 

@@ -46,7 +46,7 @@ Status CreateEglSyncFromClEvent(cl_event event, EGLDisplay display,
 bool IsEglSyncFromClEventSupported();
 
 // Creates CL event from EGL sync.
-// Created event could only be comsumed by AcquiredGlObject::Acquire call as
+// Created event could only be consumed by AcquiredGlObject::Acquire call as
 // a 'wait_event'.
 Status CreateClEventFromEglSync(cl_context context, const EglSync& egl_sync,
 CLEvent* event);

@@ -47,7 +47,7 @@ struct CLNode {
 // for every operation.
 std::vector<int2> ranges;
 
-// Mostly for debug purposess.
+// Mostly for debug purposes.
 std::string name;
 
 CLNode() = default;

@@ -129,8 +129,8 @@ class InferenceContext {
 CalculationsPrecision precision_;
 TensorStorageType storage_type_;
 
-// Directly mapped nodes from graph, but some of them "inactiv" due
-// to fusion (inactiv = fused).
+// Directly mapped nodes from graph, but some of them "inactive" due
+// to fusion (inactive = fused).
 // Memory is allocated only once, in ConvertOperations, and is not modified
 // anywhere.
 std::vector<CLNode> nodes_;

@@ -29,7 +29,7 @@ namespace {
 // vec mat mult) on 4 parts to create more threads
 // tid.y thread process every 4-th element in vec vec dot
 // Good results for ~1024 x 1024 sizes, for other can be written more
-// otimized shaders
+// optimized shaders
 
 std::string GetFullyConnectedKernelCode(
 const OperationDef& op_def, const LinearStorage& biases,

@@ -25,7 +25,7 @@ namespace gpu {
 namespace cl {
 namespace {
 
-std::string GetMaxUnoolingKernelCode(
+std::string GetMaxUnpoolingKernelCode(
 const OperationDef& op_def, const CLDevice& device,
 const std::vector<ElementwiseOperation*>& linked_operations) {
 TensorCodeGenerator src("src_data",

@@ -102,7 +102,7 @@ std::string GetMaxUnoolingKernelCode(
 return c;
 }
 
-std::string GetMaxUnooling3DKernelCode(
+std::string GetMaxUnpooling3DKernelCode(
 const OperationDef& op_def, const CLDevice& device,
 const std::vector<ElementwiseOperation*>& linked_operations) {
 TensorCodeGenerator src(

@@ -219,7 +219,7 @@ MaxUnpooling& MaxUnpooling::operator=(MaxUnpooling&& kernel) {
 }
 
 Status MaxUnpooling::Compile(const CreationContext& creation_context) {
-const auto code = GetMaxUnoolingKernelCode(
+const auto code = GetMaxUnpoolingKernelCode(
 definition_, *creation_context.device, linked_operations_);
 return creation_context.cache->GetOrCreateCLKernel(
 code, "main_function", *creation_context.context,

@@ -292,7 +292,7 @@ MaxUnpooling3D& MaxUnpooling3D::operator=(MaxUnpooling3D&& kernel) {
 }
 
 Status MaxUnpooling3D::Compile(const CreationContext& creation_context) {
-const auto code = GetMaxUnooling3DKernelCode(
+const auto code = GetMaxUnpooling3DKernelCode(
 definition_, *creation_context.device, linked_operations_);
 return creation_context.cache->GetOrCreateCLKernel(
 code, "main_function", *creation_context.context,

@@ -95,7 +95,7 @@ std::string GetStridedSliceCode(
 return c;
 }
 
-bool Is4Alighed(const SliceAttributes& attr) {
+bool Is4Aligned(const SliceAttributes& attr) {
 return attr.strides.c == 1 && attr.starts.c % 4 == 0;
 }
 

@@ -129,7 +129,7 @@ int4 GetOffset(const SliceAttributes& attr, int src_width, int src_height,
 offset.z = src_channels + attr.ends.c;
 }
 }
-if (Is4Alighed(attr)) {
+if (Is4Aligned(attr)) {
 offset.z /= 4;
 }
 if (attr.strides.b > 0) {

@@ -167,7 +167,7 @@ StridedSlice& StridedSlice::operator=(StridedSlice&& operation) {
 }
 
 Status StridedSlice::Compile(const CreationContext& creation_context) {
-const auto code = GetStridedSliceCode(definition_, Is4Alighed(attributes_),
+const auto code = GetStridedSliceCode(definition_, Is4Aligned(attributes_),
 linked_operations_);
 return creation_context.cache->GetOrCreateCLKernel(
 code, "main_function", *creation_context.context,

@@ -640,7 +640,7 @@ extern PFN_clCreateFromEGLImageKHR clCreateFromEGLImageKHR;
 extern PFN_clEnqueueAcquireEGLObjectsKHR clEnqueueAcquireEGLObjectsKHR;
 extern PFN_clEnqueueReleaseEGLObjectsKHR clEnqueueReleaseEGLObjectsKHR;
 
-// For convinient image creation
+// For convenient image creation
 // It uses clCreateImage if it available (clCreateImage available since cl 1.2)
 // otherwise it will use legacy clCreateImage2D
 cl_mem CreateImage2DLegacy(cl_context context, cl_mem_flags flags,

@@ -30,7 +30,7 @@ enum class CalculationsPrecision { F32, F32_F16, F16 };
 // F32_F16 - as F16, but some operations (Convolution,
 // DepthWiseConvolution, FullyConnected, ConvolutionTransposed)
 // have accumulator in F32 and usually it calculates 4 mads in F16, sum them,
-// than converts this partial sum to F32 and add to acumulator.
+// than converts this partial sum to F32 and add to accumulator.
 
 DataType DeduceDataTypeFromPrecision(CalculationsPrecision precision);
 

@@ -475,7 +475,7 @@ Status AllocateTensorMemory(const CLContext& context, const CLDevice& device,
 case TensorStorageType::SINGLE_TEXTURE_2D: {
 if (slices != 1) {
 return InvalidArgumentError(absl::StrCat(
-"SINGLE_TEXTURE_2D support only cnannels in range [1-4], but ",
+"SINGLE_TEXTURE_2D support only channels in range [1-4], but ",
 shape.c, "was provided"));
 }
 cl_image_desc desc;

@@ -82,7 +82,7 @@ enum class MemoryStrategy {
 Status BestGreedy(const std::vector<TensorUsageRecord<size_t>>& usage_records,
 ObjectsAssignment<size_t>* assignment);
 
-// Calculates the assignement of shared objects to given tensors, including
+// Calculates the assignment of shared objects to given tensors, including
 // objects' sizes. Below there are specializations for different types, that
 // support more memory strategies.
 // If reallocation_graph is provided, assignment of shared objects support

@@ -130,7 +130,7 @@ Status AssignObjectsToTensors(
 MemoryStrategy strategy, ObjectsAssignment<uint3>* assignment,
 const UsageGraph* reallocation_graph);
 
-// Calculates the assignement of tensors to offsets, considering those tensors
+// Calculates the assignment of tensors to offsets, considering those tensors
 // are going to be allocated in one continuous memory block.
 Status AssignOffsetsToTensors(
 const std::vector<TensorUsageRecord<size_t>>& usage_records,

@@ -67,7 +67,7 @@ Status GreedyBySizeAssignment(
 assignment->offsets.resize(num_tensors);
 assignment->total_size = 0;
 
-// Ordered records are to be sorted by size of corrseponding tensor.
+// Ordered records are to be sorted by size of corresponding tensor.
 std::vector<TensorUsageWithIndex<size_t>> ordered_records;
 for (size_t i = 0; i < num_tensors; ++i) {
 ordered_records.emplace_back(&usage_records[i], i);

@@ -133,7 +133,7 @@ Status GreedyBySizeAssignment(
 // - We have tensor usage records of all intermideate tensors as an input. Each
 // record consists of tensor size, first and last tasks, that use it. Let's call
 // [first_task..last_task] a tensor usage interval;
-// - Distance between two usage intervals is the absoulte difference between
+// - Distance between two usage intervals is the absolute difference between
 // closest tasks in their intervals. If two usage intervals don't intersect,
 // than the distance between them is positive;
 // - Calculate positional maximums vector, e.g. the vector of lower bounds on

@@ -36,7 +36,7 @@ namespace gpu {
 // gap;
 // - If such a gap has been found, current tensor should be allocated into this
 // gap. Otherwise we can allocate it after the rightmost tensor, which usage
-// interval intersects with usage inteval of current tensor. So we assign
+// interval intersects with usage interval of current tensor. So we assign
 // corresponding offset to current tensor and the tensor becomes assigned.
 Status GreedyBySizeAssignment(
 const std::vector<TensorUsageRecord<size_t>>& usage_records,

@@ -47,7 +47,7 @@ Status GreedyBySizeAssignment(
 // - We have tensor usage records of all intermideate tensors as an input. Each
 // record consists of tensor size, first and last tasks, that use it. Let's call
 // [first_task..last_task] a tensor usage interval;
-// - Distance between two usage intervals is the absoulte difference between
+// - Distance between two usage intervals is the absolute difference between
 // closest tasks in their intervals. If two usage intervals don't intersect,
 // than the distance between them is positive;
 // - Calculate positional maximums vector, e.g. the vector of lower bounds on

@@ -46,7 +46,7 @@ bool CompareBySize(const TensorUsageWithIndex<size_t>& first,
 const TensorUsageWithIndex<size_t>& second);
 
 // TaskProfile is a vector with information about all intermediate tensors, that
-// should exist in memory during the executon of the task. Elements of the
+// should exist in memory during the execution of the task. Elements of the
 // vector must be sorted in non-increasing order of corresponding tensors sizes.
 using TaskProfile = std::vector<TensorUsageWithIndex<size_t>>;
 

@@ -29,7 +29,7 @@ namespace gpu {
 namespace testing {
 
 // Runs Tensorflow Lite model using Tensorflow Lite with a delegate and
-// an appropriate operations resolver. If delegate is nullptr, infererence will
+// an appropriate operations resolver. If delegate is nullptr, inference will
 // be done only on CPU.
 Status InterpreterInvokeWithOpResolver(const ::tflite::Model* model,
 TfLiteDelegate* delegate,

@@ -38,7 +38,7 @@ Status InterpreterInvokeWithOpResolver(const ::tflite::Model* model,
 std::vector<TensorFloat32>* outputs);
 
 // Runs Tensorflow Lite model using Tensorflow Lite with a delegate and
-// builtin operations resolver. If delegate is nullptr, infererence will
+// builtin operations resolver. If delegate is nullptr, inference will
 // be done only on CPU.
 Status InterpreterInvoke(const ::tflite::Model* model, TfLiteDelegate* delegate,
 const std::vector<TensorFloat32>& inputs,

@@ -126,7 +126,7 @@ std::vector<int> GetDivisorsForRange(int number, int range) {
 std::vector<int> GetPossibleSizes(int number,
 WorkGroupSizeAlignment z_alignment) {
 if (z_alignment == WorkGroupSizeAlignment::PRECISE) {
-// we will use for potential sizes, sizes that cover grid preciselly
+// we will use for potential sizes, sizes that cover grid precisely
 // work group size * k (k is integer) == grid_size
 return GetDivisors(number);
 } else {

@@ -79,7 +79,7 @@ typedef struct {
 // each time inference engine needs to make a decision, it uses
 // ordered priorities to do so.
 // For example:
-// MAX_PRECISION at priority1 would not allow to decrease presision,
+// MAX_PRECISION at priority1 would not allow to decrease precision,
 // but moving it to priority2 or priority3 would result in F16 calculation.
 //
 // Priority is defined in TfLiteGpuInferencePriority.

@@ -60,7 +60,7 @@ class VariableAccessor : public InlineRewrite {
 // Returns const variables that need to be inlined in the a shader's code.
 std::string GetConstDeclarations() const;
 
-// Returns shared varaible declarations that need to be inlined.
+// Returns shared variable declarations that need to be inlined.
 std::string GetSharedVariableDeclarations() const;
 
 // Returns uniform parameter declarations that need to be inlined.

@@ -131,7 +131,7 @@ Status GetEglError() {
 case EGL_CONTEXT_LOST:
 return InternalError(
 "A power management event has occurred. The application must destroy "
-"all contexts and reinitialise OpenGL ES state and objects to "
+"all contexts and reinitialize OpenGL ES state and objects to "
 "continue rendering.");
 }
 return UnknownError("EGL error: " + std::to_string(error));

@@ -77,7 +77,7 @@ class GlSync {
 // Waits until GPU is done with processing.
 Status GlSyncWait();
 
-// Waits until all comands are flushed and then performs active waiting by
+// Waits until all commands are flushed and then performs active waiting by
 // spinning a thread and checking sync status. It leads to shorter wait time
 // (up to tens of ms) but consumes more CPU.
 Status GlActiveSyncWait();

@@ -74,7 +74,7 @@ TEST(AddTest, InputTensorAndScalar) {
 Pointwise(FloatNear(1e-6), {-1.9, 0.3, 0.8, 0.9, 1.2, 2.1}));
 }
 
-TEST(AddTest, InputTensorWithConstandBroadcast) {
+TEST(AddTest, InputTensorWithConstantBroadcast) {
 TensorRef<BHWC> input;
 input.type = DataType::FLOAT32;
 input.ref = 0;

@@ -33,8 +33,8 @@ namespace {
 // (b/117291356).
 
 // Describes the ideal convolution for the specific operation case
-// Case here means specific "kernel + strides" conbination for specific
-// operatoins type, not sizes of input and output tensors, they can be any.
+// Case here means specific "kernel + strides" combination for specific
+// operations type, not sizes of input and output tensors, they can be any.
 struct IdealByCase {
 bool ParamsAccepted(OperationType in_op_type, HW in_kernel,
 HW in_strides) const {

@@ -129,7 +129,7 @@ uint32_t BufferUseCount(ValueId id,
 }
 
 // Examines if the second operation can be linked to the first one. Linking may
-// be skipped in the situation when conflic may happen: if first operation's
+// be skipped in the situation when conflict may happen: if first operation's
 // output is used by more than 1 other operation.
 bool CanFuseOperations(const ComputeTaskDescriptorPtr first,
 const ComputeTaskDescriptorPtr second,

@@ -444,9 +444,9 @@ ComputeTaskDescriptorPtr NonLinkableStub(int operation_id, ValueId input_id,
 }
 
 ComputeTaskDescriptorPtr FuseChain(const FusionSequence& chain) {
-auto fused_desciptor = std::make_shared<ComputeTaskDescriptor>();
+auto fused_descriptor = std::make_shared<ComputeTaskDescriptor>();
 // The id of fused descriptor is the id of the first descriptor in the list.
-fused_desciptor->id = chain.front()->id;
+fused_descriptor->id = chain.front()->id;
 FusionSequence sequence;
 if (chain.front()->is_linkable) {
 // The first task is linkable so it contains only linkable code. Insert

@@ -503,7 +503,7 @@ ComputeTaskDescriptorPtr FuseChain(const FusionSequence& chain) {
 buffer.declaration + name + "[[buffer(" + index + ")]],\n";
 call_arguments += ", buffer" + index;
 input_index++;
-fused_desciptor->input_buffers.push_back({buffer.id, ""});
+fused_descriptor->input_buffers.push_back({buffer.id, ""});
 }
 }
 // We have an output id that is the input for the next task.

@@ -517,7 +517,7 @@ ComputeTaskDescriptorPtr FuseChain(const FusionSequence& chain) {
 buffer.declaration + name + "[[buffer(" + index + ")]],\n";
 call_arguments += ", buffer" + index;
 immutable_index++;
-fused_desciptor->immutable_buffers.push_back(buffer);
+fused_descriptor->immutable_buffers.push_back(buffer);
 }
 
 for (auto buffer : desc->uniform_buffers) {

@@ -527,7 +527,7 @@ ComputeTaskDescriptorPtr FuseChain(const FusionSequence& chain) {
 buffer.declaration + name + "[[buffer(" + index + ")]],\n";
 call_arguments += ", buffer" + index;
 uniform_index++;
-fused_desciptor->uniform_buffers.push_back({"", buffer.data_function});
+fused_descriptor->uniform_buffers.push_back({"", buffer.data_function});
 }
 
 if (desc->is_linkable) {

@@ -539,7 +539,7 @@ ComputeTaskDescriptorPtr FuseChain(const FusionSequence& chain) {
 }
 
 ComputeTaskDescriptorPtr non_linkable = sequence.front();
-fused_desciptor->shader_source =
+fused_descriptor->shader_source =
 absl::Substitute(non_linkable->shader_source, function_code,
 buffer_declarations, call_code);
 std::vector<ValueId> alias;

@@ -547,13 +547,13 @@ ComputeTaskDescriptorPtr FuseChain(const FusionSequence& chain) {
 for (int i = 0; i < chain.size() - 1; i++) {
 alias.push_back(chain[i]->output_buffer.id);
 }
-fused_desciptor->output_buffer = {
+fused_descriptor->output_buffer = {
 fused_id, "", non_linkable->output_buffer.dimensions_function, alias};
-fused_desciptor->resize_function = non_linkable->resize_function;
+fused_descriptor->resize_function = non_linkable->resize_function;
 for (const auto& desc : sequence) {
-fused_desciptor->description += desc->description + "_";
+fused_descriptor->description += desc->description + "_";
 }
-return fused_desciptor;
+return fused_descriptor;
 }
 
 }  // namespace

@@ -35,7 +35,7 @@ limitations under the License.
 /// 2. Model compilation. Global list of ComputeTaskDescriptors is transformed
 /// into the sorted list of sets of descriptors. A set can be transformed
 /// later into a single GPU task.
-/// 3. GPU compute tasks generation. Shader code generation happes here.
+/// 3. GPU compute tasks generation. Shader code generation happens here.
 /// 4. Intermediate resource allocation.
 /// Inference.
 @interface TFLInferenceContext : NSObject

@@ -72,11 +72,11 @@ limitations under the License.
 /// Inserts all GPU compute tasks into the command encoder.
 /// @param inputOutputBuffers Must be created and passed into the method with pairs ID:buffer
 /// @param encoderBlock User-defined block to take control over command encoder. Can be nil.
-/// The block can be used, for example, for fine-graned benchmarking where end encoding
+/// The block can be used, for example, for fine-grained benchmarking where end encoding
 /// is performed and command buffer is committed with completion block. A new command
 /// buffer must be created and new command encoder must be returned by the block.
 /// The block is called after every dispatch encoding.
-/// @discussion No GPU sychronization functions are used inside. All GPU resources must be created
+/// @discussion No GPU synchronization functions are used inside. All GPU resources must be created
 /// with the same device which has been used in compileModelWithDevice() method.
 - (void)encodeWithEncoder:(id<MTLComputeCommandEncoder>)commandEncoder
 inputOutputBuffers:(const std::map<::tflite::gpu::ValueId, id<MTLBuffer>>&)inputOutputBuffers

@@ -90,7 +90,7 @@ using ::tflite::gpu::metal::SingleOpModel;
 XCTAssertTrue(status.ok(), @"%s", status.error_message().c_str());
 }
 
-- (void)testInputTensorWithConstandBroadcast {
+- (void)testInputTensorWithConstantBroadcast {
 TensorRef<BHWC> input;
 input.type = DataType::FLOAT32;
 input.ref = 0;

@@ -24,7 +24,7 @@ const constexpr char* NnapiAccelerationTestParams::kAccelerationTestConfig =
 #
 # The test_id is test_suite_name / test_name, this differs from the
 # name used by the build because of the / separator instead of .
-# Parametrised tests names are composed by the base test name / test / ordinal
+# Parameterized tests names are composed by the base test name / test / ordinal
 # the ordinal is the position in the list of parameters generated by the
 # cardinal product of all the different parameter sets
 

@@ -39,7 +39,7 @@ const constexpr char* NnapiAccelerationTestParams::kAccelerationTestConfig =
 
 ## Test Arguments
 #
-# The test can be parametrised with the minimum Android SDK version
+# The test can be parameterized with the minimum Android SDK version
 # to apply the acceleration validation for.
 # If omitted will use 27
 

@@ -155,7 +155,7 @@ bool IsScalarInputSupported(int builtin_code) {
 }
 }
 
-// Check if the operation requires explict conversion from int8 to uint8 values.
+// Check if the operation requires explicit conversion from int8 to uint8 values.
 bool NeedInt8Conversion(const TfLiteContext* context, int builtin_code,
 const TfLiteNode* node) {
 const int input_id = node->inputs->data[0];

@@ -172,7 +172,7 @@ class StatefulNnApiDelegate : public TfLiteDelegate {
 bool disallow_nnapi_cpu;
 // Tensor to ANeuralNetworksMemory mapping.
 std::vector<MemoryRegistration> tensor_memory_map;
-// Constains a non zero value if any NNAPI method call
+// Contains a non zero value if any NNAPI method call
 // operation returned a non zero result code.
 int nnapi_errno;
 // Cache of kernels already built in StatefulNnApiDelegate::DoPrepare

@@ -4811,17 +4811,17 @@ class PadV2OpConstModel : public PadOpModel<T1> {
 };
 
 // Test case where paddings is a non-const tensor.
-template <typename RegularInputOuput>
-class PadV2OpDynamicModel : public PadOpModel<RegularInputOuput> {
+template <typename RegularInputOutput>
+class PadV2OpDynamicModel : public PadOpModel<RegularInputOutput> {
 public:
 PadV2OpDynamicModel(const TensorData& input,
 std::initializer_list<int> paddings_shape,
-RegularInputOuput constant_values,
+RegularInputOutput constant_values,
 const TensorData& output) {
 this->input_ = this->AddInput(input);
 this->paddings_ = this->AddInput(TensorType_INT32);
 this->constant_values_ = this->AddConstInput(
-GetTensorType<RegularInputOuput>(), {constant_values}, {1});
+GetTensorType<RegularInputOutput>(), {constant_values}, {1});
 this->output_ = this->AddOutput(output);
 
 this->SetBuiltinOp(BuiltinOperator_PADV2, BuiltinOptions_PadV2Options,