diff --git a/tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.cc b/tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.cc index 5bfbf97ad26..17f8f246261 100644 --- a/tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.cc +++ b/tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.cc @@ -45,7 +45,6 @@ kernel void ComputeFunction($1 return; } FLT4 value = args.src_tensor.Read(X, Y, Z); - $2 args.dst_tensor.Write(value, X, Y, Z); } )"; @@ -148,7 +147,6 @@ void ComputeTaskDescriptor::AssembleCode() { return std::make_pair(groups_size, groups_count); }; } - shader_source = absl::Substitute(shader_source, "$0", "$1", ""); } } // namespace metal diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/concat.cc b/tensorflow/lite/delegates/gpu/metal/kernels/concat.cc index 295f1313fcc..4ce5a227791 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/concat.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/concat.cc @@ -84,23 +84,14 @@ std::string GetConcatChannelsCode(const OperationDef& op_def, c += " for (int i = 0; i < " + t_name + ".Slices(); i += 2) {\n"; c += " FLT4 result0 = " + t_name + ".Read(" + coords + ", i);\n"; c += " FLT4 result1 = " + t_name + ".Read(" + coords + ", i + 1);\n"; - c += " uint3 gid = uint3(ugid.x, ugid.y, uint(S));\n"; - c += " $2\n"; - c += " FLT4 value = result0;\n"; - c += " args.dst_tensor.Write(value, " + coords + ", S);\n"; - c += " gid = uint3(ugid.x, ugid.y, uint(S + 1));\n"; - c += " $2\n"; - c += " value = result1;\n"; - c += " args.dst_tensor.Write(value, " + coords + ", S + 1);\n"; + c += " args.dst_tensor.Write(result0, " + coords + ", S);\n"; + c += " args.dst_tensor.Write(result1, " + coords + ", S + 1);\n"; c += " S += 2;\n"; c += " }\n"; } else { c += " for (int i = 0; i < " + t_name + ".Slices(); ++i) {\n"; c += " FLT4 result = " + t_name + ".Read(" + coords + ", i);\n"; - c += " uint3 gid = uint3(ugid.x, ugid.y, uint(S));\n"; - c += " $2\n"; - c += " FLT4 value = result;\n"; - c += " args.dst_tensor.Write(value, " + coords + ", S);\n"; + c += " args.dst_tensor.Write(result, " + coords + ", S);\n"; c += " S++;\n"; c += " }\n"; } @@ -125,11 +116,6 @@ std::string GetConcatChannelsCode(const OperationDef& op_def, out_channel++; if (out_channel == 4) { out_channel = 0; - c += " {\n"; - c += " uint3 gid = uint3(ugid.x, ugid.y, uint(" + - std::to_string(z) + "));\n"; - c += " $2\n"; - c += " }\n"; c += " args.dst_tensor.Write(value, " + coords + ", " + std::to_string(z) + ");\n"; z++; @@ -139,11 +125,6 @@ std::string GetConcatChannelsCode(const OperationDef& op_def, } } if (out_channel != 0) { - c += " {\n"; - c += " uint3 gid = uint3(ugid.x, ugid.y, uint(" + std::to_string(z) + - "));\n"; - c += " $2\n"; - c += " }\n"; c += " args.dst_tensor.Write(value, " + coords + ", " + std::to_string(z) + ");\n"; } @@ -273,10 +254,6 @@ std::string GetConcatKernelCode(const OperationDef& op_def, c += " } \n"; c += " coord -= " + field + ";\n"; } - c += " {\n"; - c += " uint3 gid = uint3(ugid.x, ugid.y, ugid.z);\n"; - c += " $2\n"; - c += " }\n"; c += " args.dst_tensor.Write(value, " + dst_coord + ");\n"; c += "}\n"; return c; diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/conv.cc b/tensorflow/lite/delegates/gpu/metal/kernels/conv.cc index 9706a6cda8e..be5a9e254bb 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/conv.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/conv.cc @@ -549,9 +549,6 @@ kernel void ComputeFunction( c += " FLT4 value = FLT4(r" + s_zyx + ");\n"; c += " int linear_index = offset_" + s_yx + " + args.dst_tensor.SliceStride() * " + s_z + ";\n"; - c += " uint3 gid = uint3(X + " + s_x + ", Y + " + s_y + ", Z + " + - s_z + ");\n"; - c += " $2\n"; c += " args.dst_tensor.Linking(value, X + " + s_x + ", Y + " + s_y + ", Z + " + s_z + ");\n"; c += " args.dst_tensor.WriteLinear(value, linear_index);\n"; diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/depthwise_conv.cc b/tensorflow/lite/delegates/gpu/metal/kernels/depthwise_conv.cc index 22524b14bc0..7e7f9700544 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/depthwise_conv.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/depthwise_conv.cc @@ -160,26 +160,18 @@ kernel void ComputeFunction( if (y0_in && x0_in) { FLT4 value = FLT4(r0); - uint3 gid = uint3(gid_x, gid_y, gid_z); - $2 args.dst_tensor.Write(value, gid_x, gid_y, gid_z); } if (y1_in && x0_in) { FLT4 value = FLT4(l0); - uint3 gid = uint3(gid_x, gid_y + 1, gid_z); - $2 args.dst_tensor.Write(value, gid_x, gid_y + 1, gid_z); } if (y0_in && x1_in) { FLT4 value = FLT4(t0); - uint3 gid = uint3(gid_x + 1, gid_y, gid_z); - $2 args.dst_tensor.Write(value, gid_x + 1, gid_y, gid_z); } if (y1_in && x1_in) { FLT4 value = FLT4(b0); - uint3 gid = uint3(gid_x + 1, gid_y + 1, gid_z); - $2 args.dst_tensor.Write(value, gid_x + 1, gid_y + 1, gid_z); } } @@ -330,14 +322,10 @@ kernel void ComputeFunction( if (y0_in) { FLT4 value = FLT4(r0); - uint3 gid = uint3(gid_x, gid_y, gid_z); - $2 args.dst_tensor.Write(value, gid_x, gid_y, gid_z); } if (y1_in) { FLT4 value = FLT4(l0); - uint3 gid = uint3(gid_x, gid_y + 1, gid_z); - $2 args.dst_tensor.Write(value, gid_x, gid_y + 1, gid_z); } } @@ -454,9 +442,7 @@ kernel void ComputeFunction( } } FLT4 res = FLT4(sum0) + args.biases.Read(dst_z); - FLT4 value = res; - $2 - args.dst_tensor.Write(value, dst_x, dst_y, dst_z); + args.dst_tensor.Write(res, dst_x, dst_y, dst_z); } )"; ComputeTaskDescriptor desc(definition); diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.cc b/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.cc index ff9b0d3d20b..04fa7effa0c 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/fully_connected.cc @@ -104,8 +104,6 @@ std::string GetFullyConnectedCode(const GpuInfo& gpu_info, int src_channels, if (tid.y == 0 && tid.x % 4 == 0 && dst_s < args.dst_tensor.Slices()) { FLT4 value = FLT4(temp[tid.x][0], temp[tid.x + 1][0], temp[tid.x + 2][0], temp[tid.x + 3][0]) + args.bias.Read(dst_s); - uint3 gid = uint3(0u, 0u, uint(dst_s)); - $$2 args.dst_tensor.Write(value, 0, 0, dst_s); } } diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/max_unpooling.cc b/tensorflow/lite/delegates/gpu/metal/kernels/max_unpooling.cc index cb5dc973792..1b46b05f50c 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/max_unpooling.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/max_unpooling.cc @@ -67,7 +67,6 @@ kernel void ComputeFunction( value.z = t_index == indexes.z ? src_color.z : 0.0; value.w = t_index == indexes.w ? src_color.w : 0.0; - $2 args.dst_tensor.Write(value, X, Y, gid.z); } )"; diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/mean.cc b/tensorflow/lite/delegates/gpu/metal/kernels/mean.cc index 9a92a94593c..86d4ee7565f 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/mean.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/mean.cc @@ -86,7 +86,6 @@ std::string GetMeanCode(const int3& work_group_size) { } c += " FLT4 value = FLT4(sum * args.inv_multiplier_y);\n"; c += R"( - $2 args.dst_tensor.Write(value, 0, 0, gid.z); } )"; diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/padding.cc b/tensorflow/lite/delegates/gpu/metal/kernels/padding.cc index 1edff8be184..8ed67343b3d 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/padding.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/padding.cc @@ -119,7 +119,6 @@ std::string GetPaddingCode(const PadAttributes& attr) { } code += " }\n"; } - code += " $2\n"; code += " args.dst_tensor.Write(value, gid.x, gid.y, gid.z);\n"; code += "}\n"; return code; diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/pooling.cc b/tensorflow/lite/delegates/gpu/metal/kernels/pooling.cc index 65632482538..7cb42f0a3b3 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/pooling.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/pooling.cc @@ -59,9 +59,7 @@ kernel void ComputeFunction( maximum = max(maximum, src_color); } } - FLT4 value = maximum; - $2 - args.dst_tensor.Write(value, gid.x, gid.y, gid.z); + args.dst_tensor.Write(maximum, gid.x, gid.y, gid.z); } )"; return shader_source; @@ -110,9 +108,7 @@ kernel void ComputeFunction( index_counter++; } } - args.dst_tensor.GetAddress(linear_index, gid.x, gid.y, gid.z); FLT4 value = static_cast(indexes); - $2 args.dst_tensor.Write(value, gid.x, gid.y, gid.z); } )"; @@ -147,11 +143,9 @@ kernel void ComputeFunction( sum += src_color; } } - args.dst_tensor.GetAddress(linear_index, gid.x, gid.y, gid.z); // If window_size==0, window covered nothing. This situation is a sign of // incorrectly constructed operation. NaNs are expected as output. FLT4 value = FLT4(sum / window_size); - $2 args.dst_tensor.Write(value, gid.x, gid.y, gid.z); } )"; diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/reshape.cc b/tensorflow/lite/delegates/gpu/metal/kernels/reshape.cc index 722e4c6590a..39709e310ac 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/reshape.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/reshape.cc @@ -62,8 +62,6 @@ kernel void ComputeFunction( value[i] = args.src_tensor.Read(src_x, src_y, src_layer)[src_channel]; } } - - $2 args.dst_tensor.Write(value, igid.x, igid.y, igid.z); })"; return code; @@ -91,7 +89,6 @@ kernel void ComputeFunction( int src_z = t0 - src_x * args.src_tensor.Slices(); // t0 % args.src_tensor.Slices(); FLT4 value = args.src_tensor.Read(src_x, src_y, src_z); - $2 args.dst_tensor.Write(value, X, Y, Z); })"; return code; diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/resize.cc b/tensorflow/lite/delegates/gpu/metal/kernels/resize.cc index 89570667494..d3cb1405daf 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/resize.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/resize.cc @@ -65,7 +65,6 @@ kernel void ComputeFunction( // bilinear interpolation FLT4 value = mix(mix(tex11, tex21, static_cast(t.x)), mix(tex12, tex22, static_cast(t.x)), static_cast(t.y)); - $2 args.dst_tensor.Write(value, gid.x, gid.y, gid.z); } )"; @@ -106,8 +105,6 @@ kernel void ComputeFunction( c += " coord.y = min(coord.y, args.src_tensor.Height() - 1);\n"; c += R"( FLT4 value = args.src_tensor.Read(coord.x, coord.y, gid.z); - args.dst_tensor.GetAddress(linear_index, gid.x, gid.y, gid.z); - $2 args.dst_tensor.Write(value, gid.x, gid.y, gid.z); } )"; diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/slice.cc b/tensorflow/lite/delegates/gpu/metal/kernels/slice.cc index be226fbcb04..a55fe06550e 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/slice.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/slice.cc @@ -134,9 +134,7 @@ kernel void ComputeFunction($1 c += " }\n"; } } - c += " FLT4 value = result;\n"; - c += " $2\n"; - c += " args.dst_tensor.Write(value, X, Y, Z);\n"; + c += " args.dst_tensor.Write(result, X, Y, Z);\n"; c += "}\n"; return c; } diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/softmax.cc b/tensorflow/lite/delegates/gpu/metal/kernels/softmax.cc index 6176229753f..bd1fc552f53 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/softmax.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/softmax.cc @@ -117,8 +117,6 @@ kernel void ComputeFunction($1 if (dst_s < args.src_tensor.Slices()) { float4 src = float4(args.src_tensor.Read(0, 0, dst_s)) - float4(maximum); FLT4 value = FLT4(exp(src) * sum); - uint3 gid = uint3(0, 0, dst_s); - $2 args.dst_tensor.Write(value, 0, 0, dst_s); } })"; @@ -161,7 +159,6 @@ kernel void ComputeFunction( for (int d = 0; d < args.dst_tensor.Slices(); ++d) { float4 src = float4(args.src_tensor.Read(gid.x, gid.y, d)) - float4(maximum); FLT4 value = FLT4(exp(src) / sum); - $2 args.dst_tensor.Write(value, gid.x, gid.y, d); } } diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/space_to_depth.cc b/tensorflow/lite/delegates/gpu/metal/kernels/space_to_depth.cc index f9eca52f1e7..ce4252d2ff7 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/space_to_depth.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/space_to_depth.cc @@ -50,7 +50,6 @@ kernel void ComputeFunction($1 uint3 gid[[thread_position_in_grid]]) { uint src_c = dst_c % args.src_tensor.Channels(); value[i] = args.src_tensor.Read(src_x, src_y, src_c / 4)[src_c % 4]; } - $2 args.dst_tensor.Write(value, gid.x, gid.y, gid.z); })"; diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.cc b/tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.cc index adb454b780e..b413cb24bfa 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/transpose_conv.cc @@ -103,8 +103,6 @@ std::string GetDeconvolution(const ConvolutionTransposedAttributes& attr) { for (short l = 0; l < dst_depth; ++l) { FLT4 value = FLT4(out[l * 4], out[l * 4 + 1], out[l * 4 + 2], out[l * 4 + 3]) + args.biases.Read(l); - uint3 gid = uint3(ugid.x, ugid.y, uint(l)); - $$2 args.dst_tensor.Write(value, ugid.x, ugid.y, l); } } @@ -225,8 +223,6 @@ std::string GetDeconvolutionShared(const ConvolutionTransposedAttributes& attr, for (short l = 0; l < dst_depth; ++l) { FLT4 value = FLT4(out[l * 4], out[l * 4 + 1], out[l * 4 + 2], out[l * 4 + 3]) + args.biases.Read(l); - uint3 gid = uint3(ugid.x, ugid.y, uint(l)); - $$2 args.dst_tensor.Write(value, ugid.x, ugid.y, l); } } @@ -400,8 +396,6 @@ kernel void ComputeFunction( c += " if (" + x_check + " && " + y_check + ") {\n"; c += " FLT4 value = FLT4(" + R + ") + bias_val;\n"; std::string dst_coords = dst_x + ", " + dst_y + ", Z"; - c += " uint3 gid = uint3(" + dst_coords + ");\n"; - c += " $2\n"; c += " args.dst_tensor.Write(value, " + dst_coords + ");\n"; c += " }\n"; } diff --git a/tensorflow/lite/delegates/gpu/metal/kernels/winograd.cc b/tensorflow/lite/delegates/gpu/metal/kernels/winograd.cc index 022565af21b..c8ec883b84d 100644 --- a/tensorflow/lite/delegates/gpu/metal/kernels/winograd.cc +++ b/tensorflow/lite/delegates/gpu/metal/kernels/winograd.cc @@ -307,28 +307,20 @@ kernel void ComputeFunction($1 FLT4 t1 = I[y][3] + I[y][4]; if (tile_x < args.dst_tensor.Width()) { FLT4 value = I[y][0] + t0 + t1 + bias_val; - uint3 gid = uint3(tile_x, tile_y + y, global_ids.z); - $2 args.dst_tensor.Write(value, tile_x, tile_y + y, global_ids.z); } FLT4 t2 = I[y][1] - I[y][2]; FLT4 t3 = I[y][3] - I[y][4]; if (tile_x + 1 < args.dst_tensor.Width()) { FLT4 value = t2 * At[7] + t3 * At[9] + bias_val; - uint3 gid = uint3(tile_x + 1, tile_y + y, global_ids.z); - $2 args.dst_tensor.Write(value, tile_x + 1, tile_y + y, global_ids.z); } if (tile_x + 2 < args.dst_tensor.Width()) { FLT4 value = t0 * At[13] + t1 * At[15] + bias_val; - uint3 gid = uint3(tile_x + 2, tile_y + y, global_ids.z); - $2 args.dst_tensor.Write(value, tile_x + 2, tile_y + y, global_ids.z); } if (tile_x + 3 < args.dst_tensor.Width()) { FLT4 value = t2 * At[19] + t3 * At[21] + I[y][5] + bias_val; - uint3 gid = uint3(tile_x + 3, tile_y + y, global_ids.z); - $2 args.dst_tensor.Write(value, tile_x + 3, tile_y + y, global_ids.z); } } @@ -411,28 +403,20 @@ kernel void ComputeFunction($1 FLT4 bias_val = args.biases.Read(DST_Z); if (tile_x < args.dst_tensor.Width()) { FLT4 value = I0 + t0 + t1 + bias_val; - uint3 gid = uint3(tile_x, tile_y, global_ids.z); - $2; args.dst_tensor.Write(value, tile_x, tile_y, global_ids.z); } FLT4 t2 = I1 - I2; FLT4 t3 = I3 - I4; if (tile_x + 1 < args.dst_tensor.Width()) { FLT4 value = t2 * At[7] + t3 * At[9] + bias_val; - uint3 gid = uint3(tile_x + 1, tile_y, global_ids.z); - $2; args.dst_tensor.Write(value, tile_x + 1, tile_y, global_ids.z); } if (tile_x + 2 < args.dst_tensor.Width()) { FLT4 value = t0 * At[13] + t1 * At[15] + bias_val; - uint3 gid = uint3(tile_x + 2, tile_y, global_ids.z); - $2; args.dst_tensor.Write(value, tile_x + 2, tile_y, global_ids.z); } if (tile_x + 3 < args.dst_tensor.Width()) { FLT4 value = t2 * At[19] + t3 * At[21] + I5 + bias_val; - uint3 gid = uint3(tile_x + 3, tile_y, global_ids.z); - $2; args.dst_tensor.Write(value, tile_x + 3, tile_y, global_ids.z); } }