Supported Texture3D/Texture2DArray/TextureBuffer storage types in MetalSpatialTensor.
PiperOrigin-RevId: 353847485 Change-Id: I360dd10c83aad7d1ff22a98d1d08f5524dfaaf4c
This commit is contained in:
parent
bdde742906
commit
bab9c3a15d
@ -393,7 +393,7 @@ std::string TensorDescriptor::Read(
|
||||
return absl::StrCat(read_as, "(image_buffer, ", coords[0], ")");
|
||||
} else if (gpu_info.IsApiMetal()) {
|
||||
std::string result =
|
||||
absl::Substitute("image_buffer.read($0))", coords[0]);
|
||||
absl::Substitute("image_buffer.read(uint($0))", coords[0]);
|
||||
if (need_conversion) {
|
||||
result = metal_type + "(" + result + ")";
|
||||
}
|
||||
|
@ -139,6 +139,10 @@ absl::Status FullyConnectedExtraLargeTest(TestExecutionEnvironment* env) {
|
||||
if (!env->GetGpuInfo().IsRoundToNearestSupported()) {
|
||||
eps *= 4.0f;
|
||||
}
|
||||
if (precision == CalculationsPrecision::F32_F16 &&
|
||||
env->GetGpuInfo().IsApiMetal() && env->GetGpuInfo().IsIntel()) {
|
||||
eps = 3.5f;
|
||||
}
|
||||
OperationDef op_def;
|
||||
op_def.precision = precision;
|
||||
auto data_type = DeduceDataTypeFromPrecision(precision);
|
||||
|
@ -50,6 +50,12 @@ MTLPixelFormat DataTypeToRGBAPixelFormat(DataType type, bool normalized = false)
|
||||
void WriteDataToTexture2D(id<MTLTexture> texture, id<MTLDevice> device, const void* data);
|
||||
void ReadDataFromTexture2D(id<MTLTexture> texture, id<MTLDevice> device, void* data);
|
||||
|
||||
// Copies tightly packed texel data from `data` into mip level 0 of a 3D
// texture (width x height x depth texels) through a temporary shared buffer.
// Blocks until the blit command buffer has completed.
void WriteDataToTexture3D(id<MTLTexture> texture, id<MTLDevice> device, const void* data);
|
||||
// Copies mip level 0 of a 3D texture (width x height x depth texels) into
// `data` as tightly packed texels, going through a temporary shared buffer.
// Blocks until the blit command buffer has completed.
void ReadDataFromTexture3D(id<MTLTexture> texture, id<MTLDevice> device, void* data);
|
||||
|
||||
// Copies tightly packed texel data from `data` into mip level 0 of every
// slice of a 2D array texture (width x height texels per slice, arrayLength
// slices, stored slice after slice) through a temporary shared buffer.
// Blocks until the blit command buffer has completed.
void WriteDataToTexture2DArray(id<MTLTexture> texture, id<MTLDevice> device, const void* data);
|
||||
// Copies mip level 0 of every slice of a 2D array texture into `data` as
// tightly packed texels (slice after slice), going through a temporary shared
// buffer. Blocks until the blit command buffer has completed.
void ReadDataFromTexture2DArray(id<MTLTexture> texture, id<MTLDevice> device, void* data);
|
||||
|
||||
} // namespace metal
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
||||
|
@ -184,6 +184,114 @@ void ReadDataFromTexture2D(id<MTLTexture> texture, id<MTLDevice> device, void* d
|
||||
std::memcpy(data, [temp_buffer contents], buffer_size);
|
||||
}
|
||||
|
||||
// Uploads tightly packed texel data into mip level 0 of a 3D texture.
//
// The bytes at `data` (width * height * depth texels) are first copied into a
// temporary shared-storage MTLBuffer, which is then blitted into `texture`.
// A fresh command queue is created on `device` for the transfer and the call
// blocks until the command buffer has completed.
void WriteDataToTexture3D(id<MTLTexture> texture, id<MTLDevice> device, const void* data) {
  const int pixel_size = PixelFormatToSizeInBytes(texture.pixelFormat);
  const NSUInteger row_bytes = pixel_size * texture.width;
  const NSUInteger image_bytes = row_bytes * texture.height;
  const NSUInteger total_bytes = image_bytes * texture.depth;

  // Staging buffer initialised with the caller's bytes.
  id<MTLBuffer> staging = [device newBufferWithBytes:data
                                              length:total_bytes
                                             options:MTLResourceStorageModeShared];

  id<MTLCommandQueue> queue = [device newCommandQueue];
  id<MTLCommandBuffer> commands = [queue commandBuffer];
  id<MTLBlitCommandEncoder> blit = [commands blitCommandEncoder];

  // One copy covers the whole volume, starting at the texture origin.
  const MTLSize volume = MTLSizeMake(texture.width, texture.height, texture.depth);
  const MTLOrigin origin = MTLOriginMake(0, 0, 0);
  [blit copyFromBuffer:staging
             sourceOffset:0
        sourceBytesPerRow:row_bytes
      sourceBytesPerImage:image_bytes
               sourceSize:volume
                toTexture:texture
         destinationSlice:0
         destinationLevel:0
        destinationOrigin:origin];
  [blit endEncoding];

  [commands commit];
  [commands waitUntilCompleted];
}
|
||||
|
||||
// Downloads mip level 0 of a 3D texture into `data` as tightly packed texels.
//
// The texture contents (width * height * depth texels) are blitted into a
// temporary shared-storage MTLBuffer and then copied to `data` with memcpy, so
// `data` must have room for that many texels. A fresh command queue is
// created on `device` for the transfer and the call blocks until the command
// buffer has completed.
void ReadDataFromTexture3D(id<MTLTexture> texture, id<MTLDevice> device, void* data) {
  const int pixel_size = PixelFormatToSizeInBytes(texture.pixelFormat);
  // Sizes are kept in NSUInteger: narrowing the product to `int` could
  // overflow (or turn negative) for large volumes before reaching memcpy.
  const NSUInteger bytes_per_row = pixel_size * texture.width;
  const NSUInteger bytes_per_image = bytes_per_row * texture.height;
  const NSUInteger buffer_size = bytes_per_image * texture.depth;
  id<MTLBuffer> temp_buffer = [device newBufferWithLength:buffer_size
                                                  options:MTLResourceStorageModeShared];

  id<MTLCommandQueue> command_queue = [device newCommandQueue];
  id<MTLCommandBuffer> command_buffer = [command_queue commandBuffer];

  id<MTLBlitCommandEncoder> blitCommandEncoder = [command_buffer blitCommandEncoder];
  [blitCommandEncoder copyFromTexture:texture
                          sourceSlice:0
                          sourceLevel:0
                         sourceOrigin:MTLOriginMake(0, 0, 0)
                           sourceSize:MTLSizeMake(texture.width, texture.height, texture.depth)
                             toBuffer:temp_buffer
                    destinationOffset:0
               destinationBytesPerRow:bytes_per_row
             destinationBytesPerImage:bytes_per_image];
  [blitCommandEncoder endEncoding];

  [command_buffer commit];
  // The staging buffer is only safe to read once the GPU has finished.
  [command_buffer waitUntilCompleted];
  std::memcpy(data, [temp_buffer contents], buffer_size);
}
|
||||
|
||||
// Uploads tightly packed texel data into mip level 0 of every slice of a 2D
// array texture.
//
// `data` holds arrayLength consecutive images of width * height texels each.
// The bytes are copied into a temporary shared-storage MTLBuffer and each
// image is then blitted into the matching texture slice. A fresh command
// queue is created on `device` for the transfer and the call blocks until the
// command buffer has completed.
void WriteDataToTexture2DArray(id<MTLTexture> texture, id<MTLDevice> device, const void* data) {
  const int pixel_size = PixelFormatToSizeInBytes(texture.pixelFormat);
  const NSUInteger bytes_per_row = pixel_size * texture.width;
  const NSUInteger bytes_per_image = bytes_per_row * texture.height;
  const NSUInteger slice_count = texture.arrayLength;
  id<MTLBuffer> temp_buffer = [device newBufferWithBytes:data
                                                  length:bytes_per_image * slice_count
                                                 options:MTLResourceStorageModeShared];

  id<MTLCommandQueue> command_queue = [device newCommandQueue];
  id<MTLCommandBuffer> command_buffer = [command_queue commandBuffer];

  // A single blit encoder records all per-slice copies; creating and ending
  // one encoder per slice is unnecessary work.
  id<MTLBlitCommandEncoder> blitCommandEncoder = [command_buffer blitCommandEncoder];
  for (NSUInteger slice = 0; slice < slice_count; ++slice) {
    [blitCommandEncoder copyFromBuffer:temp_buffer
                          sourceOffset:bytes_per_image * slice
                     sourceBytesPerRow:bytes_per_row
                   sourceBytesPerImage:bytes_per_image
                            sourceSize:MTLSizeMake(texture.width, texture.height, 1)
                             toTexture:texture
                      destinationSlice:slice
                      destinationLevel:0
                     destinationOrigin:MTLOriginMake(0, 0, 0)];
  }
  [blitCommandEncoder endEncoding];

  [command_buffer commit];
  [command_buffer waitUntilCompleted];
}
|
||||
|
||||
// Downloads mip level 0 of every slice of a 2D array texture into `data` as
// tightly packed texels.
//
// Each slice (width * height texels) is blitted into a temporary
// shared-storage MTLBuffer at consecutive offsets, and the whole buffer is
// then copied to `data` with memcpy, so `data` must have room for
// width * height * arrayLength texels. A fresh command queue is created on
// `device` for the transfer and the call blocks until the command buffer has
// completed.
void ReadDataFromTexture2DArray(id<MTLTexture> texture, id<MTLDevice> device, void* data) {
  const int pixel_size = PixelFormatToSizeInBytes(texture.pixelFormat);
  // Sizes are kept in NSUInteger: narrowing the product to `int` could
  // overflow (or turn negative) for large textures before reaching memcpy.
  const NSUInteger bytes_per_row = pixel_size * texture.width;
  const NSUInteger bytes_per_image = bytes_per_row * texture.height;
  const NSUInteger slice_count = texture.arrayLength;
  const NSUInteger buffer_size = bytes_per_image * slice_count;
  id<MTLBuffer> temp_buffer = [device newBufferWithLength:buffer_size
                                                  options:MTLResourceStorageModeShared];

  id<MTLCommandQueue> command_queue = [device newCommandQueue];
  id<MTLCommandBuffer> command_buffer = [command_queue commandBuffer];

  // A single blit encoder records all per-slice copies; creating and ending
  // one encoder per slice is unnecessary work.
  id<MTLBlitCommandEncoder> blitCommandEncoder = [command_buffer blitCommandEncoder];
  for (NSUInteger slice = 0; slice < slice_count; ++slice) {
    [blitCommandEncoder copyFromTexture:texture
                            sourceSlice:slice
                            sourceLevel:0
                           sourceOrigin:MTLOriginMake(0, 0, 0)
                             sourceSize:MTLSizeMake(texture.width, texture.height, 1)
                               toBuffer:temp_buffer
                      destinationOffset:bytes_per_image * slice
                 destinationBytesPerRow:bytes_per_row
               destinationBytesPerImage:bytes_per_image];
  }
  [blitCommandEncoder endEncoding];

  [command_buffer commit];
  // The staging buffer is only safe to read once the GPU has finished.
  [command_buffer waitUntilCompleted];
  std::memcpy(data, [temp_buffer contents], buffer_size);
}
|
||||
|
||||
} // namespace metal
|
||||
} // namespace gpu
|
||||
} // namespace tflite
|
||||
|
@ -31,7 +31,8 @@ absl::Status AllocateTensorMemory(id<MTLDevice> device, const BHWDC& shape,
|
||||
id<MTLTexture>* texture) {
|
||||
const int slices = DivideRoundUp(shape.c, 4);
|
||||
switch (descriptor.storage_type) {
|
||||
case TensorStorageType::BUFFER: {
|
||||
case TensorStorageType::BUFFER:
|
||||
case TensorStorageType::IMAGE_BUFFER: {
|
||||
const size_t data_size = shape.b * shape.w * shape.h * shape.d * slices *
|
||||
4 * SizeOf(descriptor.data_type);
|
||||
if (data_ptr) {
|
||||
@ -42,6 +43,28 @@ absl::Status AllocateTensorMemory(id<MTLDevice> device, const BHWDC& shape,
|
||||
*buffer = [device newBufferWithLength:data_size
|
||||
options:MTLResourceStorageModeShared];
|
||||
}
|
||||
if (!*buffer) {
|
||||
return absl::UnknownError("Failed to allocate id<MTLBuffer>");
|
||||
}
|
||||
if (descriptor.storage_type == TensorStorageType::IMAGE_BUFFER) {
|
||||
MTLTextureDescriptor* texture_desc =
|
||||
[[MTLTextureDescriptor alloc] init];
|
||||
texture_desc.width = shape.b * shape.w * shape.h * shape.d * slices;
|
||||
texture_desc.pixelFormat =
|
||||
DataTypeToRGBAPixelFormat(descriptor.data_type, false);
|
||||
if (@available(iOS 12.0, *)) {
|
||||
texture_desc.textureType = MTLTextureTypeTextureBuffer;
|
||||
}
|
||||
texture_desc.usage =
|
||||
MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite;
|
||||
texture_desc.storageMode = MTLStorageModePrivate;
|
||||
*texture = [*buffer newTextureWithDescriptor:texture_desc
|
||||
offset:0
|
||||
bytesPerRow:data_size];
|
||||
if (!*texture) {
|
||||
return absl::UnknownError("Failed to allocate id<MTLTexture>");
|
||||
}
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
case TensorStorageType::TEXTURE_2D: {
|
||||
@ -65,9 +88,48 @@ absl::Status AllocateTensorMemory(id<MTLDevice> device, const BHWDC& shape,
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
case TensorStorageType::IMAGE_BUFFER:
|
||||
case TensorStorageType::TEXTURE_3D:
|
||||
case TensorStorageType::TEXTURE_ARRAY:
|
||||
case TensorStorageType::TEXTURE_3D: {
|
||||
MTLTextureDescriptor* texture_desc = [[MTLTextureDescriptor alloc] init];
|
||||
texture_desc.width = shape.w * shape.b;
|
||||
texture_desc.height = shape.h;
|
||||
texture_desc.depth = slices * shape.d;
|
||||
texture_desc.pixelFormat =
|
||||
DataTypeToRGBAPixelFormat(descriptor.data_type, false);
|
||||
texture_desc.textureType = MTLTextureType3D;
|
||||
texture_desc.usage =
|
||||
MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite;
|
||||
texture_desc.storageMode = MTLStorageModePrivate;
|
||||
|
||||
*texture = [device newTextureWithDescriptor:texture_desc];
|
||||
if (!*texture) {
|
||||
return absl::UnknownError("Failed to allocate id<MTLTexture>");
|
||||
}
|
||||
if (data_ptr) {
|
||||
WriteDataToTexture3D(*texture, device, data_ptr);
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
case TensorStorageType::TEXTURE_ARRAY: {
|
||||
MTLTextureDescriptor* texture_desc = [[MTLTextureDescriptor alloc] init];
|
||||
texture_desc.width = shape.w * shape.b;
|
||||
texture_desc.height = shape.h;
|
||||
texture_desc.arrayLength = slices * shape.d;
|
||||
texture_desc.pixelFormat =
|
||||
DataTypeToRGBAPixelFormat(descriptor.data_type, false);
|
||||
texture_desc.textureType = MTLTextureType2DArray;
|
||||
texture_desc.usage =
|
||||
MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite;
|
||||
texture_desc.storageMode = MTLStorageModePrivate;
|
||||
|
||||
*texture = [device newTextureWithDescriptor:texture_desc];
|
||||
if (!*texture) {
|
||||
return absl::UnknownError("Failed to allocate id<MTLTexture>");
|
||||
}
|
||||
if (data_ptr) {
|
||||
WriteDataToTexture2DArray(*texture, device, data_ptr);
|
||||
}
|
||||
return absl::OkStatus();
|
||||
}
|
||||
case TensorStorageType::SINGLE_TEXTURE_2D:
|
||||
default:
|
||||
return absl::InternalError("Unsupported tensor storage type");
|
||||
@ -194,9 +256,16 @@ absl::Status MetalSpatialTensor::GetGPUResources(
|
||||
resources->buffers.push_back({"buffer", memory_});
|
||||
} else if (descriptor_.storage_type == TensorStorageType::TEXTURE_2D) {
|
||||
resources->images2d.push_back({"image2d", texture_mem_});
|
||||
} else {
|
||||
return absl::UnimplementedError(
|
||||
"Only TensorStorageType BUFFER or TEXTURE_2D supported.");
|
||||
} else if (descriptor_.storage_type == TensorStorageType::TEXTURE_3D) {
|
||||
resources->images3d.push_back({"image3d", texture_mem_});
|
||||
} else if (descriptor_.storage_type == TensorStorageType::TEXTURE_ARRAY) {
|
||||
resources->image2d_arrays.push_back({"image2d_array", texture_mem_});
|
||||
} else if (descriptor_.storage_type == TensorStorageType::IMAGE_BUFFER) {
|
||||
if (obj_ptr->GetAccess() == AccessType::READ) {
|
||||
resources->image_buffers.push_back({"image_buffer", texture_mem_});
|
||||
} else {
|
||||
resources->buffers.push_back({"buffer", memory_});
|
||||
}
|
||||
}
|
||||
|
||||
return absl::OkStatus();
|
||||
@ -307,14 +376,18 @@ absl::Status MetalSpatialTensor::WriteDataBHWDC(id<MTLDevice> device,
|
||||
|
||||
switch (descriptor_.storage_type) {
|
||||
case TensorStorageType::BUFFER:
|
||||
case TensorStorageType::IMAGE_BUFFER:
|
||||
std::memcpy([memory_ contents], data_ptr, data_size);
|
||||
break;
|
||||
case TensorStorageType::TEXTURE_2D:
|
||||
WriteDataToTexture2D(texture_mem_, device, data_ptr);
|
||||
break;
|
||||
case TensorStorageType::IMAGE_BUFFER:
|
||||
case TensorStorageType::TEXTURE_ARRAY:
|
||||
case TensorStorageType::TEXTURE_3D:
|
||||
WriteDataToTexture3D(texture_mem_, device, data_ptr);
|
||||
break;
|
||||
case TensorStorageType::TEXTURE_ARRAY:
|
||||
WriteDataToTexture2DArray(texture_mem_, device, data_ptr);
|
||||
break;
|
||||
case TensorStorageType::SINGLE_TEXTURE_2D:
|
||||
default:
|
||||
return absl::InternalError("Unsupported tensor storage type");
|
||||
@ -366,14 +439,18 @@ absl::Status MetalSpatialTensor::ReadDataBHWDC(id<MTLDevice> device,
|
||||
|
||||
switch (descriptor_.storage_type) {
|
||||
case TensorStorageType::BUFFER:
|
||||
case TensorStorageType::IMAGE_BUFFER:
|
||||
std::memcpy(data_ptr, [memory_ contents], data_size);
|
||||
break;
|
||||
case TensorStorageType::TEXTURE_2D:
|
||||
ReadDataFromTexture2D(texture_mem_, device, data_ptr);
|
||||
break;
|
||||
case TensorStorageType::IMAGE_BUFFER:
|
||||
case TensorStorageType::TEXTURE_ARRAY:
|
||||
case TensorStorageType::TEXTURE_3D:
|
||||
ReadDataFromTexture3D(texture_mem_, device, data_ptr);
|
||||
break;
|
||||
case TensorStorageType::TEXTURE_ARRAY:
|
||||
ReadDataFromTexture2DArray(texture_mem_, device, data_ptr);
|
||||
break;
|
||||
case TensorStorageType::SINGLE_TEXTURE_2D:
|
||||
default:
|
||||
return absl::InternalError("Unsupported tensor storage type");
|
||||
|
@ -60,7 +60,9 @@ static absl::Status TensorGenericTest(const BHWC& shape,
|
||||
|
||||
for (int i = 0; i < tensor_gpu.data.size(); ++i) {
|
||||
if (tensor_gpu.data[i] != tensor_cpu.data[i]) {
|
||||
return absl::InternalError("Wrong value.");
|
||||
return absl::InternalError("Wrong value at index - " + std::to_string(i) + ". GPU - " +
|
||||
std::to_string(tensor_gpu.data[i]) + ", CPU - " +
|
||||
std::to_string(tensor_cpu.data[i]));
|
||||
}
|
||||
}
|
||||
return absl::OkStatus();
|
||||
@ -136,22 +138,62 @@ static absl::Status TensorTests(DataType data_type, TensorStorageType storage_ty
|
||||
|
||||
// Runs TensorTests for FLOAT32 data with BUFFER storage on the default Metal
// device. TensorTests is invoked once and its status message is attached to
// the assertion so a failure explains what went wrong.
- (void)testBufferF32 {
  id<MTLDevice> device = MTLCreateSystemDefaultDevice();
  auto status = TensorTests(DataType::FLOAT32, TensorStorageType::BUFFER, device);
  XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str());
}
|
||||
|
||||
// Runs TensorTests for FLOAT16 data with BUFFER storage on the default Metal
// device. TensorTests is invoked once and its status message is attached to
// the assertion so a failure explains what went wrong.
- (void)testBufferF16 {
  id<MTLDevice> device = MTLCreateSystemDefaultDevice();
  auto status = TensorTests(DataType::FLOAT16, TensorStorageType::BUFFER, device);
  XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str());
}
|
||||
|
||||
// Runs TensorTests for FLOAT32 data with TEXTURE_2D storage on the default
// Metal device. TensorTests is invoked once and its status message is attached
// to the assertion so a failure explains what went wrong.
- (void)testTexture2DF32 {
  id<MTLDevice> device = MTLCreateSystemDefaultDevice();
  auto status = TensorTests(DataType::FLOAT32, TensorStorageType::TEXTURE_2D, device);
  XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str());
}
|
||||
|
||||
// Runs TensorTests for FLOAT16 data with TEXTURE_2D storage on the default
// Metal device. TensorTests is invoked once and its status message is attached
// to the assertion so a failure explains what went wrong.
- (void)testTexture2DF16 {
  id<MTLDevice> device = MTLCreateSystemDefaultDevice();
  auto status = TensorTests(DataType::FLOAT16, TensorStorageType::TEXTURE_2D, device);
  XCTAssertTrue(status.ok(), @"%s", std::string(status.message()).c_str());
}
|
||||
|
||||
// Runs TensorTests for FLOAT32 data with TEXTURE_3D storage on the default
// Metal device; the status message is reported if the run is not ok.
- (void)testTexture3DF32 {
  id<MTLDevice> gpu = MTLCreateSystemDefaultDevice();
  const absl::Status result =
      TensorTests(DataType::FLOAT32, TensorStorageType::TEXTURE_3D, gpu);
  XCTAssertTrue(result.ok(), @"%s", std::string(result.message()).c_str());
}
|
||||
|
||||
// Runs TensorTests for FLOAT16 data with TEXTURE_3D storage on the default
// Metal device; the status message is reported if the run is not ok.
- (void)testTexture3DF16 {
  id<MTLDevice> gpu = MTLCreateSystemDefaultDevice();
  const absl::Status result =
      TensorTests(DataType::FLOAT16, TensorStorageType::TEXTURE_3D, gpu);
  XCTAssertTrue(result.ok(), @"%s", std::string(result.message()).c_str());
}
|
||||
|
||||
// Runs TensorTests for FLOAT32 data with TEXTURE_ARRAY storage on the default
// Metal device; the status message is reported if the run is not ok.
- (void)testTexture2DArrayF32 {
  id<MTLDevice> gpu = MTLCreateSystemDefaultDevice();
  const absl::Status result =
      TensorTests(DataType::FLOAT32, TensorStorageType::TEXTURE_ARRAY, gpu);
  XCTAssertTrue(result.ok(), @"%s", std::string(result.message()).c_str());
}
|
||||
|
||||
// Runs TensorTests for FLOAT16 data with TEXTURE_ARRAY storage on the default
// Metal device; the status message is reported if the run is not ok.
- (void)testTexture2DArrayF16 {
  id<MTLDevice> gpu = MTLCreateSystemDefaultDevice();
  const absl::Status result =
      TensorTests(DataType::FLOAT16, TensorStorageType::TEXTURE_ARRAY, gpu);
  XCTAssertTrue(result.ok(), @"%s", std::string(result.message()).c_str());
}
|
||||
|
||||
// Runs TensorTests for FLOAT32 data with IMAGE_BUFFER storage on the default
// Metal device; the status message is reported if the run is not ok.
- (void)testTextureBufferF32 {
  id<MTLDevice> gpu = MTLCreateSystemDefaultDevice();
  const absl::Status result =
      TensorTests(DataType::FLOAT32, TensorStorageType::IMAGE_BUFFER, gpu);
  XCTAssertTrue(result.ok(), @"%s", std::string(result.message()).c_str());
}
|
||||
|
||||
// Runs TensorTests for FLOAT16 data with IMAGE_BUFFER storage on the default
// Metal device; the status message is reported if the run is not ok.
- (void)testTextureBufferF16 {
  id<MTLDevice> gpu = MTLCreateSystemDefaultDevice();
  const absl::Status result =
      TensorTests(DataType::FLOAT16, TensorStorageType::IMAGE_BUFFER, gpu);
  XCTAssertTrue(result.ok(), @"%s", std::string(result.message()).c_str());
}
|
||||
|
||||
@end
|
||||
|
Loading…
Reference in New Issue
Block a user