Remove the experimental xla quantization work from open source (II)

PiperOrigin-RevId: 305821944
Change-Id: I3d606fd11cb3f92691fee3a85b9d35a29b2038da

parent 84ff3e44b2
commit f22174826d
@@ -70,7 +70,6 @@ cc_library(
        "//tensorflow/compiler/mlir/lite:tensorflow_lite_quantize",
        "//tensorflow/compiler/mlir/lite/quantization:quantization_passes",
        "//tensorflow/compiler/mlir/lite/quantization/tensorflow:tf_to_quant",
        "//tensorflow/compiler/mlir/lite/quantization/xla:hlo_xla_quantization_passes",
        "//tensorflow/compiler/mlir/tensorflow",
        "//tensorflow/compiler/mlir/tensorflow:tensorflow_dialect_registration",
        "//tensorflow/compiler/mlir/tensorflow:tensorflow_passes",
@@ -14,7 +14,10 @@ package(
package_group(
    name = "friends",
    includes = ["//third_party/mlir:subpackages"],
    packages = ["//tensorflow/compiler/mlir/..."],
    packages = [
        "//learning/brain/experimental/mlir/quantization/...",
        "//tensorflow/compiler/mlir/...",
    ],
)

exports_files([
@@ -1,86 +0,0 @@
load(
    "//third_party/mlir:tblgen.bzl",
    "gentbl",
)

package(
    default_visibility = [
        ":friends",
    ],
    licenses = ["notice"],  # Apache 2.0
)

package_group(
    name = "friends",
    includes = ["//third_party/mlir:subpackages"],
    packages = [
        "//learning/brain/experimental/mlir/quantization/...",
        "//tensorflow/compiler/mlir/...",
        "//tensorflow/compiler/mlir/lite/...",
    ],
)

cc_library(
    name = "hlo_xla_quantization_passes",
    srcs = [
        "cpu_kernel_fusion.cc",
        "generated_cpu_kernel_fusion.inc",
        "materialize.cc",
        "op_quant_spec.inc",
        "propagate.cc",
    ],
    hdrs = [
        "passes.h",
    ],
    deps = [
        ":cpu_device_target",
        "//tensorflow/compiler/mlir/lite/quantization:quantization_config",
        "//tensorflow/compiler/mlir/lite/quantization:quantization_context",
        "//tensorflow/compiler/mlir/lite/quantization:quantization_lib",
        "//tensorflow/compiler/mlir/xla:hlo",
        "//tensorflow/compiler/xla/client/lib:quantize",
        "@com_google_absl//absl/memory",
        "@llvm-project//llvm:support",
        "@llvm-project//mlir:IR",
        "@llvm-project//mlir:Pass",
        "@llvm-project//mlir:QuantOps",
        "@llvm-project//mlir:StandardOps",
        "@llvm-project//mlir:Support",
        "@llvm-project//mlir:TransformUtils",
    ],
    alwayslink = 1,
)

cc_library(
    name = "cpu_device_target",
    srcs = [
        "cpu_device_target.cc",
    ],
    hdrs = [
        "cpu_device_target.h",
    ],
    deps = [
        "//tensorflow/compiler/mlir/lite/quantization:device_target",
        "//tensorflow/compiler/mlir/lite/quantization:quantization_context",
        "@llvm-project//mlir:IR",
        "@llvm-project//mlir:QuantOps",
        "@llvm-project//mlir:Support",
    ],
)

gentbl(
    name = "cpu_kernel_fusion_inc_gen",
    tbl_outs = [
        (
            "-gen-rewriters",
            "generated_cpu_kernel_fusion.inc",
        ),
    ],
    tblgen = "@llvm-project//mlir:mlir-tblgen",
    td_file = "cpu_kernel_fusion.td",
    td_srcs = [
        "@llvm-project//mlir:StdOpsTdFiles",
        "//tensorflow/compiler/mlir/xla:hlo_ops_td_files",
        "//tensorflow/compiler/mlir/lite/quantization:quantization_td_files",
    ],
)
@@ -1,67 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/compiler/mlir/lite/quantization/xla/cpu_device_target.h"

#include "mlir/Dialect/Quant/QuantOps.h"  // from @llvm-project
#include "mlir/IR/MLIRContext.h"  // from @llvm-project
#include "mlir/Support/LogicalResult.h"  // from @llvm-project
#include "tensorflow/compiler/mlir/lite/quantization/quantization_context.h"

namespace mlir {
namespace xla_hlo {

namespace ph = std::placeholders;

CpuDeviceTarget::CpuDeviceTarget(MLIRContext* ctx) : DeviceTarget(ctx) {
  RegisterKernel("generic.concat", {qi8_, qi8_, qi8_},
                 quant::ScaleConstraintType::OutputInputSameScale);

  // TODO(fengliuai): All the combinations are required to list. We need to
  // improve this.
  RegisterKernel("generic.reshape", {qi8_, any_},
                 quant::ScaleConstraintType::OutputInputSameScale);
  RegisterKernel("generic.reshape", {any_, qi8_},
                 quant::ScaleConstraintType::OutputInputSameScale);

  RegisterKernel("generic.mul", {qi8_, qi8_, qi8_},
                 quant::ScaleConstraintType::OutputInputFreeScale);
  RegisterKernel("generic.mul_add", {qi8_, qi8n_, any_, qi8_},
                 std::bind(&CpuDeviceTarget::HandleMultiplyAccumulateScale,
                           this, ph::_1, ph::_2, ph::_3, ph::_4));
  RegisterKernel("generic.matmul_add", {qi8_, qi8n_, any_, qi8_},
                 std::bind(&CpuDeviceTarget::HandleMultiplyAccumulateScale,
                           this, ph::_1, ph::_2, ph::_3, ph::_4));
}

LogicalResult CpuDeviceTarget::HandleMultiplyAccumulateScale(
    quant::QuantizeContext* ctx, Operation* op,
    quant::AdjacentOperations* new_items, bool* changed) {
  auto bias_params = ctx->GetOperandParams(op, 2);
  if (!EmptyParams(bias_params)) {
    return success();
  }
  std::vector<quant::QuantParams> op_types{ctx->GetOperandParams(op, 0),
                                           ctx->GetOperandParams(op, 1)};
  auto bias_scale = GetUniformQuantizedTypeForBias(op_types);
  if (bias_scale && ctx->SetOperandParams(op, 2, bias_scale)) {
    *changed = true;
    new_items->push_back(op->getOperand(2).getDefiningOp());
  }
  return success();
}

}  // namespace xla_hlo
}  // namespace mlir
@@ -1,40 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_XLA_CPU_DEVICE_TARGET_H_
#define TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_XLA_CPU_DEVICE_TARGET_H_

#include "mlir/Dialect/Quant/QuantOps.h"  // from @llvm-project
#include "mlir/Support/LogicalResult.h"  // from @llvm-project
#include "tensorflow/compiler/mlir/lite/quantization/device_target.h"

namespace mlir {
namespace xla_hlo {

// Target specs for cpu kernels
class CpuDeviceTarget : public quant::DeviceTarget {
 public:
  explicit CpuDeviceTarget(MLIRContext* ctx);

 private:
  LogicalResult HandleMultiplyAccumulateScale(
      quant::QuantizeContext* ctx, Operation* op,
      quant::AdjacentOperations* new_items, bool* changed);
};

}  // namespace xla_hlo
}  // namespace mlir

#endif  // TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_XLA_CPU_DEVICE_TARGET_H_
@@ -1,347 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <math.h>

#include <algorithm>
#include <cstdint>
#include <initializer_list>
#include <iterator>
#include <numeric>
#include <string>

#include "absl/memory/memory.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "mlir/Dialect/Quant/QuantOps.h"  // from @llvm-project
#include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
#include "mlir/IR/Attributes.h"  // from @llvm-project
#include "mlir/IR/BlockAndValueMapping.h"  // from @llvm-project
#include "mlir/IR/Function.h"  // from @llvm-project
#include "mlir/IR/MLIRContext.h"  // from @llvm-project
#include "mlir/IR/Matchers.h"  // from @llvm-project
#include "mlir/IR/PatternMatch.h"  // from @llvm-project
#include "mlir/IR/StandardTypes.h"  // from @llvm-project
#include "mlir/IR/Value.h"  // from @llvm-project
#include "mlir/Pass/Pass.h"  // from @llvm-project
#include "mlir/Support/LLVM.h"  // from @llvm-project
#include "mlir/Support/LogicalResult.h"  // from @llvm-project
#include "mlir/Transforms/DialectConversion.h"  // from @llvm-project
#include "tensorflow/compiler/mlir/lite/quantization/quantization_config.h"
#include "tensorflow/compiler/mlir/lite/quantization/quantization_traits.h"
#include "tensorflow/compiler/mlir/lite/quantization/quantization_utils.h"
#include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h"
#include "tensorflow/compiler/xla/client/lib/quantize.h"

#define DEBUG_TYPE "quant-kernel-fusion"

constexpr int kFakeQuantOperandsNum = 5;
constexpr int kFakeQuantPerChannelOperandsNum = 6;

namespace mlir {
namespace xla_hlo {

namespace {

TypeAttr GetQuantSpec(Operation* op) {
  auto fake_quant = llvm::dyn_cast_or_null<CustomCallOp>(op);
  if (!fake_quant || fake_quant.getNumOperands() < kFakeQuantOperandsNum ||
      fake_quant.getNumOperands() > kFakeQuantPerChannelOperandsNum ||
      fake_quant.call_target_name() != "fake_quant_with_min_max_vars")
    return {};

  DenseFPElementsAttr min, max;
  DenseIntElementsAttr bit_width, narrow_range, quant_dim;
  if (!matchPattern(fake_quant.getOperand(1), m_Constant(&min)) ||
      !matchPattern(fake_quant.getOperand(2), m_Constant(&max)) ||
      !matchPattern(fake_quant.getOperand(3), m_Constant(&bit_width)) ||
      !matchPattern(fake_quant.getOperand(4), m_Constant(&narrow_range)))
    return {};

  auto bit_width_val = (*bit_width.attr_value_begin()).cast<IntegerAttr>();
  auto narrow_range_val = (*narrow_range.int_value_begin()).getSExtValue();
  int quant_dim_val = -1;
  if (fake_quant.getNumOperands() == kFakeQuantPerChannelOperandsNum &&
      matchPattern(fake_quant.getOperand(kFakeQuantPerChannelOperandsNum - 1),
                   m_Constant(&quant_dim))) {
    quant_dim_val = (*quant_dim.int_value_begin()).getSExtValue();
  }

  OpBuilder builder(op);
  Type input_type =
      fake_quant.getOperand(0).getType().cast<ShapedType>().getElementType();
  return quant::GetQuantizedTypeAttr(
      builder, input_type, min, max, quant_dim_val, bit_width_val,
      builder.getBoolAttr(narrow_range_val), /*is_signed=*/true);
}

// Collects input values from outside for 'ops'.
void CollectInputs(llvm::ArrayRef<Operation*> ops,
                   llvm::SmallVectorImpl<Value>* inputs,
                   llvm::SmallVectorImpl<Attribute>* input_specs) {
  for (Operation* op : ops) {
    for (Value operand : op->getOperands()) {
      if (std::find(inputs->begin(), inputs->end(), operand) != inputs->end()) {
        continue;
      }
      if (Operation* def_op = operand.getDefiningOp()) {
        if (std::find(ops.begin(), ops.end(), def_op) == ops.end()) {
          inputs->push_back(operand);
        }
      } else {  // argument value
        inputs->push_back(operand);
      }
    }
  }

  for (Value input : *inputs) {
    ShapedType input_type = input.getType().cast<ShapedType>();
    if (TypeAttr spec = GetQuantSpec(input.getDefiningOp())) {
      input_specs->push_back(spec);
    } else {
      input_specs->push_back(TypeAttr::get(input_type.getElementType()));
    }
  }
}

// Collects values that are produced by 'ops' and have use outside of 'ops'.
// TODO(fengliuai): if it is a single user and QDQ, write that to the specs.
void CollectRets(llvm::ArrayRef<Operation*> ops,
                 llvm::SmallVectorImpl<Value>* rets,
                 llvm::SmallVectorImpl<Type>* ret_types,
                 llvm::SmallVectorImpl<Attribute>* ret_specs) {
  for (Operation* op : ops) {
    // The constant will not be shared outside the region.
    if (llvm::isa<ConstantOp>(op)) continue;

    for (Value result : op->getResults()) {
      for (Operation* user : result.getUsers()) {
        // If there are any user outside of 'ops'
        if (std::find(ops.begin(), ops.end(), user) == ops.end()) {
          ShapedType ret_type = result.getType().cast<ShapedType>();
          rets->push_back(result);
          ret_types->push_back(ret_type);
          if (TypeAttr spec = GetQuantSpec(user)) {
            ret_specs->push_back(spec);
          } else {
            ret_specs->push_back(TypeAttr::get(ret_type.getElementType()));
          }
          break;
        }
      }
    }
  }
}

enum FusedActivationFunc { NONE, RELU, RELU1, RELU6 };

#define FLOAT_EQ(value, x) fabs(value - x) <= 1e-6

// If the op is max(in, 0.0), we consider this is from Relu, so both this op
// and constant 0.0 will be fused.
// If the op is clamp(0.0, in, 1.0) or clamp(0.0, in, 6.0), we consider this is
// from Relu1 or Relu6, so all the constants and this op will be fused.
// Returns the activation function type.
FusedActivationFunc FuseReluX(Operation* op,
                              llvm::SmallVectorImpl<Operation*>* fused) {
  if (auto max = llvm::dyn_cast<xla_hlo::MaxOp>(op)) {
    Value min_val = max.rhs();
    llvm::SmallVector<Operation*, 4> broadcast_ops;
    if (auto broadcast = llvm::dyn_cast_or_null<xla_hlo::BroadcastInDimOp>(
            min_val.getDefiningOp())) {
      min_val = broadcast.operand();
      broadcast_ops.push_back(broadcast);
    }
    DenseFPElementsAttr min;
    if (!matchPattern(min_val, m_Constant(&min))) {
      // In case the min value is lhs.
      min_val = max.lhs();
      broadcast_ops.clear();
      if (auto broadcast = llvm::dyn_cast_or_null<xla_hlo::BroadcastInDimOp>(
              min_val.getDefiningOp())) {
        min_val = broadcast.operand();
        broadcast_ops.push_back(broadcast);
      }
      if (!matchPattern(min_val, m_Constant(&min))) {
        return NONE;
      }
    }
    if (!min.isSplat() ||
        !(FLOAT_EQ(min.getSplatValue().cast<FloatAttr>().getValueAsDouble(),
                   0.0))) {
      return NONE;
    }

    // Include the constant 0.0 as well, to avoid being quantized.
    fused->push_back(min_val.getDefiningOp());
    fused->append(broadcast_ops.begin(), broadcast_ops.end());
    fused->push_back(max);
    return RELU;
  }

  if (auto clamp = llvm::dyn_cast<xla_hlo::ClampOp>(op)) {
    DenseFPElementsAttr lower, upper;
    if (!matchPattern(clamp.min(), m_Constant(&lower)) ||
        !matchPattern(clamp.max(), m_Constant(&upper)) || !lower.isSplat() ||
        !upper.isSplat() ||
        !(FLOAT_EQ(lower.getSplatValue().cast<FloatAttr>().getValueAsDouble(),
                   0.0))) {
      return NONE;
    }

    double upper_value =
        upper.getSplatValue().cast<FloatAttr>().getValueAsDouble();
    if (FLOAT_EQ(upper_value, 1.0) || FLOAT_EQ(upper_value, 6.0)) {
      fused->push_back(clamp.min().getDefiningOp());
      fused->push_back(clamp.max().getDefiningOp());
      fused->push_back(op);
      return (FLOAT_EQ(upper_value, 1.0) ? RELU1 : RELU6);
    }
  }
  return NONE;
}

llvm::SmallVector<Value, 0> FuseOps(PatternRewriter* rewriter,
                                    const std::initializer_list<Value>& results,
                                    StringRef kernel) {
  // Collect all the operations to be fused.
  llvm::SmallVector<Operation*, 4> fused;
  llvm::SmallVector<Location, 4> locs;
  fused.reserve(results.size());
  locs.reserve(results.size());
  for (auto value : results) {
    Operation* op = value.getDefiningOp();
    fused.push_back(op);
    locs.push_back(op->getLoc());
  }

  Operation* root = fused.back();

  FusedActivationFunc act_func = FusedActivationFunc::NONE;
  // If there is Relu, Relu1 or Relu6, fuse it as well.
  if (results.size() > 0 && std::rbegin(results)->hasOneUse()) {
    act_func = FuseReluX(*std::rbegin(results)->user_begin(), &fused);
  }

  // Collect inputs from outside to 'ops'.
  llvm::SmallVector<Value, 4> inputs;
  llvm::SmallVector<Attribute, 4> input_specs;
  CollectInputs(fused, &inputs, &input_specs);

  // Collect outputs from 'ops' to outside.
  llvm::SmallVector<Value, 4> rets;
  llvm::SmallVector<Type, 4> ret_types;
  llvm::SmallVector<Attribute, 4> ret_specs;
  CollectRets(fused, &rets, &ret_types, &ret_specs);

  // TODO(fengliuai): make activation function an attribute.
  std::string kernel_name;
  switch (act_func) {
    case RELU:
      kernel_name = llvm::Twine(kernel, "_relu").str();
      break;
    case RELU1:
      kernel_name = llvm::Twine(kernel, "_relu1").str();
      break;
    case RELU6:
      kernel_name = llvm::Twine(kernel, "_relu6").str();
      break;
    default:
      kernel_name = kernel.str();
  }

  // Create the region op with the return.
  auto region = rewriter->create<quant::QuantizeRegionOp>(
      rewriter->getFusedLoc(locs), ret_types, inputs,
      rewriter->getArrayAttr(input_specs), rewriter->getArrayAttr(ret_specs),
      kernel_name);
  auto* body = new Block();
  region.body().push_back(body);

  OpBuilder builder = OpBuilder::atBlockEnd(body);
  BlockAndValueMapping mapping;

  // Make block arguments and add it to the block value mapping.
  for (Value input : inputs) {
    mapping.map(input, body->addArgument(input.getType()));
  }

  // Clone the operations 'ops' to the region.
  for (Operation* op : fused) {
    builder.clone(*op, mapping);
  }

  llvm::SmallVector<Value, 4> new_rets;
  new_rets.reserve(rets.size());
  for (auto ret : llvm::enumerate(rets)) {
    Value new_ret = mapping.lookupOrNull(ret.value());
    assert(new_ret && "couldn't find return value.");
    new_rets.push_back(new_ret);
    ret.value().replaceAllUsesWith(region.getResult(ret.index()));
  }
  builder.create<quant::ReturnOp>(builder.getUnknownLoc(), new_rets);

  LLVM_DEBUG({
    assert(region.verify().Success && "failed to create quant region.");
    llvm::dbgs() << "\ncreated region: ";
    region.print(llvm::dbgs());
    llvm::dbgs() << "\n\n\n";
  });

  // All uses of the fused ops are replaced, so the values in this vector
  // will not be used.
  SmallVector<Value, 0> new_values(root->getNumResults(), region.getResult(0));
  return new_values;
}

struct CpuKernelFusionPass
    : public PassWrapper<CpuKernelFusionPass, FunctionPass> {
  explicit CpuKernelFusionPass() = default;
  CpuKernelFusionPass(const CpuKernelFusionPass&) {}

  void runOnFunction() override;
};

#include "tensorflow/compiler/mlir/lite/quantization/xla/generated_cpu_kernel_fusion.inc"

void CpuKernelFusionPass::runOnFunction() {
  Operation* op = getOperation();
  MLIRContext* ctx = op->getContext();
  OwningRewritePatternList patterns;
  populateWithGenerated(ctx, &patterns);
  applyPatternsGreedily(op->getRegions(), patterns);
}

}  // namespace

// Creates an instance of the xla_hlo cpu kernel fusion pass.
std::unique_ptr<OperationPass<FuncOp>> CreateCpuKernelFusionPass() {
  return std::make_unique<CpuKernelFusionPass>();
}

static PassRegistration<CpuKernelFusionPass> pass(
    "xla-hlo-cpu-fusion", "Fuse xla hlo ops into cpu kernels");

}  // namespace xla_hlo
}  // namespace mlir
@@ -1,70 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

include "tensorflow/compiler/mlir/xla/ir/hlo_ops.td"
include "mlir/IR/OpBase.td"
include "mlir/Dialect/StandardOps/IR/Ops.td"

class Fused1Ops<string kernel> : NativeCodeCall<
    "FuseOps(&$_builder, {$0}, \"" # kernel # "\")">;
class Fused2Ops<string kernel> : NativeCodeCall<
    "FuseOps(&$_builder, {$0, $1}, \"" # kernel # "\")">;
class Fused3Ops<string kernel> : NativeCodeCall<
    "FuseOps(&$_builder, {$0, $1, $2}, \"" # kernel # "\")">;
class Fused4Ops<string kernel> : NativeCodeCall<
    "FuseOps(&$_builder, {$0, $1, $2, $3}, \"" # kernel # "\")">;

// We shouldn't revisit those ops which have been fused. This constraint is
// required because the greedy pattern rewriter will visit and match any new
// ops. So when the source pattern are matched and wrapped by the quant region
// op, these ops will be matched again. To prevent this, this constraint is
// added to bypass any ops inside a quant region.
def NeedsToBeFused : Constraint<CPred<
    "!$0.getDefiningOp()->getParentOfType<quant::QuantizeRegionOp>()">>;

// dummy example
def : Pat<(HLO_AddOp:$add (HLO_MulOp:$mul $_, $_, $_), $_, $_),
          (Fused2Ops<"generic.mul_add"> $mul, $add),
          [(NeedsToBeFused $add)]>;

// add
def : Pat<(HLO_AddOp:$add $_, $_, $_),
          (Fused1Ops<"generic.add"> $add),
          [(NeedsToBeFused $add)]>;

// reduce_window: maxpool, avgpool
def : Pat<(HLO_ReduceWindowOp:$reduce $_, $_, $_, $_, $_, $_, $_),
          (Fused1Ops<"generic.reduce_window"> $reduce),
          [(NeedsToBeFused $reduce)]>;

// reshape
def : Pat<(HLO_ReshapeOp:$reshape $_), (Fused1Ops<"generic.reshape"> $reshape),
          [(NeedsToBeFused $reshape)]>;

// broadcast
def : Pat<(HLO_BroadcastInDimOp:$broadcast $_, $_),
          (Fused1Ops<"generic.broadcast"> $broadcast),
          [(NeedsToBeFused $broadcast)]>;

// dot -> add
def : Pat<(HLO_AddOp:$add (HLO_DotOp:$dot $_, $_, $_), $_, $_),
          (Fused2Ops<"generic.biased_dot"> $dot, $add),
          [(NeedsToBeFused $add)]>;

// conv -> add
def : Pat<(HLO_AddOp:$add
              (HLO_ConvOp:$conv $_, $_, $_, $_, $_, $_, $_, $_, $_, $_), $_, $_),
          (Fused2Ops<"generic.biased_conv"> $conv, $add),
          [(NeedsToBeFused $add)]>;
@@ -1,175 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// This transformation pass quantize the constant and rewrite the quantization
// ops by xla_hlo primitive ops.
#include <cstdint>
#include <iterator>
#include <numeric>
#include <string>

#include "absl/memory/memory.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "mlir/Dialect/Quant/QuantOps.h"  // from @llvm-project
#include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
#include "mlir/IR/Attributes.h"  // from @llvm-project
#include "mlir/IR/MLIRContext.h"  // from @llvm-project
#include "mlir/IR/PatternMatch.h"  // from @llvm-project
#include "mlir/IR/StandardTypes.h"  // from @llvm-project
#include "mlir/IR/Value.h"  // from @llvm-project
#include "mlir/Pass/Pass.h"  // from @llvm-project
#include "tensorflow/compiler/mlir/lite/quantization/quantization_config.h"
#include "tensorflow/compiler/mlir/lite/quantization/quantization_traits.h"
#include "tensorflow/compiler/mlir/lite/quantization/quantization_utils.h"
#include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h"
#include "tensorflow/compiler/xla/client/lib/quantize.h"

//===----------------------------------------------------------------------===//
// The pass to materialize the quantization results by xla primitive ops.
//
namespace mlir {
namespace xla_hlo {

namespace {

// This pattern matches the "constant->qcast->dcast" pattern and replaces it by
// "quantized constant->xla_hlo.dequantize". If it only matches the
// "non-constant->qcast->dcast" pattern, it will remove both the "qcast->dcast".
// We chain the pattern as a whole to bypass the type checks of the normal
// xla_hlo ops.
// TODO(fengliuai): make this pass work for bf16 input.
class RewriteDequantize : public OpRewritePattern<quant::DequantizeCastOp> {
 public:
  explicit RewriteDequantize(int64_t size, MLIRContext *context)
      : OpRewritePattern<quant::DequantizeCastOp>(context), size_(size) {}

  LogicalResult matchAndRewrite(quant::DequantizeCastOp op,
                                PatternRewriter &rewriter) const override {
    // quant.dcast
    // xla_hlo dequantize only takes min/max, so let's recover them from
    // the quantization parameters.
    Value dcast = op.arg();
    auto type = quant::QuantizedType::getQuantizedElementType(dcast.getType());
    if (!type || !type.isa<quant::UniformQuantizedType>()) {
      return failure();
    }
    auto qtype = type.cast<quant::UniformQuantizedType>();
    double scale = qtype.getScale();
    int64_t zero_point = qtype.getZeroPoint();
    float min = scale * (qtype.getStorageTypeMin() - zero_point);
    float max = scale * (qtype.getStorageTypeMax() - zero_point);

    // quant.qcast
    auto qcast =
        llvm::dyn_cast_or_null<quant::QuantizeCastOp>(dcast.getDefiningOp());
    if (!qcast) return failure();

    // constant
    DenseFPElementsAttr attr;
    // If it isn't a floating-point constant or the size is too small, let's
    // remove the quantization. Also the last dimension size should be a
    // multiplier of 4, so the shape isn't broken during packing and unpacking.
    if (!matchPattern(qcast.arg(), m_Constant(&attr)) ||
        attr.getNumElements() <= size_ ||
        attr.getType().getDimSize(attr.getType().getRank() - 1) % 4 != 0) {
      op.getResult().replaceAllUsesWith(qcast.arg());
      return success();
    }
    // TODO(fengliuai): implement transpose if it has high dimension.

    // Create the quantized result
    auto quantized_result =
        quant::Quantize(attr, qtype).dyn_cast_or_null<DenseIntElementsAttr>();
    if (!quantized_result) {
      return failure();
    }

    // Pack the uint8 bits to uint32. The shape is changed from from
    // [n0, n1, ..., nk] to [n0, n1, ..., nk / 4].
    std::vector<uint8_t> raw_data;
    for (auto d : quantized_result.getValues<uint8_t>()) {
      raw_data.push_back(d);
    }
    // The packing might increase the data size by paddings.
    auto packed_data = xla::PackToUint32<uint8_t>(raw_data);
    auto packed_shape = attr.getType().getShape().vec();
    int lower_dims = std::accumulate(
        packed_shape.begin(),
        std::next(packed_shape.begin(), packed_shape.size() - 1), 1,
        std::multiplies<int>());
    packed_shape[packed_shape.size() - 1] = packed_data.size() / lower_dims;
    auto packed_type =
        RankedTensorType::get(packed_shape, rewriter.getIntegerType(32));

    auto packed_quantized_result =
        DenseElementsAttr::get<uint32_t>(packed_type, packed_data);
    auto quantized_constant =
        rewriter.create<ConstantOp>(qcast.getLoc(), packed_quantized_result);

    // Create the xla dequantize op with bf16 output
    auto dequantized_type = RankedTensorType::get(attr.getType().getShape(),
                                                  rewriter.getBF16Type());
    auto dequantize = rewriter.create<DequantizeOp>(
        qcast.getLoc(), dequantized_type, quantized_constant,
        rewriter.getF32FloatAttr(min), rewriter.getF32FloatAttr(max),
        rewriter.getStringAttr("MIN_COMBINED"), rewriter.getBoolAttr(false),
        rewriter.getBoolAttr(false));

    // Convert bf16 output back to f32
    rewriter.replaceOpWithNewOp<ConvertOp>(op, op.getResult().getType(),
                                           dequantize);
    return success();
  }

 private:
  int64_t size_;
};

// Materialize the quantization results by hlo primitive ops.
struct MaterializeToXlaPass
    : public PassWrapper<MaterializeToXlaPass, FunctionPass> {
  explicit MaterializeToXlaPass() = default;
  MaterializeToXlaPass(const MaterializeToXlaPass &) {}

  void runOnFunction() override;
};

void MaterializeToXlaPass::runOnFunction() {
  FuncOp func = getFunction();
  MLIRContext *ctx = &getContext();

  OwningRewritePatternList patterns;
  // TODO(fengliuai): make the size 6 configurable.
  patterns.insert<RewriteDequantize>(6, ctx);

  applyPatternsGreedily(func, patterns);
}

}  // namespace

// Creates an instance of the xla_hlo dialect quantization propagation pass.
std::unique_ptr<OperationPass<FuncOp>> CreateMaterializeToXlaPass() {
  return std::make_unique<MaterializeToXlaPass>();
}

static PassRegistration<MaterializeToXlaPass> pass(
    "xla-hlo-materialize-quant",
    "Materialize the quantization results by xla primitve ops");

}  // namespace xla_hlo
}  // namespace mlir
@@ -1,7 +0,0 @@
// TODO(fengliuai): automatically generate this file
// TODO(fengliuai): add all the xla_hlo ops

static std::unique_ptr<quant::OpQuantSpec> GetOpQuantSpec(mlir::Operation *op) {
  auto spec = absl::make_unique<quant::OpQuantSpec>();
  return spec;
}
@@ -1,40 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_XLA_PASSES_H_
#define TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_XLA_PASSES_H_

#include <memory>

#include "mlir/IR/Function.h"  // from @llvm-project
#include "mlir/Pass/Pass.h"  // from @llvm-project

namespace mlir {
namespace xla_hlo {

// Propagate the quantization information to all the tensors according to the
// op quant spec.
std::unique_ptr<OperationPass<FuncOp>> CreatePropagateQuantPass();

// Rewrite the graph and quantize the constant.
std::unique_ptr<OperationPass<FuncOp>> CreateMaterializeToXlaPass();

// Fuse HLO ops into quantized regions.
std::unique_ptr<OperationPass<FuncOp>> CreateCpuKernelFusionPass();

}  // namespace xla_hlo
}  // namespace mlir

#endif  // TENSORFLOW_COMPILER_MLIR_LITE_QUANTIZATION_XLA_PASSES_H_
@@ -1,108 +0,0 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// This transformation pass applies quantization propagation on xla_hlo dialect.
#include <iterator>
#include <string>

#include "absl/memory/memory.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/CommandLine.h"
#include "mlir/IR/MLIRContext.h"  // from @llvm-project
#include "mlir/IR/PatternMatch.h"  // from @llvm-project
#include "mlir/IR/Value.h"  // from @llvm-project
#include "mlir/Pass/Pass.h"  // from @llvm-project
#include "tensorflow/compiler/mlir/lite/quantization/quantization_config.h"
#include "tensorflow/compiler/mlir/lite/quantization/quantization_context.h"
#include "tensorflow/compiler/mlir/lite/quantization/quantization_utils.h"
#include "tensorflow/compiler/mlir/lite/quantization/xla/cpu_device_target.h"

// NOLINTNEXTLINE
static llvm::cl::opt<bool> disable_per_channel(
    "xla-disable-per-channel", llvm::cl::value_desc("bool"),
    llvm::cl::desc("Whether disable per-channel quantized weights."),
    llvm::cl::init(false));

//===----------------------------------------------------------------------===//
// The quantization propagation Pass.
//
namespace mlir {
namespace xla_hlo {

namespace {

// Applies the quantization propagation on the input function. During the
// propagation, two facts are respected:
// - The quantization type (params) of the ops in the function
// - The quantization spec for the ops
// The propagation results should assign quantization types to all the tensors
// and the two restrictions are respected.
struct PropagateQuantPass
    : public PassWrapper<PropagateQuantPass, FunctionPass> {
  explicit PropagateQuantPass() = default;
  PropagateQuantPass(const PropagateQuantPass &) {}

  void runOnFunction() override;
};

#include "tensorflow/compiler/mlir/lite/quantization/xla/op_quant_spec.inc"

void PropagateQuantPass::runOnFunction() {
  FuncOp func = getFunction();
  // TODO(fengliuai): deprecate this old code generation path.
  // XLA only support uint8/uint16 quantization for now.
  ApplyQuantizationParamsPropagation(func, /*is_signed*/ false,
                                     disable_per_channel, GetOpQuantSpec);

  CpuDeviceTarget spec(&getContext());
  quant::QuantizeContext ctx(func, spec);

  std::vector<quant::QuantizeRegionOp> work_list = ctx.GetAllOps();
  bool changed = false;
  while (!work_list.empty()) {
    quant::QuantizeRegionOp op = work_list.back();
    work_list.pop_back();

    llvm::SmallVector<Operation *, 4> new_items;
    if (failed(ctx.Handle(op, &new_items, &changed))) {
      // The IR is still valid, thus we shouldn't fail.
      signalPassFailure();
    }
    for (auto item : new_items) {
      if (auto reg = llvm::dyn_cast_or_null<quant::QuantizeRegionOp>(item))
        work_list.push_back(reg);
    }
  }

  if (!changed) return;

  if (failed(ctx.Finalize())) {
    signalPassFailure();
  }
}

}  // namespace

// Creates an instance of the xla_hlo dialect quantization propagation pass.
std::unique_ptr<OperationPass<FuncOp>> CreatePropagateQuantPass() {
  return std::make_unique<PropagateQuantPass>();
}

static PassRegistration<PropagateQuantPass> pass(
    "xla-hlo-propagate-quant", "Propagate quantization information");

}  // namespace xla_hlo
}  // namespace mlir
@@ -1,33 +0,0 @@
load("//tensorflow/compiler/mlir:glob_lit_test.bzl", "glob_lit_tests")

package(licenses = ["notice"])

glob_lit_tests(
    data = [
        ":graph_config_files",
        ":test_utilities",
    ],
    driver = "@llvm-project//mlir:run_lit.sh",
    exclude = ["fadd_quant.mlir"],
    test_file_exts = ["mlir"],
)

# Bundle together all of the test utilities that are used by tests.
filegroup(
    name = "test_utilities",
    testonly = True,
    data = [
        "//tensorflow/compiler/aot:tfcompile",
        "//tensorflow/compiler/mlir:tf-opt",
        "@llvm-project//llvm:FileCheck",
        "@llvm-project//llvm:not",
    ],
)

# Bundle together all the graph files that are used by the tests.
filegroup(
    name = "graph_config_files",
    srcs = glob(
        ["**/*.pbtxt"],
    ),
)
@@ -1,199 +0,0 @@
|
||||
// RUN: tf-opt -xla-hlo-cpu-fusion %s | FileCheck %s
|
||||
|
||||
// CHECK-LABEL: @mul_add_source
|
||||
func @mul_add_source(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>, %arg2: tensor<4xf32>) -> (tensor<4xf32>) {
|
||||
%0 = "xla_hlo.multiply"(%arg0, %arg1) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
|
||||
%1 = "xla_hlo.add"(%0, %arg2) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
|
||||
return %1 : tensor<4xf32>
|
||||
|
||||
// CHECK: %[[region:.*]] = "quant.region"(%arg0, %arg1, %arg2) ( {
|
||||
// CHECK: ^bb0(%arg3: tensor<4xf32>, %arg4: tensor<4xf32>, %arg5: tensor<4xf32>): // no predecessors
|
||||
// CHECK: %[[mul:.*]] = xla_hlo.multiply %arg3, %arg4 {broadcast_dimensions = dense<[]> : tensor<0xi64>} : tensor<4xf32>
|
||||
// CHECK: %[[add:.*]] = xla_hlo.add %[[mul]], %arg5 {broadcast_dimensions = dense<[]> : tensor<0xi64>} : tensor<4xf32>
|
||||
// CHECK: "quant.return"(%[[add]]) : (tensor<4xf32>) -> ()
|
||||
// CHECK: }) {input_specs = [f32, f32, f32], logical_kernel = "generic.mul_add", output_specs = [f32]} : (tensor<4xf32>, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
|
||||
// CHECK: return %[[region]] : tensor<4xf32>
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @mul_add_annotated
|
||||
func @mul_add_annotated(%arg0: tensor<2x4xf32>, %arg1: tensor<2x4xf32>, %arg2: tensor<2x4xf32>) -> (tensor<2x4xf32>) {
|
||||
%cst = constant dense<0.0> : tensor<f32>
|
||||
%cst_0 = constant dense<255.0> : tensor<f32>
|
||||
%cst_1 = constant dense<8> : tensor<i32>
|
||||
%cst_2 = constant dense<false> : tensor<i1>
|
||||
%qin = "xla_hlo.custom_call"(%arg0, %cst, %cst_0, %cst_1, %cst_2) {backend_config = "", call_target_name = "fake_quant_with_min_max_vars",
|
||||
has_side_effect = false, name = "custom-call.1"} : (tensor<2x4xf32>, tensor<f32>, tensor<f32>, tensor<i32>, tensor<i1>) -> tensor<2x4xf32>
|
||||
%qw = "xla_hlo.custom_call"(%arg1, %cst, %cst_0, %cst_1, %cst_2) {backend_config = "", call_target_name = "fake_quant_with_min_max_vars",
|
||||
has_side_effect = false, name = "custom-call.2"} : (tensor<2x4xf32>, tensor<f32>, tensor<f32>, tensor<i32>, tensor<i1>) -> tensor<2x4xf32>
|
||||
%0 = "xla_hlo.multiply"(%qin, %qw) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<2x4xf32>, tensor<2x4xf32>) -> tensor<2x4xf32>
|
||||
%1 = "xla_hlo.add"(%0, %arg2) {broadcast_dimensions = dense<[]> : tensor<0xi64>} : (tensor<2x4xf32>, tensor<2x4xf32>) -> tensor<2x4xf32>
|
||||
%r = "xla_hlo.custom_call"(%1, %cst, %cst_0, %cst_1, %cst_2) {backend_config = "", call_target_name = "fake_quant_with_min_max_vars",
|
||||
has_side_effect = false, name = "custom-call.3"} : (tensor<2x4xf32>, tensor<f32>, tensor<f32>, tensor<i32>, tensor<i1>) -> tensor<2x4xf32>
|
||||
return %r : tensor<2x4xf32>
|
||||
|
||||
// CHECK: %[[region:.*]] = "quant.region"
|
||||
// CHECK: ^bb0(%arg3: tensor<2x4xf32>, %arg4: tensor<2x4xf32>, %arg5: tensor<2x4xf32>): // no predecessors
|
||||
// CHECK: %[[mul:.*]] = xla_hlo.multiply %arg3, %arg4 {broadcast_dimensions = dense<[]> : tensor<0xi64>} : tensor<2x4xf32>
|
||||
// CHECK: %[[add:.*]] = xla_hlo.add %[[mul]], %arg5 {broadcast_dimensions = dense<[]> : tensor<0xi64>} : tensor<2x4xf32>
|
||||
// CHECK: "quant.return"(%[[add]]) : (tensor<2x4xf32>) -> ()
|
||||
// CHECK: }) {input_specs = [!quant.uniform<i8:f32, 1.000000e+00:-128>, !quant.uniform<i8:f32, 1.000000e+00:-128>, f32],
|
||||
// CHECK-SAME: logical_kernel = "generic.mul_add", output_specs = [!quant.uniform<i8:f32, 1.000000e+00:-128>]} :
|
||||
// CHECK-SAME: (tensor<2x4xf32>, tensor<2x4xf32>, tensor<2x4xf32>) -> tensor<2x4xf32>
|
||||
// CHECK: %[[r:.*]] = "xla_hlo.custom_call"(%[[region]]
|
||||
// CHECK: return %[[r]] : tensor<2x4xf32>
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @reduce_window
|
||||
func @reduce_window(%arg0: tensor<1x28x28x32xf32>, %arg1: tensor<f32>) -> (tensor<1x14x14x32xf32>) {
|
||||
%0 = "xla_hlo.reduce_window"(%arg0, %arg1) ({
|
||||
^bb0(%arg2: tensor<f32>, %arg3: tensor<f32>):
|
||||
%1 = xla_hlo.maximum %arg2, %arg3 : tensor<f32>
|
||||
"xla_hlo.return"(%1) : (tensor<f32>) -> ()
|
||||
}) {
|
||||
base_dilations = dense<1> : tensor<4xi64>,
|
||||
padding = dense<0> : tensor<4x2xi64>,
|
||||
window_dilations = dense<1> : tensor<4xi64>,
|
||||
window_dimensions = dense<[1, 2, 2, 1]> : tensor<4xi64>,
|
||||
window_strides = dense<[1, 2, 2, 1]> : tensor<4xi64>
|
||||
} : (tensor<1x28x28x32xf32>, tensor<f32>) -> tensor<1x14x14x32xf32>
|
||||
return %0 : tensor<1x14x14x32xf32>
|
||||
|
||||
// CHECK: "quant.region"(%arg0, %arg1) ( {
|
||||
// CHECK: ^bb0(%arg2: tensor<1x28x28x32xf32>, %arg3: tensor<f32>): // no predecessors
|
||||
// CHECK: %[[rw:.*]] = "xla_hlo.reduce_window"(%arg2, %arg3) ( {
|
||||
// CHECK: ^bb0(%arg4: tensor<f32>, %arg5: tensor<f32>): // no predecessors
|
||||
// CHECK: %[[max:.*]] = xla_hlo.maximum %arg4, %arg5 : tensor<f32>
|
||||
// CHECK: "xla_hlo.return"(%[[max]]) : (tensor<f32>) -> ()
|
||||
// CHECK: })
|
||||
// CHECK: "quant.return"(%[[rw]])
|
||||
// CHECK: }) {input_specs = [f32, f32], logical_kernel = "generic.reduce_window", output_specs = [f32]}
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @reshape
|
||||
func @reshape(%arg0: tensor<1x7x7x64xf32>) -> (tensor<1x3136xf32>) {
|
||||
%0 = "xla_hlo.reshape"(%arg0) : (tensor<1x7x7x64xf32>) -> tensor<1x3136xf32>
|
||||
return %0 : tensor<1x3136xf32>
|
||||
|
||||
// CHECK: "quant.region"(%arg0)
|
||||
// CHECK: logical_kernel = "generic.reshape"
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @broadcast
|
||||
func @broadcast(%arg0: tensor<64xf32>) -> (tensor<1x14x14x64xf32>) {
|
||||
%0 = "xla_hlo.broadcast_in_dim"(%arg0) {broadcast_dimensions = dense<3> : tensor<1xi64>} : (tensor<64xf32>) -> tensor<1x14x14x64xf32>
|
||||
return %0 : tensor<1x14x14x64xf32>
|
||||
|
||||
// CHECK: "quant.region"(%arg0)
|
||||
// CHECK: logical_kernel = "generic.broadcast"
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @biased_dot
|
||||
func @biased_dot(%arg0: tensor<1x1024xf32>, %arg1: tensor<1024x10xf32>, %arg2: tensor<1x10xf32>) -> (tensor<1x10xf32>) {
|
||||
%0 = "xla_hlo.dot"(%arg0, %arg1) {precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<1x1024xf32>, tensor<1024x10xf32>) -> tensor<1x10xf32>
|
||||
%1 = xla_hlo.add %0, %arg2 : tensor<1x10xf32>
|
||||
return %1 : tensor<1x10xf32>
|
||||
|
||||
// CHECK: "quant.region"(%arg0, %arg1, %arg2)
|
||||
// CHECK: xla_hlo.dot
|
||||
// CHECK: xla_hlo.add
|
||||
// CHECK: logical_kernel = "generic.biased_dot"
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @biased_conv
|
||||
func @biased_conv(%arg0: tensor<1x14x14x32xf32>, %arg1: tensor<5x5x32x64xf32>, %arg2: tensor<1x14x14x64xf32>) -> (tensor<1x14x14x64xf32>) {
|
||||
%0 = "xla_hlo.convolution"(%arg0, %arg1) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64,
|
||||
input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64,
|
||||
kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64,
|
||||
output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, lhs_dilations = dense<1> : tensor<2xi64>,
|
||||
padding = dense<2> : tensor<2x2xi64>, precision_config = ["DEFAULT", "DEFAULT"], rhs_dilations = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>
|
||||
} : (tensor<1x14x14x32xf32>, tensor<5x5x32x64xf32>) -> tensor<1x14x14x64xf32>
|
||||
%1 = xla_hlo.add %0, %arg2 : tensor<1x14x14x64xf32>
|
||||
return %1 : tensor<1x14x14x64xf32>
|
||||
|
||||
// CHECK: "quant.region"(%arg0, %arg1, %arg2)
|
||||
// CHECK: xla_hlo.convolution
|
||||
// CHECK: xla_hlo.add
|
||||
// CHECK: logical_kernel = "generic.biased_conv"
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @biased_dot_relu
|
||||
func @biased_dot_relu(%arg0: tensor<1x1024xf32>, %arg1: tensor<1024x10xf32>, %arg2: tensor<1x10xf32>) -> (tensor<1x10xf32>) {
|
||||
%cst = constant dense<0.0> : tensor<1x10xf32>
|
||||
%0 = "xla_hlo.dot"(%arg0, %arg1) {precision_config = ["DEFAULT", "DEFAULT"]} : (tensor<1x1024xf32>, tensor<1024x10xf32>) -> tensor<1x10xf32>
|
||||
%1 = xla_hlo.add %0, %arg2 : tensor<1x10xf32>
|
||||
%2 = xla_hlo.maximum %1, %cst : tensor<1x10xf32>
|
||||
return %2 : tensor<1x10xf32>
|
||||
|
||||
// CHECK: "quant.region"(%arg0, %arg1, %arg2)
|
||||
// CHECK: constant
|
||||
// CHECK: xla_hlo.dot
|
||||
// CHECK: xla_hlo.add
|
||||
// CHECK: xla_hlo.maximum
|
||||
// CHECK: logical_kernel = "generic.biased_dot_relu"
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @biased_conv_relu
|
||||
func @biased_conv_relu(%arg0: tensor<1x14x14x32xf32>, %arg1: tensor<5x5x32x64xf32>, %arg2: tensor<1x14x14x64xf32>) -> (tensor<1x14x14x64xf32>) {
|
||||
%cst = constant dense<0.0> : tensor<1x14x14x64xf32>
|
||||
%0 = "xla_hlo.convolution"(%arg0, %arg1) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64,
|
||||
input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64,
|
||||
kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64,
|
||||
output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, lhs_dilations = dense<1> : tensor<2xi64>,
|
||||
padding = dense<2> : tensor<2x2xi64>, precision_config = ["DEFAULT", "DEFAULT"], rhs_dilations = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>
|
||||
} : (tensor<1x14x14x32xf32>, tensor<5x5x32x64xf32>) -> tensor<1x14x14x64xf32>
|
||||
%1 = xla_hlo.add %0, %arg2 : tensor<1x14x14x64xf32>
|
||||
%2 = xla_hlo.maximum %1, %cst : tensor<1x14x14x64xf32>
|
||||
return %2 : tensor<1x14x14x64xf32>
|
||||
|
||||
// CHECK: "quant.region"(%arg0, %arg1, %arg2)
|
||||
// CHECK: constant
|
||||
// CHECK: xla_hlo.convolution
|
||||
// CHECK: xla_hlo.add
|
||||
// CHECK: xla_hlo.maximum
|
||||
// CHECK: logical_kernel = "generic.biased_conv_relu"
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @biased_conv_relu_shared
|
||||
func @biased_conv_relu_shared(%arg0: tensor<1x14x14x32xf32>, %arg1: tensor<5x5x32x64xf32>, %arg2: tensor<1x14x14x64xf32>) -> (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) {
|
||||
%cst = constant dense<0.0> : tensor<1x14x14x64xf32>
|
||||
%0 = "xla_hlo.convolution"(%arg0, %arg1) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64,
|
||||
input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64,
|
||||
kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64,
|
||||
output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, lhs_dilations = dense<1> : tensor<2xi64>,
|
||||
padding = dense<2> : tensor<2x2xi64>, precision_config = ["DEFAULT", "DEFAULT"], rhs_dilations = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>
|
||||
} : (tensor<1x14x14x32xf32>, tensor<5x5x32x64xf32>) -> tensor<1x14x14x64xf32>
|
||||
%1 = xla_hlo.add %0, %arg2 : tensor<1x14x14x64xf32>
|
||||
%2 = xla_hlo.maximum %1, %cst : tensor<1x14x14x64xf32>
|
||||
return %cst, %2 : tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>
|
||||
|
||||
// CHECK: "quant.region"(%arg0, %arg1, %arg2)
|
||||
// CHECK: constant
|
||||
// CHECK: xla_hlo.convolution
|
||||
// CHECK: xla_hlo.add
|
||||
// CHECK: %[[max:.*]] = xla_hlo.maximum
|
||||
// CHECK: "quant.return"(%[[max]])
|
||||
// CHECK: logical_kernel = "generic.biased_conv_relu"
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @biased_conv_relu6
|
||||
func @biased_conv_relu6(%arg0: tensor<1x14x14x32xf32>, %arg1: tensor<5x5x32x64xf32>, %arg2: tensor<1x14x14x64xf32>) -> (tensor<1x14x14x64xf32>) {
|
||||
%min = constant dense<0.0> : tensor<1x14x14x64xf32>
|
||||
%max = constant dense<6.0> : tensor<1x14x14x64xf32>
|
||||
%0 = "xla_hlo.convolution"(%arg0, %arg1) {batch_group_count = 1 : i64, dimension_numbers = {input_batch_dimension = 0 : i64, input_feature_dimension = 3 : i64,
|
||||
input_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>, kernel_input_feature_dimension = 2 : i64, kernel_output_feature_dimension = 3 : i64,
|
||||
kernel_spatial_dimensions = dense<[0, 1]> : tensor<2xi64>, output_batch_dimension = 0 : i64, output_feature_dimension = 3 : i64,
|
||||
output_spatial_dimensions = dense<[1, 2]> : tensor<2xi64>}, feature_group_count = 1 : i64, lhs_dilations = dense<1> : tensor<2xi64>,
|
||||
padding = dense<2> : tensor<2x2xi64>, precision_config = ["DEFAULT", "DEFAULT"], rhs_dilations = dense<1> : tensor<2xi64>, window_strides = dense<1> : tensor<2xi64>
|
||||
} : (tensor<1x14x14x32xf32>, tensor<5x5x32x64xf32>) -> tensor<1x14x14x64xf32>
|
||||
%1 = xla_hlo.add %0, %arg2 : tensor<1x14x14x64xf32>
|
||||
%2 = "xla_hlo.clamp"(%min, %1, %max) : (tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>, tensor<1x14x14x64xf32>) -> tensor<1x14x14x64xf32>
|
||||
return %2 : tensor<1x14x14x64xf32>
|
||||
|
||||
// CHECK: "quant.region"(%arg0, %arg1, %arg2)
|
||||
// CHECK: constant
|
||||
// CHECK: constant
|
||||
// CHECK: xla_hlo.convolution
|
||||
// CHECK: xla_hlo.add
|
||||
// CHECK: xla_hlo.clamp
|
||||
// CHECK: logical_kernel = "generic.biased_conv_relu6"
|
||||
}
|
@@ -1,10 +0,0 @@
# RUN: not tfcompile --graph=%s.pbtxt --config=%s.config.pbtxt --experimental_quantize --cpp_class="::test::fadd_quant" 2>&1 | FileCheck %s -dump-input-on-failure

# TODO(fengliuai): update this file with the progress of the implementation

// CHECK: "quant.region"
// CHECK: ^bb0(%arg0: tensor<2x4xf32>, %arg1: tensor<2x4xf32>): // no predecessors
// CHECK: xla_hlo.add %arg0, %arg1
// CHECK: "quant.return"
// CHECK: }) {input_specs = [!quant.uniform<i8:f32, 0.49803921568627452:-128>, !quant.uniform<i8:f32, 0.49803921568627452:-128>],
// CHECK-SAME: logical_kernel = "generic.add", output_specs = [!quant.uniform<i8:f32, 0.49803921568627452:-128>]}
@@ -1,26 +0,0 @@
feed {
  id { node_name: "input0" }
  shape {
    dim { size: 2 }
    dim { size: 4 }
  }
}
feed {
  id { node_name: "input1" }
  shape {
    dim { size: 2 }
    dim { size: 4 }
  }
}

fetch {
  id { node_name: "Add/FakeQuantWithMinMaxVars" }
  shape {
    dim { size: 2 }
    dim { size: 4 }
  }
}

conversion_options {
  custom_fake_quant_op_calls: true
}
@@ -1,218 +0,0 @@
node: {
  name: "Add/FakeQuantWithMinMaxVars"
  op: "FakeQuantWithMinMaxVars"
  input: "Add"
  input: "Add/FakeQuantWithMinMaxVars/min"
  input: "Add/FakeQuantWithMinMaxVars/max"
  attr: {
    key: "num_bits"
    value: {
      i: 8
    }
  }
  attr: {
    key: "narrow_range"
    value: {
      b: false
    }
  }
}
node: {
  name: "Add/FakeQuantWithMinMaxVars/min"
  op: "Const"
  attr: {
    key: "value"
    value: {
      tensor: {
        dtype: DT_FLOAT
        tensor_shape: {
        }
        float_val: 0.0
      }
    }
  }
  attr: {
    key: "dtype"
    value: {
      type: DT_FLOAT
    }
  }
}
node: {
  name: "Add/FakeQuantWithMinMaxVars/max"
  op: "Const"
  attr: {
    key: "value"
    value: {
      tensor: {
        dtype: DT_FLOAT
        tensor_shape: {
        }
        float_val: 127.0
      }
    }
  }
  attr: {
    key: "dtype"
    value: {
      type: DT_FLOAT
    }
  }
}
node {
  name: "Add"
  op: "Add"
  input: "input0/FakeQuantWithMinMaxVars"
  input: "input1/FakeQuantWithMinMaxVars"
  attr {
    key: "T"
    value {
      type: DT_FLOAT
    }
  }
}
node: {
  name: "input0/FakeQuantWithMinMaxVars"
  op: "FakeQuantWithMinMaxVars"
  input: "input0"
  input: "input0/FakeQuantWithMinMaxVars/min"
  input: "input0/FakeQuantWithMinMaxVars/max"
  attr: {
    key: "num_bits"
    value: {
      i: 8
    }
  }
  attr: {
    key: "narrow_range"
    value: {
      b: false
    }
  }
}
node: {
  name: "input0/FakeQuantWithMinMaxVars/min"
  op: "Const"
  attr: {
    key: "value"
    value: {
      tensor: {
        dtype: DT_FLOAT
        tensor_shape: {
        }
        float_val: 0.0
      }
    }
  }
  attr: {
    key: "dtype"
    value: {
      type: DT_FLOAT
    }
  }
}
node: {
  name: "input0/FakeQuantWithMinMaxVars/max"
  op: "Const"
  attr: {
    key: "value"
    value: {
      tensor: {
        dtype: DT_FLOAT
        tensor_shape: {
        }
        float_val: 127.0
      }
    }
  }
  attr: {
    key: "dtype"
    value: {
      type: DT_FLOAT
    }
  }
}
node {
  name: "input0"
  op: "Placeholder"
  attr {
    key: "dtype"
    value {
      type: DT_FLOAT
    }
  }
}
node: {
  name: "input1/FakeQuantWithMinMaxVars"
  op: "FakeQuantWithMinMaxVars"
  input: "input1"
  input: "input1/FakeQuantWithMinMaxVars/min"
  input: "input1/FakeQuantWithMinMaxVars/max"
  attr: {
    key: "num_bits"
    value: {
      i: 8
    }
  }
  attr: {
    key: "narrow_range"
    value: {
      b: false
    }
  }
}
node: {
  name: "input1/FakeQuantWithMinMaxVars/min"
  op: "Const"
  attr: {
    key: "value"
    value: {
      tensor: {
        dtype: DT_FLOAT
        tensor_shape: {
        }
        float_val: 0.0
      }
    }
  }
  attr: {
    key: "dtype"
    value: {
      type: DT_FLOAT
    }
  }
}
node: {
  name: "input1/FakeQuantWithMinMaxVars/max"
  op: "Const"
  attr: {
    key: "value"
    value: {
      tensor: {
        dtype: DT_FLOAT
        tensor_shape: {
        }
        float_val: 127.0
      }
    }
  }
  attr: {
    key: "dtype"
    value: {
      type: DT_FLOAT
    }
  }
}
node {
  name: "input1"
  op: "Placeholder"
  attr {
    key: "dtype"
    value {
      type: DT_FLOAT
    }
  }
}
versions {
  producer: 27
}
@ -1,54 +0,0 @@
// RUN: tf-opt -xla-hlo-materialize-quant %s | FileCheck %s

// CHECK-LABEL: func @quantize_rewrite
func @quantize_rewrite(%arg0: tensor<2x4xf32>) -> tensor<2x4xf32> {
// CHECK: %[[qcst:.*]] = constant dense<{{\[\[}}21004416], [-1056997248]]> : tensor<2x1xi32>
// CHECK-NEXT: %[[dq:.*]] = "xla_hlo.dequantize"(%[[qcst]]) {is_16bits = false, max_range = 0.996078431 : f32, min_range = -1.00392163 : f32,
// CHECK-SAME: mode = "MIN_COMBINED", transpose_output = false} : (tensor<2x1xi32>) -> tensor<2x4xbf16>
// CHECK-NEXT: %[[cast:.*]] = "xla_hlo.convert"(%[[dq]]) : (tensor<2x4xbf16>) -> tensor<2x4xf32>
// CHECK-NEXT: %[[mul:.*]] = xla_hlo.multiply %arg0, %[[cast]] : tensor<2x4xf32>
// CHECK-NEXT: return %[[mul]] : tensor<2x4xf32>

  %w = constant dense<[[-1.0, -0.5, 0.0, 0.0], [0.5, 1.0, 0.0, 0.0]]> : tensor<2x4xf32>
  %q = "quant.qcast"(%w) : (tensor<2x4xf32>) -> tensor<2x4x!quant.uniform<u8:f32, 0.0078431372549019607:128>>
  %dq = "quant.dcast"(%q) : (tensor<2x4x!quant.uniform<u8:f32, 0.0078431372549019607:128>>) -> tensor<2x4xf32>
  %mul = xla_hlo.multiply %arg0, %dq : tensor<2x4xf32>
  return %mul: tensor<2x4xf32>
}

// CHECK-LABEL: func @quantize_small
func @quantize_small(%arg0: tensor<1x4xf32>) -> tensor<1x4xf32> {
// CHECK: %[[w:.*]] = constant dense<1.000000e+00> : tensor<1x4xf32>
// CHECK-NEXT: %[[mul:.*]] = xla_hlo.multiply %arg0, %[[w]] : tensor<1x4xf32>
// CHECK-NEXT: return %[[mul]] : tensor<1x4xf32>

  %w = constant dense<1.0> : tensor<1x4xf32>
  %q = "quant.qcast"(%w) : (tensor<1x4xf32>) -> tensor<1x4x!quant.uniform<u8:f32, 0.0078431372549019607:128>>
  %dq = "quant.dcast"(%q) : (tensor<1x4x!quant.uniform<u8:f32, 0.0078431372549019607:128>>) -> tensor<1x4xf32>
  %mul = xla_hlo.multiply %arg0, %dq : tensor<1x4xf32>
  return %mul: tensor<1x4xf32>
}

// CHECK-LABEL: func @quantize_non_cst
func @quantize_non_cst(%arg0: tensor<2x4xf32>) -> tensor<2x4xf32> {
// CHECK-NEXT: %[[mul:.*]] = xla_hlo.multiply %arg0, %arg0 : tensor<2x4xf32>
// CHECK-NEXT: return %[[mul]] : tensor<2x4xf32>

  %q = "quant.qcast"(%arg0) : (tensor<2x4xf32>) -> tensor<2x4x!quant.uniform<u8:f32, 0.0078431372549019607:128>>
  %dq = "quant.dcast"(%q) : (tensor<2x4x!quant.uniform<u8:f32, 0.0078431372549019607:128>>) -> tensor<2x4xf32>
  %mul = xla_hlo.multiply %arg0, %dq : tensor<2x4xf32>
  return %mul: tensor<2x4xf32>
}

// CHECK-LABEL: func @quantize_non_4x
func @quantize_non_4x(%arg0: tensor<2x5xf32>) -> tensor<2x5xf32> {
// CHECK: %[[w:.*]] = constant dense<1.000000e+00> : tensor<2x5xf32>
// CHECK-NEXT: %[[mul:.*]] = xla_hlo.multiply %arg0, %[[w]] : tensor<2x5xf32>
// CHECK-NEXT: return %[[mul]] : tensor<2x5xf32>

  %w = constant dense<1.0> : tensor<2x5xf32>
  %q = "quant.qcast"(%w) : (tensor<2x5xf32>) -> tensor<2x5x!quant.uniform<u8:f32, 0.0078431372549019607:128>>
  %dq = "quant.dcast"(%q) : (tensor<2x5x!quant.uniform<u8:f32, 0.0078431372549019607:128>>) -> tensor<2x5xf32>
  %mul = xla_hlo.multiply %arg0, %dq : tensor<2x5xf32>
  return %mul: tensor<2x5xf32>
}
@ -1,69 +0,0 @@
// RUN: tf-opt -xla-hlo-propagate-quant %s | FileCheck %s --dump-input-on-failure

// -----

// CHECK-LABEL: @mul_add_source_no_params
func @mul_add_source_no_params(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>, %arg2: tensor<4xf32>) -> (tensor<4xf32>) {
  %region = "quant.region"(%arg0, %arg1, %arg2) ( {
  ^bb0(%arg3: tensor<4xf32>, %arg4: tensor<4xf32>, %arg5: tensor<4xf32>): // no predecessors
    %mul = xla_hlo.multiply %arg3, %arg4 {broadcast_dimensions = dense<[]> : tensor<0xi64>} : tensor<4xf32>
    %add = xla_hlo.add %mul, %arg5 {broadcast_dimensions = dense<[]> : tensor<0xi64>} : tensor<4xf32>
    "quant.return"(%add) : (tensor<4xf32>) -> ()
  }) {input_specs = [f32, f32, f32], logical_kernel = "generic.mul_add", output_specs = [f32]} :
    (tensor<4xf32>, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
  return %region : tensor<4xf32>

// CHECK: input_specs = [f32, f32, f32]
// CHECK-SAME: output_specs = [f32]
}

// -----

// CHECK-LABEL: @mul_add_annotated_no_narrow_range
func @mul_add_annotated_no_narrow_range(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>, %arg2: tensor<4xf32>) -> (tensor<4xf32>) {
  %region = "quant.region"(%arg0, %arg1, %arg2) ( {
  ^bb0(%arg3: tensor<4xf32>, %arg4: tensor<4xf32>, %arg5: tensor<4xf32>): // no predecessors
    %mul = xla_hlo.multiply %arg3, %arg4 {broadcast_dimensions = dense<[]> : tensor<0xi64>} : tensor<4xf32>
    %add = xla_hlo.add %mul, %arg5 {broadcast_dimensions = dense<[]> : tensor<0xi64>} : tensor<4xf32>
    "quant.return"(%add) : (tensor<4xf32>) -> ()
  }) {input_specs = [!quant.uniform<i8:f32, 1.0:-128>, !quant.uniform<i8:f32, 1.0:-128>, f32],
    logical_kernel = "generic.mul_add", output_specs = [!quant.uniform<i8:f32, 1.0:-128>]} :
    (tensor<4xf32>, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
  return %region : tensor<4xf32>

// CHECK: input_specs = [!quant.uniform<i8:f32, 1.000000e+00:-128>, !quant.uniform<i8:f32, 1.000000e+00:-128>, f32]
// CHECK-SAME: output_specs = [!quant.uniform<i8:f32, 1.000000e+00:-128>]
}

// -----

// CHECK-LABEL: @mul_add_annotated
func @mul_add_annotated(%arg0: tensor<4xf32>, %arg1: tensor<4xf32>, %arg2: tensor<4xf32>) -> (tensor<4xf32>) {
  %region = "quant.region"(%arg0, %arg1, %arg2) ( {
  ^bb0(%arg3: tensor<4xf32>, %arg4: tensor<4xf32>, %arg5: tensor<4xf32>): // no predecessors
    %mul = xla_hlo.multiply %arg3, %arg4 {broadcast_dimensions = dense<[]> : tensor<0xi64>} : tensor<4xf32>
    %add = xla_hlo.add %mul, %arg5 {broadcast_dimensions = dense<[]> : tensor<0xi64>} : tensor<4xf32>
    "quant.return"(%add) : (tensor<4xf32>) -> ()
  }) {input_specs = [!quant.uniform<i8:f32, 1.0:-128>, !quant.uniform<i8<-127:127>:f32, 1.0:-128>, f32],
    logical_kernel = "generic.mul_add", output_specs = [!quant.uniform<i8:f32, 1.0:-128>]} :
    (tensor<4xf32>, tensor<4xf32>, tensor<4xf32>) -> tensor<4xf32>
  return %region : tensor<4xf32>

// CHECK: input_specs = [!quant.uniform<i8:f32, 1.000000e+00:-128>, !quant.uniform<i8<-127:127>:f32, 1.000000e+00:-128>, !quant.uniform<i32:f32, 1.000000e+00>]
// CHECK-SAME: output_specs = [!quant.uniform<i8:f32, 1.000000e+00:-128>]
}

// -----

// CHECK-LABEL: @same_scale_1_1
func @same_scale_1_1(%arg0: tensor<1x7x7x64xf32>) -> (tensor<1x3136xf32>) {
  %region = "quant.region"(%arg0) ( {
  ^bb0(%arg1: tensor<1x7x7x64xf32>): // no predecessors
    %r = "xla_hlo.reshape"(%arg1) : (tensor<1x7x7x64xf32>) -> (tensor<1x3136xf32>)
    "quant.return"(%r) : (tensor<1x3136xf32>) -> ()
  }) {input_specs = [!quant.uniform<i8:f32, 1.0>], logical_kernel = "generic.reshape", output_specs = [f32]} : (tensor<1x7x7x64xf32>) -> tensor<1x3136xf32>
  return %region : tensor<1x3136xf32>

// CHECK: input_specs = [!quant.uniform<i8:f32, 1.000000e+00>]
// CHECK-SAME: output_specs = [!quant.uniform<i8:f32, 1.000000e+00>]
}
@ -1,25 +0,0 @@
// RUN: tf-opt -xla-hlo-propagate-quant %s | FileCheck %s

// CHECK-LABEL: func @mul
func @mul(%arg0: tensor<2x2xf32>) -> tensor<2x2xf32> {
// CHECK: %[[w:.*]] = constant dense<{{\[\[}}-1.000000e+00, -5.000000e-01], [5.000000e-01, 1.000000e+00]]> : tensor<2x2xf32>
// CHECK-NEXT: %[[q:.*]] = "quant.qcast"(%[[w]]) : (tensor<2x2xf32>) -> tensor<2x2x!quant.uniform<u8:f32, 0.0078431372549019607:128>>
// CHECK-NEXT: %[[dq:.*]] = "quant.dcast"(%[[q]]) : (tensor<2x2x!quant.uniform<u8:f32, 0.0078431372549019607:128>>) -> tensor<2x2xf32>
// CHECK-NEXT: %[[mul:.*]] = xla_hlo.multiply %arg0, %[[dq]] : tensor<2x2xf32>
// CHECK-NEXT: return %[[mul]] : tensor<2x2xf32>
  %w = constant dense<[[-1.0, -0.5], [0.5, 1.0]]> : tensor<2x2xf32>
  %mul = xla_hlo.multiply %arg0, %w : tensor<2x2xf32>
  return %mul: tensor<2x2xf32>
}

// CHECK-LABEL: func @add
func @add(%arg0: tensor<2x2xf32>) -> tensor<2x2xf32> {
// CHECK: %[[b:.*]] = constant dense<1.000000e+00> : tensor<2xf32>
// CHECK-NEXT: %[[q:.*]] = "quant.qcast"(%[[b]]) : (tensor<2xf32>) -> tensor<2x!quant.uniform<u8:f32, 0.0039215686274509803>>
// CHECK-NEXT: %[[dq:.*]] = "quant.dcast"(%[[q]]) : (tensor<2x!quant.uniform<u8:f32, 0.0039215686274509803>>) -> tensor<2xf32>
// CHECK-NEXT: %[[add:.*]] = "xla_hlo.add"(%arg0, %[[dq]]) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<2x2xf32>, tensor<2xf32>) -> tensor<2x2xf32>
// CHECK-NEXT: return %[[add]] : tensor<2x2xf32>
  %b = constant dense<1.0> : tensor<2xf32>
  %add = "xla_hlo.add"(%arg0, %b) {broadcast_dimensions = dense<1> : tensor<1xi64>} : (tensor<2x2xf32>, tensor<2xf32>) -> tensor<2x2xf32>
  return %add: tensor<2x2xf32>
}