Add ParseExampleV2 op, which supports RaggedTensor features.
PiperOrigin-RevId: 266989423
This commit is contained in:
parent
ae9a7e0521
commit
407b665929
109
tensorflow/core/api_def/base_api/api_def_ParseExampleV2.pbtxt
Normal file
109
tensorflow/core/api_def/base_api/api_def_ParseExampleV2.pbtxt
Normal file
@ -0,0 +1,109 @@
|
||||
op {
|
||||
graph_op_name: "ParseExampleV2"
|
||||
in_arg {
|
||||
name: "serialized"
|
||||
description: <<END
|
||||
A scalar or vector containing binary serialized Example protos.
|
||||
END
|
||||
}
|
||||
in_arg {
|
||||
name: "names"
|
||||
description: <<END
|
||||
A tensor containing the names of the serialized protos.
|
||||
Corresponds 1:1 with the `serialized` tensor.
|
||||
May contain, for example, table key (descriptive) names for the
|
||||
corresponding serialized protos. These are purely useful for debugging
|
||||
purposes, and the presence of values here has no effect on the output.
|
||||
May also be an empty vector if no names are available.
|
||||
If non-empty, this tensor must have the same shape as "serialized".
|
||||
END
|
||||
}
|
||||
in_arg {
|
||||
name: "sparse_keys"
|
||||
description: <<END
|
||||
Vector of strings.
|
||||
The keys expected in the Examples' features associated with sparse values.
|
||||
END
|
||||
}
|
||||
in_arg {
|
||||
name: "dense_keys"
|
||||
description: <<END
|
||||
Vector of strings.
|
||||
The keys expected in the Examples' features associated with dense values.
|
||||
END
|
||||
}
|
||||
in_arg {
|
||||
name: "ragged_keys"
|
||||
description: <<END
|
||||
Vector of strings.
|
||||
The keys expected in the Examples' features associated with ragged values.
|
||||
END
|
||||
}
|
||||
in_arg {
|
||||
name: "dense_defaults"
|
||||
description: <<END
|
||||
A list of Tensors (some may be empty). Corresponds 1:1 with `dense_keys`.
|
||||
dense_defaults[j] provides default values
|
||||
when the example's feature_map lacks dense_keys[j]. If an empty Tensor is
|
||||
provided for dense_defaults[j], then the Feature dense_keys[j] is required.
|
||||
The input type is inferred from dense_defaults[j], even when it's empty.
|
||||
If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined,
|
||||
then the shape of dense_defaults[j] must match that of dense_shapes[j].
|
||||
If dense_shapes[j] has an undefined major dimension (variable strides dense
|
||||
feature), dense_defaults[j] must contain a single element:
|
||||
the padding element.
|
||||
END
|
||||
}
|
||||
attr {
|
||||
name: "num_sparse"
|
||||
description: <<END
|
||||
The number of sparse keys.
|
||||
END
|
||||
}
|
||||
attr {
|
||||
name: "sparse_types"
|
||||
description: <<END
|
||||
A list of `num_sparse` types; the data types of data in each Feature
|
||||
given in sparse_keys.
|
||||
Currently the ParseExample supports DT_FLOAT (FloatList),
|
||||
DT_INT64 (Int64List), and DT_STRING (BytesList).
|
||||
END
|
||||
}
|
||||
attr {
|
||||
name: "ragged_value_types"
|
||||
description: <<END
|
||||
A list of `num_ragged` types; the data types of data in each Feature
|
||||
given in ragged_keys (where `num_ragged = ragged_keys.size()`).
|
||||
Currently the ParseExample supports DT_FLOAT (FloatList),
|
||||
DT_INT64 (Int64List), and DT_STRING (BytesList).
|
||||
END
|
||||
}
|
||||
attr {
|
||||
name: "ragged_split_types"
|
||||
description: <<END
|
||||
A list of `num_ragged` types; the data types of row_splits in each Feature
|
||||
given in ragged_keys (where `num_ragged = ragged_keys.size()`).
|
||||
May be DT_INT32 or DT_INT64.
|
||||
END
|
||||
}
|
||||
attr {
|
||||
name: "dense_shapes"
|
||||
description: <<END
|
||||
A list of `num_dense` shapes; the shapes of data in each Feature
|
||||
given in dense_keys (where `num_dense = dense_keys.size()`).
|
||||
The number of elements in the Feature corresponding to dense_keys[j]
|
||||
must always equal dense_shapes[j].NumEntries().
|
||||
If dense_shapes[j] == (D0, D1, ..., DN) then the shape of output
|
||||
Tensor dense_values[j] will be (|serialized|, D0, D1, ..., DN):
|
||||
The dense outputs are just the inputs row-stacked by batch.
|
||||
This works for dense_shapes[j] = (-1, D1, ..., DN). In this case
|
||||
the shape of the output Tensor dense_values[j] will be
|
||||
(|serialized|, M, D1, .., DN), where M is the maximum number of blocks
|
||||
of elements of length D1 * .... * DN, across all minibatch entries
|
||||
in the input. Any minibatch entry with fewer than M blocks of elements of
|
||||
length D1 * ... * DN will be padded with the corresponding default_value
|
||||
scalar element along the second dimension.
|
||||
END
|
||||
}
|
||||
summary: "Transforms a vector of tf.Example protos (as strings) into typed tensors."
|
||||
}
|
@ -0,0 +1,4 @@
|
||||
op {
|
||||
graph_op_name: "ParseExampleV2"
|
||||
visibility: HIDDEN
|
||||
}
|
@ -24,6 +24,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/framework/common_shape_fns.h"
|
||||
#include "tensorflow/core/framework/numeric_op.h"
|
||||
#include "tensorflow/core/framework/register_types.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
#include "tensorflow/core/lib/gtl/array_slice.h"
|
||||
#include "tensorflow/core/platform/logging.h"
|
||||
#include "tensorflow/core/platform/protobuf.h"
|
||||
@ -34,93 +35,173 @@ limitations under the License.
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
namespace {
|
||||
constexpr char kParseExampleV2[] = "ParseExampleV2";
|
||||
} // namespace
|
||||
|
||||
// Note: this kernel is used by both the ParseExample op and the ParseExampleV2
|
||||
// op. It determines which op was used by comparing the op name in the node
|
||||
// definition against "ParseExampleV2" (the result is kept in `op_version_`).
|
||||
class ParseExampleOp : public OpKernel {
|
||||
public:
|
||||
explicit ParseExampleOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
|
||||
OP_REQUIRES_OK(ctx, attrs_.Init(ctx));
|
||||
explicit ParseExampleOp(OpKernelConstruction* ctx)
|
||||
: OpKernel(ctx), op_version_(ctx->def().op() == kParseExampleV2 ? 2 : 1) {
|
||||
OP_REQUIRES_OK(ctx, attrs_.Init(ctx, op_version_));
|
||||
}
|
||||
|
||||
void Compute(OpKernelContext* ctx) override {
|
||||
const Tensor* names;
|
||||
const Tensor* serialized;
|
||||
OpInputList dense_keys;
|
||||
OpInputList sparse_keys;
|
||||
std::vector<string> dense_keys_t;
|
||||
std::vector<string> sparse_keys_t;
|
||||
std::vector<string> ragged_keys_t;
|
||||
OpInputList dense_defaults;
|
||||
|
||||
// Grab the input list arguments.
|
||||
OP_REQUIRES_OK(ctx, ctx->input("names", &names));
|
||||
// Grab the inputs.
|
||||
OP_REQUIRES_OK(ctx, ctx->input("serialized", &serialized));
|
||||
OP_REQUIRES_OK(ctx, ctx->input_list("dense_keys", &dense_keys));
|
||||
OP_REQUIRES_OK(ctx, ctx->input_list("sparse_keys", &sparse_keys));
|
||||
OP_REQUIRES_OK(ctx, ctx->input("names", &names));
|
||||
if (op_version_ == 2) {
|
||||
OP_REQUIRES_OK(ctx, GetTensorKeys(ctx, "dense_keys", &dense_keys_t));
|
||||
OP_REQUIRES_OK(ctx, GetTensorKeys(ctx, "sparse_keys", &sparse_keys_t));
|
||||
OP_REQUIRES_OK(ctx, GetTensorKeys(ctx, "ragged_keys", &ragged_keys_t));
|
||||
} else {
|
||||
OP_REQUIRES_OK(ctx, GetInputListKeys(ctx, "dense_keys", &dense_keys_t));
|
||||
OP_REQUIRES_OK(ctx, GetInputListKeys(ctx, "sparse_keys", &sparse_keys_t));
|
||||
}
|
||||
OP_REQUIRES_OK(ctx, ctx->input_list("dense_defaults", &dense_defaults));
|
||||
|
||||
std::vector<string> dense_keys_t(attrs_.num_dense);
|
||||
std::vector<string> sparse_keys_t(attrs_.num_sparse);
|
||||
// Validate input tensor shapes.
|
||||
OP_REQUIRES_OK(
|
||||
ctx, CheckInputShapes(serialized, names, dense_defaults, dense_keys_t,
|
||||
sparse_keys_t, ragged_keys_t));
|
||||
|
||||
// Check that the input list sizes match the attribute declared sizes.
|
||||
CHECK_EQ(dense_keys.size(), attrs_.num_dense);
|
||||
CHECK_EQ(sparse_keys.size(), attrs_.num_sparse);
|
||||
example::FastParseExampleConfig config =
|
||||
MakeConfig(dense_keys_t, sparse_keys_t, ragged_keys_t, dense_defaults);
|
||||
|
||||
// Copy from OpInputList to std::vector<string>.
|
||||
for (int di = 0; di < attrs_.num_dense; ++di) {
|
||||
dense_keys_t[di] = dense_keys[di].scalar<tstring>()();
|
||||
example::Result result;
|
||||
if (TensorShapeUtils::IsVector(serialized->shape())) {
|
||||
OP_REQUIRES_OK(
|
||||
ctx, ParseExampleVector(config, serialized, names, ctx, &result));
|
||||
} else {
|
||||
OP_REQUIRES_OK(ctx, ParseExampleScalar(config, serialized, ctx, &result));
|
||||
}
|
||||
for (int di = 0; di < attrs_.num_sparse; ++di) {
|
||||
sparse_keys_t[di] = sparse_keys[di].scalar<tstring>()();
|
||||
OP_REQUIRES_OK(ctx, WriteOutput(result, ctx));
|
||||
}
|
||||
|
||||
protected:
|
||||
// Copies keys from tensor to std::vector<string>.
//
// Fetches the input tensor named `input_name` from `ctx`, reads it through a
// flat tstring view, and appends every element to `*keys` in flat order.
// Returns the error from ctx->input() if the input cannot be fetched;
// otherwise returns OK.
|
||||
Status GetTensorKeys(OpKernelContext* ctx, const string& input_name,
|
||||
std::vector<string>* keys) const {
|
||||
const Tensor* key_t;
|
||||
TF_RETURN_IF_ERROR(ctx->input(input_name, &key_t));
|
||||
// Size the vector for the number of elements about to be appended.
keys->reserve(key_t->NumElements());
|
||||
auto keys_flat = key_t->flat<tstring>();
|
||||
for (int i = 0; i < keys_flat.size(); ++i) {
|
||||
keys->push_back(keys_flat(i));
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Copies keys from OpInputList of scalar to std::vector<string>.
//
// Fetches the input list named `input_name` from `ctx` and appends the
// scalar string value of each list entry to `*keys`, in list order.
// Returns the error from ctx->input_list() if the list cannot be fetched;
// otherwise returns OK.
|
||||
Status GetInputListKeys(OpKernelContext* ctx, const string& input_name,
|
||||
std::vector<string>* keys) const {
|
||||
OpInputList key_list;
|
||||
TF_RETURN_IF_ERROR(ctx->input_list(input_name, &key_list));
|
||||
// One key is appended per list entry.
keys->reserve(key_list.size());
|
||||
for (const auto& key : key_list) {
|
||||
// Each entry is read as a scalar string tensor.
keys->push_back(key.scalar<tstring>()());
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Validates the shapes of input tensors.
|
||||
Status CheckInputShapes(const Tensor* serialized, const Tensor* names,
|
||||
const OpInputList& dense_defaults,
|
||||
const std::vector<string>& dense_keys_t,
|
||||
const std::vector<string>& sparse_keys_t,
|
||||
const std::vector<string>& ragged_keys_t) const {
|
||||
if (op_version_ == 2) {
|
||||
if (TensorShapeUtils::IsMatrixOrHigher(serialized->shape())) {
|
||||
return errors::InvalidArgument(
|
||||
"Expected serialized to be a scalar or vector, got shape: ",
|
||||
serialized->shape().DebugString());
|
||||
}
|
||||
} else {
|
||||
if (!TensorShapeUtils::IsVector(serialized->shape())) {
|
||||
return errors::InvalidArgument(
|
||||
"Expected serialized to be a vector, got shape: ",
|
||||
serialized->shape().DebugString());
|
||||
}
|
||||
}
|
||||
if (names->NumElements() > 0 && names->shape() != serialized->shape()) {
|
||||
return errors::InvalidArgument(
|
||||
"Expected names have the same shape as serialized: name.shape=",
|
||||
names->shape().DebugString(),
|
||||
", serialized.shape=", serialized->shape().DebugString());
|
||||
}
|
||||
if (op_version_ == 2) {
|
||||
if (dense_keys_t.size() != attrs_.num_dense) {
|
||||
return errors::InvalidArgument(
|
||||
"Expected len(dense_keys) == len(dense_types) but got: ",
|
||||
dense_keys_t.size(), " vs. ", attrs_.num_dense);
|
||||
}
|
||||
if (sparse_keys_t.size() != attrs_.num_sparse) {
|
||||
return errors::InvalidArgument(
|
||||
"Expected len(sparse_keys) == num_sparse but got: ",
|
||||
sparse_keys_t.size(), " vs. ", attrs_.num_sparse);
|
||||
}
|
||||
if (ragged_keys_t.size() != attrs_.num_ragged) {
|
||||
return errors::InvalidArgument(
|
||||
"Expected len(ragged_keys) == len(ragged_value_types) but got: ",
|
||||
ragged_keys_t.size(), " vs. ", attrs_.num_ragged);
|
||||
}
|
||||
}
|
||||
|
||||
if (names->NumElements() > 0) {
|
||||
OP_REQUIRES(
|
||||
ctx, TensorShapeUtils::IsVector(names->shape()),
|
||||
errors::InvalidArgument("Expected names to be a vector, got shape: ",
|
||||
names->shape().DebugString()));
|
||||
OP_REQUIRES(
|
||||
ctx, names->NumElements() == serialized->NumElements(),
|
||||
errors::InvalidArgument(
|
||||
"Expected len(names) == len(serialized), but got: ",
|
||||
names->NumElements(), " vs. ", serialized->NumElements()));
|
||||
if (dense_defaults.size() != attrs_.num_dense) {
|
||||
return errors::InvalidArgument(
|
||||
"Expected len(dense_defaults) == len(dense_keys) but got: ",
|
||||
dense_defaults.size(), " vs. ", attrs_.num_dense);
|
||||
}
|
||||
|
||||
OP_REQUIRES(ctx, TensorShapeUtils::IsVector(serialized->shape()),
|
||||
errors::InvalidArgument(
|
||||
"Expected serialized to be a vector, got shape: ",
|
||||
serialized->shape().DebugString()));
|
||||
OP_REQUIRES(ctx, dense_defaults.size() == attrs_.num_dense,
|
||||
errors::InvalidArgument(
|
||||
"Expected len(dense_defaults) == len(dense_keys) but got: ",
|
||||
dense_defaults.size(), " vs. ", attrs_.num_dense));
|
||||
|
||||
for (int d = 0; d < static_cast<int>(attrs_.num_dense); ++d) {
|
||||
const Tensor& def_value = dense_defaults[d];
|
||||
if (attrs_.variable_length[d]) {
|
||||
OP_REQUIRES(ctx, def_value.NumElements() == 1,
|
||||
errors::InvalidArgument(
|
||||
"dense_shape[", d, "] is a variable length shape: ",
|
||||
attrs_.dense_shapes[d].DebugString(),
|
||||
", therefore "
|
||||
"def_value[",
|
||||
d,
|
||||
"] must contain a single element ("
|
||||
"the padding element). But its shape is: ",
|
||||
def_value.shape().DebugString()));
|
||||
if (def_value.NumElements() != 1) {
|
||||
return errors::InvalidArgument(
|
||||
"dense_shape[", d, "] is a variable length shape: ",
|
||||
attrs_.dense_shapes[d].DebugString(),
|
||||
", therefore "
|
||||
"def_value[",
|
||||
d,
|
||||
"] must contain a single element ("
|
||||
"the padding element). But its shape is: ",
|
||||
def_value.shape().DebugString());
|
||||
}
|
||||
} else if (def_value.NumElements() > 0) {
|
||||
OP_REQUIRES(ctx,
|
||||
attrs_.dense_shapes[d].IsCompatibleWith(def_value.shape()),
|
||||
errors::InvalidArgument(
|
||||
"def_value[", d,
|
||||
"].shape() == ", def_value.shape().DebugString(),
|
||||
" is not compatible with dense_shapes_[", d,
|
||||
"] == ", attrs_.dense_shapes[d].DebugString()));
|
||||
if (!attrs_.dense_shapes[d].IsCompatibleWith(def_value.shape())) {
|
||||
return errors::InvalidArgument(
|
||||
"def_value[", d, "].shape() == ", def_value.shape().DebugString(),
|
||||
" is not compatible with dense_shapes_[", d,
|
||||
"] == ", attrs_.dense_shapes[d].DebugString());
|
||||
}
|
||||
}
|
||||
if (def_value.dtype() != attrs_.dense_types[d]) {
|
||||
return errors::InvalidArgument(
|
||||
"dense_defaults[", d,
|
||||
"].dtype() == ", DataTypeString(def_value.dtype()),
|
||||
" != dense_types_[", d,
|
||||
"] == ", DataTypeString(attrs_.dense_types[d]));
|
||||
}
|
||||
OP_REQUIRES(ctx, def_value.dtype() == attrs_.dense_types[d],
|
||||
errors::InvalidArgument(
|
||||
"dense_defaults[", d, "].dtype() == ",
|
||||
DataTypeString(def_value.dtype()), " != dense_types_[", d,
|
||||
"] == ", DataTypeString(attrs_.dense_types[d])));
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
example::Result result;
|
||||
|
||||
// Populates the FastParseExampleConfig from keys & defaults.
|
||||
example::FastParseExampleConfig MakeConfig(
|
||||
const std::vector<string>& dense_keys_t,
|
||||
const std::vector<string>& sparse_keys_t,
|
||||
const std::vector<string>& ragged_keys_t,
|
||||
const OpInputList& dense_defaults) const {
|
||||
example::FastParseExampleConfig config;
|
||||
for (int d = 0; d < attrs_.num_dense; ++d) {
|
||||
config.dense.push_back({dense_keys_t[d], attrs_.dense_types[d],
|
||||
@ -131,26 +212,44 @@ class ParseExampleOp : public OpKernel {
|
||||
for (int d = 0; d < attrs_.num_sparse; ++d) {
|
||||
config.sparse.push_back({sparse_keys_t[d], attrs_.sparse_types[d]});
|
||||
}
|
||||
for (int d = 0; d < attrs_.num_ragged; ++d) {
|
||||
config.ragged.push_back({ragged_keys_t[d], attrs_.ragged_value_types[d],
|
||||
attrs_.ragged_split_types[d]});
|
||||
}
|
||||
return config;
|
||||
}
|
||||
|
||||
// Parses a single example.
//
// Reads the serialized proto from `serialized` as a scalar string and passes
// it, together with `config`, to FastParseSingleExample, which fills
// `*result`. The status of that call is returned unchanged. `ctx` is not
// referenced in this body.
|
||||
Status ParseExampleScalar(const example::FastParseExampleConfig& config,
|
||||
const Tensor* serialized, OpKernelContext* ctx,
|
||||
example::Result* result) const {
|
||||
const string& serialized_proto = serialized->scalar<tstring>()();
|
||||
return FastParseSingleExample(config, serialized_proto, result);
|
||||
}
|
||||
|
||||
// Parses a vector of examples.
//
// Builds ArraySlices from the flat data pointer and size of `serialized` and
// `names`, then calls FastParseExample with `config`, both slices, and the
// CPU worker threads obtained from the device of `ctx`. Parsed features are
// written to `*result`; the status of FastParseExample is returned unchanged.
|
||||
Status ParseExampleVector(const example::FastParseExampleConfig& config,
|
||||
const Tensor* serialized, const Tensor* names,
|
||||
OpKernelContext* ctx,
|
||||
example::Result* result) const {
|
||||
auto serialized_t = serialized->flat<tstring>();
|
||||
auto names_t = names->flat<tstring>();
|
||||
// The slices are sized from the flat views, so `names_slice` has zero
// length when `names` has no elements.
gtl::ArraySlice<tstring> slice(serialized_t.data(), serialized_t.size());
|
||||
gtl::ArraySlice<tstring> names_slice(names_t.data(), names_t.size());
|
||||
return FastParseExample(
|
||||
config, slice, names_slice,
|
||||
ctx->device()->tensorflow_cpu_worker_threads()->workers, result);
|
||||
}
|
||||
|
||||
OP_REQUIRES_OK(
|
||||
ctx,
|
||||
FastParseExample(
|
||||
config, slice, names_slice,
|
||||
ctx->device()->tensorflow_cpu_worker_threads()->workers, &result));
|
||||
|
||||
Status WriteOutput(example::Result result, OpKernelContext* ctx) const {
|
||||
OpOutputList dense_values;
|
||||
OpOutputList sparse_indices;
|
||||
OpOutputList sparse_values;
|
||||
OpOutputList sparse_shapes;
|
||||
OP_REQUIRES_OK(ctx, ctx->output_list("dense_values", &dense_values));
|
||||
OP_REQUIRES_OK(ctx, ctx->output_list("sparse_indices", &sparse_indices));
|
||||
OP_REQUIRES_OK(ctx, ctx->output_list("sparse_values", &sparse_values));
|
||||
OP_REQUIRES_OK(ctx, ctx->output_list("sparse_shapes", &sparse_shapes));
|
||||
TF_RETURN_IF_ERROR(ctx->output_list("dense_values", &dense_values));
|
||||
TF_RETURN_IF_ERROR(ctx->output_list("sparse_indices", &sparse_indices));
|
||||
TF_RETURN_IF_ERROR(ctx->output_list("sparse_values", &sparse_values));
|
||||
TF_RETURN_IF_ERROR(ctx->output_list("sparse_shapes", &sparse_shapes));
|
||||
for (int d = 0; d < attrs_.num_dense; ++d) {
|
||||
dense_values.set(d, result.dense_values[d]);
|
||||
}
|
||||
@ -159,14 +258,27 @@ class ParseExampleOp : public OpKernel {
|
||||
sparse_values.set(d, result.sparse_values[d]);
|
||||
sparse_shapes.set(d, result.sparse_shapes[d]);
|
||||
}
|
||||
if (op_version_ == 2) {
|
||||
OpOutputList ragged_values;
|
||||
OpOutputList ragged_splits;
|
||||
TF_RETURN_IF_ERROR(ctx->output_list("ragged_values", &ragged_values));
|
||||
TF_RETURN_IF_ERROR(ctx->output_list("ragged_row_splits", &ragged_splits));
|
||||
for (int d = 0; d < attrs_.num_ragged; ++d) {
|
||||
ragged_values.set(d, result.ragged_values[d]);
|
||||
ragged_splits.set(d, result.ragged_splits[d]);
|
||||
}
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
protected:
|
||||
ParseExampleAttrs attrs_;
|
||||
int op_version_;
|
||||
};
|
||||
|
||||
REGISTER_KERNEL_BUILDER(Name("ParseExample").Device(DEVICE_CPU),
|
||||
ParseExampleOp);
|
||||
REGISTER_KERNEL_BUILDER(Name("ParseExampleV2").Device(DEVICE_CPU),
|
||||
ParseExampleOp);
|
||||
|
||||
class ParseSingleExampleOp : public OpKernel {
|
||||
public:
|
||||
@ -844,7 +956,7 @@ class DecodeJSONExampleOp : public OpKernel {
|
||||
"type.googleapis.com", protobuf::DescriptorPool::generated_pool()));
|
||||
}
|
||||
|
||||
void Compute(OpKernelContext* ctx) {
|
||||
void Compute(OpKernelContext* ctx) override {
|
||||
const Tensor* json_examples;
|
||||
OP_REQUIRES_OK(ctx, ctx->input("json_examples", &json_examples));
|
||||
Tensor* binary_examples;
|
||||
|
@ -139,7 +139,7 @@ template struct ExampleStore<BytesFiller>;
|
||||
template struct ExampleStore<Int64Filler>;
|
||||
template struct ExampleStore<FloatFiller>;
|
||||
|
||||
enum BenchmarkType { kDense, kSparse, kVarLenDense };
|
||||
enum BenchmarkType { kDense, kSparse, kVarLenDense, kRagged };
|
||||
|
||||
template <typename S, BenchmarkType b_type>
|
||||
struct BenchmarkOptions {
|
||||
@ -198,6 +198,70 @@ static Graph* ParseExample(int batch_size, int num_keys, int feature_size) {
|
||||
return g;
|
||||
}
|
||||
|
||||
// Builds a graph containing a single "ParseExampleV2" node for benchmarking.
//
// The serialized input tensor is looked up in Options::Store by the tuple
// (batch_size, num_keys, feature_size). Keys are named "feature_<i>" and are
// routed to exactly one of the sparse / dense / ragged key inputs depending
// on Options::benchmark_type; the other two key inputs receive an empty
// string vector. The Graph is allocated with `new` and returned as a raw
// pointer.
template <typename Options>
|
||||
static Graph* ParseExampleV2(int batch_size, int num_keys, int feature_size) {
|
||||
Graph* g = new Graph(OpRegistry::Global());
|
||||
// NOTE(review): operator[] is used for the lookup; presumably the store is
// pre-populated for this tuple - confirm against ExampleStore.
Tensor& serialized = Options::Store::GetSerializedExample()[std::make_tuple(
|
||||
batch_size, num_keys, feature_size)];
|
||||
// `names` is created with one entry per example and is never filled in here.
Tensor names(DT_STRING, TensorShape({batch_size}));
|
||||
|
||||
std::vector<NodeBuilder::NodeOut> dense_defaults;
|
||||
std::vector<DataType> sparse_types;
|
||||
std::vector<DataType> ragged_value_types;
|
||||
std::vector<DataType> ragged_split_types;
|
||||
std::vector<PartialTensorShape> dense_shapes;
|
||||
// V2 takes each set of keys as one string vector; build it once here.
Tensor keys_t(DT_STRING, {static_cast<int32>(num_keys)});
|
||||
auto keys_flat = keys_t.flat<string>();
|
||||
Options opt;
|
||||
for (int i = 0; i < num_keys; ++i) {
|
||||
keys_flat(i) = strings::Printf("feature_%d", i);
|
||||
// Only the attribute lists matching the benchmark type are populated.
switch (opt.benchmark_type) {
|
||||
case kDense:
|
||||
dense_defaults.emplace_back(test::graph::Constant(
|
||||
g, opt.filler.make_dense_default(feature_size)));
|
||||
dense_shapes.push_back(PartialTensorShape({feature_size}));
|
||||
break;
|
||||
case kVarLenDense:
|
||||
// Variable-length dense: a default built with size 1 and a shape whose
// only dimension is -1.
dense_defaults.emplace_back(
|
||||
test::graph::Constant(g, opt.filler.make_dense_default(1)));
|
||||
dense_shapes.push_back(PartialTensorShape({-1}));
|
||||
break;
|
||||
case kSparse:
|
||||
sparse_types.push_back(opt.filler.dtype);
|
||||
break;
|
||||
case kRagged:
|
||||
ragged_value_types.push_back(opt.filler.dtype);
|
||||
// Row splits are always benchmarked as int32 here.
ragged_split_types.push_back(DT_INT32);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Key inputs that do not match the benchmark type get an empty vector.
Tensor empty_keys(DT_STRING, {0});
|
||||
auto bm_type = opt.benchmark_type;
|
||||
auto& sparse_keys = (bm_type == kSparse) ? keys_t : empty_keys;
|
||||
auto& dense_keys =
|
||||
(bm_type == kDense || bm_type == kVarLenDense) ? keys_t : empty_keys;
|
||||
auto& ragged_keys = (bm_type == kRagged) ? keys_t : empty_keys;
|
||||
int num_sparse = opt.benchmark_type == kSparse ? num_keys : 0;
|
||||
|
||||
// `ret` receives the created node; it is not used after Finalize.
Node* ret;
|
||||
TF_EXPECT_OK(NodeBuilder(g->NewName("n"), "ParseExampleV2")
|
||||
.Input(test::graph::Constant(g, serialized))
|
||||
.Input(test::graph::Constant(g, names))
|
||||
.Input(test::graph::Constant(g, sparse_keys))
|
||||
.Input(test::graph::Constant(g, dense_keys))
|
||||
.Input(test::graph::Constant(g, ragged_keys))
|
||||
.Input(dense_defaults)
|
||||
.Attr("num_sparse", num_sparse)
|
||||
.Attr("sparse_types", sparse_types)
|
||||
.Attr("ragged_value_types", ragged_value_types)
|
||||
.Attr("ragged_split_types", ragged_split_types)
|
||||
.Attr("dense_shapes", dense_shapes)
|
||||
.Finalize(g, &ret));
|
||||
|
||||
return g;
|
||||
}
|
||||
|
||||
template <typename Options>
|
||||
static Graph* ParseSingleExample(int num_keys, int feature_size) {
|
||||
Graph* g = new Graph(OpRegistry::Global());
|
||||
@ -253,14 +317,17 @@ typedef BenchmarkOptions<ExampleStore<BytesFiller>, kSparse> SparseString;
|
||||
typedef BenchmarkOptions<ExampleStore<BytesFiller>, kDense> DenseString;
|
||||
typedef BenchmarkOptions<ExampleStore<BytesFiller>, kVarLenDense>
|
||||
VarLenDenseString;
|
||||
typedef BenchmarkOptions<ExampleStore<BytesFiller>, kRagged> RaggedString;
|
||||
typedef BenchmarkOptions<ExampleStore<Int64Filler>, kSparse> SparseInt64;
|
||||
typedef BenchmarkOptions<ExampleStore<Int64Filler>, kDense> DenseInt64;
|
||||
typedef BenchmarkOptions<ExampleStore<Int64Filler>, kVarLenDense>
|
||||
VarLenDenseInt64;
|
||||
typedef BenchmarkOptions<ExampleStore<Int64Filler>, kRagged> RaggedInt64;
|
||||
typedef BenchmarkOptions<ExampleStore<FloatFiller>, kSparse> SparseFloat;
|
||||
typedef BenchmarkOptions<ExampleStore<FloatFiller>, kDense> DenseFloat;
|
||||
typedef BenchmarkOptions<ExampleStore<FloatFiller>, kVarLenDense>
|
||||
VarLenDenseFloat;
|
||||
typedef BenchmarkOptions<ExampleStore<FloatFiller>, kRagged> RaggedFloat;
|
||||
|
||||
// B == batch_size, K == num_keys. F == feature_size.
|
||||
// K must be one of 10, 100, 1000
|
||||
@ -295,6 +362,42 @@ BM_AllParseExample(SparseFloat);
|
||||
BM_AllParseExample(DenseFloat);
|
||||
BM_AllParseExample(VarLenDenseFloat);
|
||||
|
||||
// B == batch_size, K == num_keys. F == feature_size.
|
||||
// K must be one of 10, 100, 1000
// Defines a benchmark function named BM_ParseExampleV2_<TYPE>_<B>_<K>_<F>
// and registers it with BENCHMARK. The function runs the graph returned by
// ParseExampleV2<TYPE>(B, K, F) on "cpu" for `iters` iterations, using real
// time, and reports B * K * F items processed per iteration.
|
||||
#define BM_ParseExampleV2(TYPE, B, K, F) \
|
||||
static void BM_ParseExampleV2##_##TYPE##_##B##_##K##_##F(int iters) { \
|
||||
int64 items_per_iter = static_cast<int64>(B) * K * F; \
|
||||
testing::UseRealTime(); \
|
||||
testing::ItemsProcessed(static_cast<int64>(iters) * items_per_iter); \
|
||||
test::Benchmark("cpu", ParseExampleV2<TYPE>(B, K, F)).Run(iters); \
|
||||
} \
|
||||
BENCHMARK(BM_ParseExampleV2##_##TYPE##_##B##_##K##_##F);
|
||||
|
||||
// Instantiates BM_ParseExampleV2 for `Type` over batch sizes {1, 128, 512}
// crossed with key counts {10, 100, 1000} at feature size 1, plus one case
// with batch size 1, a single key and a feature size of 1000000.
#define BM_AllParseExampleV2(Type) \
|
||||
BM_ParseExampleV2(Type, 1, 10, 1); \
|
||||
BM_ParseExampleV2(Type, 128, 10, 1); \
|
||||
BM_ParseExampleV2(Type, 512, 10, 1); \
|
||||
BM_ParseExampleV2(Type, 1, 100, 1); \
|
||||
BM_ParseExampleV2(Type, 128, 100, 1); \
|
||||
BM_ParseExampleV2(Type, 512, 100, 1); \
|
||||
BM_ParseExampleV2(Type, 1, 1000, 1); \
|
||||
BM_ParseExampleV2(Type, 128, 1000, 1); \
|
||||
BM_ParseExampleV2(Type, 512, 1000, 1); \
|
||||
BM_ParseExampleV2(Type, 1, 1, 1000000);
|
||||
|
||||
// Register the V2 benchmarks for each value type (String, Int64, Float) and
// each feature kind (Sparse, Dense, VarLenDense, Ragged).
BM_AllParseExampleV2(SparseString);
|
||||
BM_AllParseExampleV2(DenseString);
|
||||
BM_AllParseExampleV2(VarLenDenseString);
|
||||
BM_AllParseExampleV2(RaggedString);
|
||||
BM_AllParseExampleV2(SparseInt64);
|
||||
BM_AllParseExampleV2(DenseInt64);
|
||||
BM_AllParseExampleV2(VarLenDenseInt64);
|
||||
BM_AllParseExampleV2(RaggedInt64);
|
||||
BM_AllParseExampleV2(SparseFloat);
|
||||
BM_AllParseExampleV2(DenseFloat);
|
||||
BM_AllParseExampleV2(VarLenDenseFloat);
|
||||
BM_AllParseExampleV2(RaggedFloat);
|
||||
|
||||
// K == num_keys. F == feature_size.
|
||||
// K must be one of 10, 100, 1000
|
||||
#define BM_ParseSingleExample(TYPE, K, F) \
|
||||
|
@ -16,6 +16,7 @@ limitations under the License.
|
||||
#include "tensorflow/core/framework/common_shape_fns.h"
|
||||
#include "tensorflow/core/framework/op.h"
|
||||
#include "tensorflow/core/framework/shape_inference.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
#include "tensorflow/core/util/example_proto_helper.h"
|
||||
|
||||
namespace tensorflow {
|
||||
@ -24,6 +25,75 @@ using shape_inference::DimensionHandle;
|
||||
using shape_inference::InferenceContext;
|
||||
using shape_inference::ShapeHandle;
|
||||
|
||||
namespace {
|
||||
|
||||
// Adds output shapes for dense tensors in Parse*Example ops.
//
// For each entry of `dense_shapes`, builds a shape handle from it, prepends
// `prefix`, and sets the result as output number `*output_idx`, which is
// then incremented. Returns the first error from shape construction or
// concatenation; otherwise returns OK.
|
||||
template <typename TensorShapeType> // TensorShape or PartialTensorShape
|
||||
Status AddDenseOutputShapes(const std::vector<TensorShapeType>& dense_shapes,
|
||||
const ShapeHandle& prefix, InferenceContext* c,
|
||||
int* output_idx) {
|
||||
for (const auto& dense_shape : dense_shapes) {
|
||||
ShapeHandle s;
|
||||
TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(dense_shape, &s));
|
||||
// Output shape is `prefix` followed by the dense feature's own shape.
TF_RETURN_IF_ERROR(c->Concatenate(prefix, s, &s));
|
||||
c->set_output((*output_idx)++, s);
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Adds output shapes for sparse tensors in Parse*Example ops.
//
// Sets 3 * num_sparse consecutive outputs starting at `*output_idx`
// (advancing it once per output): first all indices as [?, rank] matrices,
// then all values as [?] vectors, then all dense shapes as [rank] vectors.
// `rank` is Rank(input_shape) + rank_delta when the input rank is known and
// an unknown dimension otherwise.
|
||||
void AddSparseOutputShapes(int num_sparse, const ShapeHandle input_shape,
|
||||
int64 rank_delta, InferenceContext* c,
|
||||
int* output_idx) {
|
||||
// Rank of SparseTensor is rank of input tensor plus rank_delta.
|
||||
shape_inference::DimensionOrConstant rank(c->UnknownDim());
|
||||
if (c->RankKnown(input_shape)) {
|
||||
rank = c->Rank(input_shape) + rank_delta;
|
||||
}
|
||||
for (int i = 0; i < num_sparse; ++i) { // sparse_indices
|
||||
c->set_output((*output_idx)++, c->Matrix(c->UnknownDim(), rank));
|
||||
}
|
||||
for (int i = 0; i < num_sparse; ++i) { // sparse_values
|
||||
c->set_output((*output_idx)++, c->Vector(c->UnknownDim()));
|
||||
}
|
||||
for (int i = 0; i < num_sparse; ++i) { // sparse_dense_shapes
|
||||
c->set_output((*output_idx)++, c->Vector(rank));
|
||||
}
|
||||
}
|
||||
|
||||
// Adds output shapes for ragged tensors in Parse*Example ops.
//
// Starting at `*output_idx` (advanced once per output), sets: num_ragged
// value vectors of unknown length; then, only if `ragged_rank_2` is true,
// num_ragged inner row_splits vectors of unknown length; then num_ragged
// outer row_splits vectors of length num_examples + 1. Returns an error if
// num_examples + 1 cannot be computed; otherwise returns OK.
|
||||
Status AddRaggedOutputShapes(int num_ragged, bool ragged_rank_2,
|
||||
const DimensionHandle& num_examples,
|
||||
InferenceContext* c, int* output_idx) {
|
||||
DimensionHandle num_splits;
|
||||
// Outer row_splits hold one more entry than there are examples.
TF_RETURN_IF_ERROR(c->Add(num_examples, 1, &num_splits));
|
||||
// Values
|
||||
for (int i = 0; i < num_ragged; ++i) {
|
||||
c->set_output((*output_idx)++, c->Vector(c->UnknownDim()));
|
||||
}
|
||||
// Inner row_splits
|
||||
if (ragged_rank_2) {
|
||||
for (int i = 0; i < num_ragged; ++i) {
|
||||
c->set_output((*output_idx)++, c->Vector(c->UnknownDim()));
|
||||
}
|
||||
}
|
||||
// Outer row_splits.
|
||||
for (int i = 0; i < num_ragged; ++i) {
|
||||
c->set_output((*output_idx)++, c->Vector(num_splits));
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Adds output shapes for dense_lengths tensors in Parse*Example ops.
//
// Sets `num_dense` consecutive outputs, starting at `*output_idx`, to
// `shape`, incrementing `*output_idx` once per output.
|
||||
void AddDenseLengthsShapes(int num_dense, const ShapeHandle& shape,
|
||||
InferenceContext* c, int* output_idx) {
|
||||
for (int i = 0; i < num_dense; ++i) {
|
||||
c->set_output((*output_idx)++, shape);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
REGISTER_OP("DecodeRaw")
|
||||
.Input("bytes: string")
|
||||
.Output("output: out_type")
|
||||
@ -89,33 +159,68 @@ REGISTER_OP("ParseExample")
|
||||
.Attr("dense_shapes: list(shape) >= 0")
|
||||
.SetShapeFn([](InferenceContext* c) {
|
||||
ParseExampleAttrs attrs;
|
||||
TF_RETURN_IF_ERROR(attrs.Init(c));
|
||||
TF_RETURN_IF_ERROR(attrs.Init(c, /*op_version=*/1));
|
||||
|
||||
ShapeHandle input;
|
||||
TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &input));
|
||||
ShapeHandle unused;
|
||||
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused)); // names
|
||||
ShapeHandle names;
|
||||
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &names));
|
||||
|
||||
// Output sparse_indices, sparse_values, and sparse_shapes.
|
||||
int output_idx = 0;
|
||||
for (int i = 0; i < attrs.num_sparse; ++i) {
|
||||
c->set_output(output_idx++, c->Matrix(c->UnknownDim(), 2));
|
||||
}
|
||||
for (int i = 0; i < attrs.num_sparse; ++i) {
|
||||
c->set_output(output_idx++, c->Vector(c->UnknownDim()));
|
||||
}
|
||||
for (int i = 0; i < attrs.num_sparse; ++i) {
|
||||
c->set_output(output_idx++, c->Vector(2));
|
||||
AddSparseOutputShapes(attrs.num_sparse, input, 1, c, &output_idx);
|
||||
TF_RETURN_IF_ERROR(
|
||||
AddDenseOutputShapes(attrs.dense_shapes, input, c, &output_idx));
|
||||
return Status::OK();
|
||||
});
|
||||
|
||||
// Differences between ParseExample and ParseExampleV2:
|
||||
// * Supports ragged features.
|
||||
// * `serialized` may be a vector or a scalar. (With v1, `serialized` could
|
||||
// only be a vector).
|
||||
// * Each set of keys is passed with a vector instead of a list of scalars.
|
||||
// * No Ndense attribute (not needed).
|
||||
// * num_sparse (formerly Nsparse) is no longer inferred; you must specify it
|
||||
// explicitly.
|
||||
REGISTER_OP("ParseExampleV2")
|
||||
.Input("serialized: string")
|
||||
.Input("names: string")
|
||||
.Input("sparse_keys: string")
|
||||
.Input("dense_keys: string")
|
||||
.Input("ragged_keys: string")
|
||||
.Input("dense_defaults: Tdense")
|
||||
.Output("sparse_indices: num_sparse * int64")
|
||||
.Output("sparse_values: sparse_types")
|
||||
.Output("sparse_shapes: num_sparse * int64")
|
||||
.Output("dense_values: Tdense")
|
||||
.Output("ragged_values: ragged_value_types")
|
||||
.Output("ragged_row_splits: ragged_split_types")
|
||||
.Attr("Tdense: list({float,int64,string}) >= 0") // Inferred
|
||||
.Attr("num_sparse: int >= 0")
|
||||
.Attr("sparse_types: list({float,int64,string}) >= 0")
|
||||
.Attr("ragged_value_types: list({float,int64,string}) >= 0")
|
||||
.Attr("ragged_split_types: list({int32,int64}) >= 0")
|
||||
.Attr("dense_shapes: list(shape) >= 0")
|
||||
|
||||
.SetShapeFn([](InferenceContext* c) {
|
||||
ParseExampleAttrs attrs;
|
||||
TF_RETURN_IF_ERROR(attrs.Init(c, /*op_version=*/2));
|
||||
|
||||
ShapeHandle input;
|
||||
TF_RETURN_IF_ERROR(c->WithRankAtMost(c->input(0), 1, &input));
|
||||
ShapeHandle names;
|
||||
TF_RETURN_IF_ERROR(c->WithRankAtMost(c->input(1), 1, &names));
|
||||
DimensionHandle num_examples = c->UnknownDim();
|
||||
if (c->RankKnown(input) && c->Rank(input) == 1) {
|
||||
num_examples = c->Dim(input, 0);
|
||||
}
|
||||
|
||||
// Output dense_shapes.
|
||||
for (int i = 0; i < attrs.num_dense; ++i) {
|
||||
ShapeHandle dense;
|
||||
TF_RETURN_IF_ERROR(
|
||||
c->MakeShapeFromPartialTensorShape(attrs.dense_shapes[i], &dense));
|
||||
TF_RETURN_IF_ERROR(c->Concatenate(input, dense, &dense));
|
||||
c->set_output(output_idx++, dense);
|
||||
}
|
||||
int output_idx = 0;
|
||||
AddSparseOutputShapes(attrs.num_sparse, input, 1, c, &output_idx);
|
||||
TF_RETURN_IF_ERROR(
|
||||
AddDenseOutputShapes(attrs.dense_shapes, input, c, &output_idx));
|
||||
TF_RETURN_IF_ERROR(AddRaggedOutputShapes(attrs.num_ragged, false,
|
||||
num_examples, c, &output_idx));
|
||||
|
||||
return Status::OK();
|
||||
});
|
||||
|
||||
@ -139,25 +244,10 @@ REGISTER_OP("ParseSingleExample")
|
||||
ShapeHandle input;
|
||||
TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &input));
|
||||
|
||||
// Output sparse_indices, sparse_values, and sparse_shapes.
|
||||
int output_idx = 0;
|
||||
for (int i = 0; i < attrs.sparse_keys.size(); ++i) {
|
||||
c->set_output(output_idx++, c->Matrix(c->UnknownDim(), 1));
|
||||
}
|
||||
for (int i = 0; i < attrs.sparse_keys.size(); ++i) {
|
||||
c->set_output(output_idx++, c->Vector(c->UnknownDim()));
|
||||
}
|
||||
for (int i = 0; i < attrs.sparse_keys.size(); ++i) {
|
||||
c->set_output(output_idx++, c->Vector(1));
|
||||
}
|
||||
|
||||
// Output dense_shapes.
|
||||
for (int i = 0; i < attrs.dense_keys.size(); ++i) {
|
||||
ShapeHandle dense;
|
||||
TF_RETURN_IF_ERROR(
|
||||
c->MakeShapeFromPartialTensorShape(attrs.dense_shapes[i], &dense));
|
||||
c->set_output(output_idx++, dense);
|
||||
}
|
||||
AddSparseOutputShapes(attrs.sparse_keys.size(), input, 1, c, &output_idx);
|
||||
TF_RETURN_IF_ERROR(
|
||||
AddDenseOutputShapes(attrs.dense_shapes, input, c, &output_idx));
|
||||
return Status::OK();
|
||||
});
|
||||
|
||||
@ -196,60 +286,23 @@ REGISTER_OP("ParseSequenceExample")
|
||||
// Verify that the input is a vector, and carry the shape if known.
|
||||
ShapeHandle input;
|
||||
TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &input));
|
||||
shape_inference::DimensionHandle num_examples = c->Dim(input, 0);
|
||||
|
||||
ShapeHandle unused;
|
||||
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused)); // debug_name
|
||||
ShapeHandle names;
|
||||
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &names));
|
||||
DimensionHandle num_examples = c->Dim(input, 0);
|
||||
ShapeHandle feature_list_dense_prefix =
|
||||
c->Matrix(num_examples, c->UnknownDim());
|
||||
|
||||
int output_idx = 0;
|
||||
|
||||
// Output context_sparse_indices, context_sparse_values, and
|
||||
// context_sparse_shapes.
|
||||
for (int i = 0; i < attrs.num_context_sparse; ++i) {
|
||||
c->set_output(output_idx++, c->Matrix(c->UnknownDim(), 2));
|
||||
}
|
||||
for (int i = 0; i < attrs.num_context_sparse; ++i) {
|
||||
c->set_output(output_idx++, c->Vector(c->UnknownDim()));
|
||||
}
|
||||
for (int i = 0; i < attrs.num_context_sparse; ++i) {
|
||||
c->set_output(output_idx++, c->Vector(2));
|
||||
}
|
||||
|
||||
// Output context_dense_values.
|
||||
for (int i = 0; i < attrs.num_context_dense; ++i) {
|
||||
ShapeHandle s;
|
||||
TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(
|
||||
attrs.context_dense_shapes[i], &s));
|
||||
TF_RETURN_IF_ERROR(c->Concatenate(c->Vector(num_examples), s, &s));
|
||||
c->set_output(output_idx++, s);
|
||||
}
|
||||
|
||||
// Output feature_list_sparse_indices, feature_list_sparse_values,
|
||||
// feature_list_sparse_shapes.
|
||||
for (int i = 0; i < attrs.num_feature_list_sparse; ++i) {
|
||||
c->set_output(output_idx++, c->Matrix(c->UnknownDim(), 3));
|
||||
}
|
||||
for (int i = 0; i < attrs.num_feature_list_sparse; ++i) {
|
||||
c->set_output(output_idx++, c->Vector(c->UnknownDim()));
|
||||
}
|
||||
for (int i = 0; i < attrs.num_feature_list_sparse; ++i) {
|
||||
c->set_output(output_idx++, c->Vector(3));
|
||||
}
|
||||
|
||||
// Output feature_list_dense_shapes.
|
||||
for (int i = 0; i < attrs.num_feature_list_dense; ++i) {
|
||||
ShapeHandle s;
|
||||
TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(
|
||||
attrs.feature_list_dense_shapes[i], &s));
|
||||
TF_RETURN_IF_ERROR(
|
||||
c->Concatenate(c->Matrix(num_examples, c->UnknownDim()), s, &s));
|
||||
c->set_output(output_idx++, s);
|
||||
}
|
||||
|
||||
// Output feature_list_dense_lengths.
|
||||
for (int i = 0; i < attrs.num_feature_list_dense; ++i) {
|
||||
c->set_output(output_idx++, c->Vector(num_examples));
|
||||
}
|
||||
AddSparseOutputShapes(attrs.num_context_sparse, input, 1, c, &output_idx);
|
||||
TF_RETURN_IF_ERROR(AddDenseOutputShapes(attrs.context_dense_shapes, input,
|
||||
c, &output_idx));
|
||||
AddSparseOutputShapes(attrs.num_feature_list_sparse, input, 2, c,
|
||||
&output_idx);
|
||||
TF_RETURN_IF_ERROR(AddDenseOutputShapes(attrs.feature_list_dense_shapes,
|
||||
feature_list_dense_prefix, c,
|
||||
&output_idx));
|
||||
AddDenseLengthsShapes(attrs.num_feature_list_dense, input, c,
|
||||
&output_idx);
|
||||
|
||||
return Status::OK();
|
||||
});
|
||||
@ -297,48 +350,14 @@ REGISTER_OP("ParseSingleSequenceExample")
|
||||
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused));
|
||||
|
||||
int output_idx = 0;
|
||||
|
||||
// Output context_sparse_indices, context_sparse_values, and
|
||||
// context_sparse_shapes.
|
||||
for (int i = 0; i < attrs.num_context_sparse; ++i) {
|
||||
c->set_output(output_idx++, c->Matrix(c->UnknownDim(), 1));
|
||||
}
|
||||
for (int i = 0; i < attrs.num_context_sparse; ++i) {
|
||||
c->set_output(output_idx++, c->Vector(c->UnknownDim()));
|
||||
}
|
||||
for (int i = 0; i < attrs.num_context_sparse; ++i) {
|
||||
c->set_output(output_idx++, c->Vector(1));
|
||||
}
|
||||
|
||||
// Output context_dense_shapes.
|
||||
for (int i = 0; i < attrs.num_context_dense; ++i) {
|
||||
ShapeHandle s;
|
||||
TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(
|
||||
attrs.context_dense_shapes[i], &s));
|
||||
c->set_output(output_idx++, s);
|
||||
}
|
||||
|
||||
// Output feature_list_sparse_indices, feature_list_sparse_values,
|
||||
// feature_list_sparse_shapes.
|
||||
for (int i = 0; i < attrs.num_feature_list_sparse; ++i) {
|
||||
c->set_output(output_idx++, c->Matrix(c->UnknownDim(), 2));
|
||||
}
|
||||
for (int i = 0; i < attrs.num_feature_list_sparse; ++i) {
|
||||
c->set_output(output_idx++, c->Vector(c->UnknownDim()));
|
||||
}
|
||||
for (int i = 0; i < attrs.num_feature_list_sparse; ++i) {
|
||||
c->set_output(output_idx++, c->Vector(2));
|
||||
}
|
||||
|
||||
// Output feature_list_dense_shapes.
|
||||
for (int i = 0; i < attrs.num_feature_list_dense; ++i) {
|
||||
ShapeHandle s;
|
||||
TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape(
|
||||
attrs.feature_list_dense_shapes[i], &s));
|
||||
TF_RETURN_IF_ERROR(
|
||||
c->Concatenate(c->Vector(InferenceContext::kUnknownDim), s, &s));
|
||||
c->set_output(output_idx++, s);
|
||||
}
|
||||
AddSparseOutputShapes(attrs.num_context_sparse, input, 1, c, &output_idx);
|
||||
TF_RETURN_IF_ERROR(AddDenseOutputShapes(attrs.context_dense_shapes, input,
|
||||
c, &output_idx));
|
||||
AddSparseOutputShapes(attrs.num_feature_list_sparse, input, 2, c,
|
||||
&output_idx);
|
||||
TF_RETURN_IF_ERROR(AddDenseOutputShapes(attrs.feature_list_dense_shapes,
|
||||
c->UnknownShapeOfRank(1), c,
|
||||
&output_idx));
|
||||
return Status::OK();
|
||||
});
|
||||
|
||||
|
@ -100,7 +100,7 @@ TEST(ParsingOpsTest, ParseExample_ShapeFn) {
|
||||
.Input(NodeOutList(num_dense, string_in))
|
||||
.Input(NodeOutList(num_dense, string_in))
|
||||
.Attr("sparse_types", DataTypeList(num_sparse, DT_FLOAT))
|
||||
.Attr("dense_types", DataTypeList(num_dense, DT_FLOAT))
|
||||
// Tdense is inferred from dense_defaults.
|
||||
.Attr("dense_shapes", MakeDenseShapes(num_dense, add_extra_shape,
|
||||
unknown_outer_dims))
|
||||
.Finalize(&op.node_def));
|
||||
@ -132,6 +132,9 @@ TEST(ParsingOpsTest, ParseExample_ShapeFn) {
|
||||
INFER_OK(op, "?;?;?;?;?;?;?;?;?;?",
|
||||
("[?,2];[?,2];[?];[?];[2];[2];" // sparse outputs
|
||||
"[?,?,1];[?,?,1,2];[?,?,1,2,3]")); // dense outputs
|
||||
INFER_OK(op, "[?];?;?;?;?;?;?;?;?;?",
|
||||
("[?,2];[?,2];[?];[?];[2];[2];" // sparse outputs
|
||||
"[d0_0,?,1];[d0_0,?,1,2];[d0_0,?,1,2,3]")); // dense outputs
|
||||
INFER_OK(op, "[10];?;?;?;?;?;?;?;?;?",
|
||||
("[?,2];[?,2];[?];[?];[2];[2];" // sparse outputs
|
||||
"[d0_0,?,1];[d0_0,?,1,2];[d0_0,?,1,2,3]")); // dense outputs
|
||||
@ -209,7 +212,7 @@ TEST(ParsingOpsTest, ParseSequenceExample_ShapeFn) {
|
||||
INFER_OK(op, "[?];[?]",
|
||||
("[?,3];[?,3];[?];[?];[3];[3];" // feature_list sparse
|
||||
"[d0_0,?,1];[d0_0,?,1,2];[d0_0,?,1,2,3];" // feature_list dense
|
||||
"[d0_0];[d0_0];[d0_0]")); // feature_list length
|
||||
"in0;in0;in0")); // feature_list length
|
||||
|
||||
// Combine previous two test cases.
|
||||
set_outputs(2, 3, 2, 3);
|
||||
@ -218,7 +221,7 @@ TEST(ParsingOpsTest, ParseSequenceExample_ShapeFn) {
|
||||
"[d0_0,1];[d0_0,1,2];[d0_0,1,2,3];" // context dense
|
||||
"[?,3];[?,3];[?];[?];[3];[3];" // feature_list sparse
|
||||
"[d0_0,?,1];[d0_0,?,1,2];[d0_0,?,1,2,3];" // feature_list dense
|
||||
"[d0_0];[d0_0];[d0_0]")); // feature_list length
|
||||
"in0;in0;in0")); // feature_list length
|
||||
|
||||
// Confirm an error from ParseSequenceExampleAttrs.Init().
|
||||
set_outputs(1, 1, 1, 1, true /* add_extra_shape */);
|
||||
@ -297,4 +300,90 @@ TEST(ParsingOpsTest, ParseSingleSequenceExample_ShapeFn) {
|
||||
"?;?;?;?;?;?;?;?");
|
||||
}
|
||||
|
||||
TEST(ParsingOpsTest, ParseExampleV2_ShapeFn) {
|
||||
ShapeInferenceTestOp op("ParseExampleV2");
|
||||
auto set_outputs = [&op](int num_sparse, int num_dense, int num_ragged,
|
||||
bool add_extra_shape = false,
|
||||
int unknown_outer_dims = 0) {
|
||||
using NodeOutList = std::vector<NodeDefBuilder::NodeOut>;
|
||||
using DataTypeList = std::vector<DataType>;
|
||||
NodeDefBuilder::NodeOut string_in{"a", 0, DT_STRING};
|
||||
|
||||
TF_ASSERT_OK(
|
||||
NodeDefBuilder("test", "ParseExampleV2")
|
||||
.Input("serialized", 0, DT_STRING)
|
||||
.Input("names", 0, DT_STRING)
|
||||
.Input("sparse_keys", 0, DT_STRING)
|
||||
.Input("dense_keys", 0, DT_STRING)
|
||||
.Input("ragged_keys", 0, DT_STRING)
|
||||
.Input(NodeOutList(num_dense, string_in)) // dense_defaults
|
||||
.Attr("num_sparse", num_sparse)
|
||||
.Attr("sparse_types", DataTypeList(num_sparse, DT_FLOAT))
|
||||
.Attr("ragged_value_types", DataTypeList(num_ragged, DT_FLOAT))
|
||||
.Attr("ragged_split_types", DataTypeList(num_ragged, DT_INT32))
|
||||
.Attr("dense_shapes", MakeDenseShapes(num_dense, add_extra_shape,
|
||||
unknown_outer_dims))
|
||||
.Finalize(&op.node_def));
|
||||
};
|
||||
|
||||
// Verify inputs 'serialized' and 'names'.
|
||||
set_outputs(0 /* num_sparse */, 0 /* num_dense */, 0 /* num_ragged */);
|
||||
INFER_OK(op, "?;?;[0];[0];[0]", "");
|
||||
INFER_OK(op, "[10];[10];[0];[0];[0]", "");
|
||||
INFER_OK(op, "[];[];[0];[0];[0]", "");
|
||||
INFER_ERROR("must be at most rank 1", op, "[1,2];?;?;?;?");
|
||||
INFER_ERROR("must be at most rank 1", op, "?;[2,3];?;?;?");
|
||||
|
||||
// Verify the sparse, dense, and ragged outputs.
|
||||
set_outputs(2 /* num_sparse */, 3 /* num_dense */, 4 /* num_ragged */);
|
||||
INFER_OK(op, "[?];?;?;?;?;?;?;?", // Vector input, unknown size
|
||||
("[?,2];[?,2];" // sparse indices
|
||||
"[?];[?];" // sparse values
|
||||
"[2];[2];" // sparse dense_shapes
|
||||
"[d0_0,1];[d0_0,1,2];[d0_0,1,2,3];" // dense outputs
|
||||
"[?];[?];[?];[?];" // ragged values
|
||||
"[?];[?];[?];[?]")); // ragged row_splits
|
||||
INFER_OK(op, "[10];?;?;?;?;?;?;?", // Vector input, known size
|
||||
("[?,2];[?,2];" // sparse indices
|
||||
"[?];[?];" // sparse values
|
||||
"[2];[2];" // sparse dense_shapes
|
||||
"[d0_0,1];[d0_0,1,2];[d0_0,1,2,3];" // dense outputs
|
||||
"[?];[?];[?];[?];" // ragged values
|
||||
"[11];[11];[11];[11]")); // ragged row_splits
|
||||
INFER_OK(op, "[];?;?;?;?;?;?;?", // Scalar input
|
||||
("[?,1];[?,1];" // sparse indices
|
||||
"[?];[?];" // sparse values
|
||||
"[1];[1];" // sparse dense_shapes
|
||||
"[1];[1,2];[1,2,3];" // dense outputs
|
||||
"[?];[?];[?];[?];" // ragged values
|
||||
"[?];[?];[?];[?]")); // ragged row_splits
|
||||
INFER_OK(op, "?;?;?;?;?;?;?;?", // Input with unknown rank
|
||||
("[?,?];[?,?];" // sparse indices
|
||||
"[?];[?];" // sparse values
|
||||
"[?];[?];" // sparse dense_shapes
|
||||
"?;?;?;" // dense outputs
|
||||
"[?];[?];[?];[?];" // ragged values
|
||||
"[?];[?];[?];[?]")); // ragged row_splits
|
||||
|
||||
// Confirm an error from ParseExampleAttrs.Init().
|
||||
set_outputs(2, 3, 0, true /* add_extra_shape */);
|
||||
INFER_ERROR("len(dense_keys) != len(dense_shapes)", op, "?;?;?;?;?;?;?;?");
|
||||
set_outputs(2, 3, 0, true /* add_extra_shape */, 1 /* unknown_outer_dims */);
|
||||
INFER_ERROR("len(dense_keys) != len(dense_shapes)", op, "?;?;?;?;?;?;?;?");
|
||||
|
||||
// Allow variable strides
|
||||
set_outputs(2, 3, 0, false /* add_extra_shape */, 1 /* unknown_outer_dims */);
|
||||
INFER_OK(op, "[?];?;?;?;?;?;?;?",
|
||||
("[?,2];[?,2];[?];[?];[2];[2];" // sparse outputs
|
||||
"[d0_0,?,1];[d0_0,?,1,2];[d0_0,?,1,2,3]")); // dense outputs
|
||||
INFER_OK(op, "[10];?;?;?;?;?;?;?",
|
||||
("[?,2];[?,2];[?];[?];[2];[2];" // sparse outputs
|
||||
"[d0_0,?,1];[d0_0,?,1,2];[d0_0,?,1,2,3]")); // dense outputs
|
||||
|
||||
// Variable inner dimensions are not supported
|
||||
set_outputs(2, 3, 0, false /* add_extra_shape */, 2 /* unknown_outer_dims */);
|
||||
INFER_ERROR("shapes[0] has unknown rank or unknown inner dimensions", op,
|
||||
"?;?;?;?;?;?;?;?");
|
||||
}
|
||||
|
||||
} // end namespace tensorflow
|
||||
|
@ -460,8 +460,9 @@ void ParallelFor(const std::function<void(size_t)>& f, size_t n,
|
||||
}
|
||||
}
|
||||
|
||||
enum class Type { Sparse, Dense };
|
||||
enum class Type { Sparse, Dense, Ragged };
|
||||
|
||||
// Note: We use SparseBuffer for sparse, ragged, and dense_varlen features.
|
||||
struct SparseBuffer {
|
||||
// Features are in one of the 3 vectors below depending on config's dtype.
|
||||
// Other 2 vectors remain empty.
|
||||
@ -548,9 +549,11 @@ Status FastParseSerializedExample(
|
||||
SeededHasher hasher, std::vector<Tensor>* output_dense,
|
||||
std::vector<SparseBuffer>* output_varlen_dense,
|
||||
std::vector<SparseBuffer>* output_sparse,
|
||||
std::vector<SparseBuffer>* output_ragged,
|
||||
PerExampleFeatureStats* output_stats) {
|
||||
DCHECK(output_dense != nullptr);
|
||||
DCHECK(output_sparse != nullptr);
|
||||
DCHECK(output_ragged != nullptr);
|
||||
parsed::Example parsed_example;
|
||||
if (!ParseExample(serialized_example, &parsed_example)) {
|
||||
return errors::InvalidArgument("Could not parse example input, value: '",
|
||||
@ -558,6 +561,7 @@ Status FastParseSerializedExample(
|
||||
}
|
||||
std::vector<int64> sparse_feature_last_example(config.sparse.size(), -1);
|
||||
std::vector<int64> dense_feature_last_example(config.dense.size(), -1);
|
||||
std::vector<int64> ragged_feature_last_example(config.ragged.size(), -1);
|
||||
|
||||
// Handle features present in the example.
|
||||
const size_t parsed_example_size = parsed_example.size();
|
||||
@ -584,13 +588,15 @@ Status FastParseSerializedExample(
|
||||
|
||||
size_t d = d_and_type.first;
|
||||
bool is_dense = d_and_type.second == Type::Dense;
|
||||
bool is_ragged = d_and_type.second == Type::Ragged;
|
||||
|
||||
{
|
||||
// Testing for PresizedCuckooMap collision.
|
||||
// TODO(lew): Use dense_hash_map and avoid this and hasher creation.
|
||||
const string& config_feature_name = is_dense
|
||||
? config.dense[d].feature_name
|
||||
: config.sparse[d].feature_name;
|
||||
const string& config_feature_name =
|
||||
is_dense ? config.dense[d].feature_name
|
||||
: (is_ragged ? config.ragged[d].feature_name
|
||||
: config.sparse[d].feature_name);
|
||||
if (feature_name != config_feature_name) continue;
|
||||
}
|
||||
|
||||
@ -756,25 +762,30 @@ Status FastParseSerializedExample(
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Feature is sparse or ragged.
|
||||
auto& last_example =
|
||||
is_ragged ? ragged_feature_last_example : sparse_feature_last_example;
|
||||
|
||||
// If feature was already visited, skip.
|
||||
// Compare comment at the beginning of the loop.
|
||||
if (sparse_feature_last_example[d] == example_index) {
|
||||
if (last_example[d] == example_index) {
|
||||
LogSparseFeatureDataLoss(feature_name);
|
||||
continue;
|
||||
}
|
||||
sparse_feature_last_example[d] = example_index;
|
||||
last_example[d] = example_index;
|
||||
|
||||
// Handle sparse features.
|
||||
SparseBuffer& out = (*output_sparse)[d];
|
||||
if (example_dtype != DT_INVALID &&
|
||||
example_dtype != config.sparse[d].dtype) {
|
||||
return example_error(strings::StrCat(
|
||||
"Data types don't match. ",
|
||||
"Expected type: ", DataTypeString(config.sparse[d].dtype),
|
||||
", Actual type: ", DataTypeString(example_dtype)));
|
||||
SparseBuffer& out = is_ragged ? (*output_ragged)[d] : (*output_sparse)[d];
|
||||
DataType feature_dtype =
|
||||
is_ragged ? config.ragged[d].dtype : config.sparse[d].dtype;
|
||||
if (example_dtype != DT_INVALID && example_dtype != feature_dtype) {
|
||||
return example_error(
|
||||
strings::StrCat("Data types don't match. ",
|
||||
"Expected type: ", DataTypeString(feature_dtype),
|
||||
", Actual type: ", DataTypeString(example_dtype)));
|
||||
}
|
||||
|
||||
switch (config.sparse[d].dtype) {
|
||||
switch (feature_dtype) {
|
||||
case DT_INT64: {
|
||||
if (example_dtype != DT_INVALID) {
|
||||
if (!feature.ParseInt64List(&out.int64_list)) {
|
||||
@ -880,6 +891,15 @@ Status FastParseSerializedExample(
|
||||
out.example_end_indices.push_back(prev_example_end_index);
|
||||
}
|
||||
|
||||
// Handle missing ragged features.
|
||||
for (size_t d = 0; d < config.ragged.size(); ++d) {
|
||||
if (ragged_feature_last_example[d] == example_index) continue;
|
||||
SparseBuffer& out = (*output_ragged)[d];
|
||||
size_t prev_example_end_index =
|
||||
out.example_end_indices.empty() ? 0 : out.example_end_indices.back();
|
||||
out.example_end_indices.push_back(prev_example_end_index);
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -895,6 +915,24 @@ Status CheckConfigDataType(DataType dtype) {
|
||||
}
|
||||
}
|
||||
|
||||
// Validates every dtype named in `config` before any parsing happens, so the
// later switches on config.*.dtype can treat any other value as unreachable.
//
// Returns the first error produced by CheckConfigDataType() for a sparse,
// dense, or ragged value dtype, or InvalidArgument when a ragged feature's
// splits_dtype is neither DT_INT32 nor DT_INT64. Returns OK otherwise.
//
// `config` is taken by const reference: it is only read here, and taking it by
// value would copy every per-feature entry each time a parse entry point
// calls this function.
Status CheckConfigDataTypes(const Config& config) {
  for (const auto& c : config.sparse) {
    TF_RETURN_IF_ERROR(CheckConfigDataType(c.dtype));
  }
  for (const auto& c : config.dense) {
    TF_RETURN_IF_ERROR(CheckConfigDataType(c.dtype));
  }
  for (const auto& c : config.ragged) {
    TF_RETURN_IF_ERROR(CheckConfigDataType(c.dtype));
    // Row splits are written as either int32 or int64; nothing else is
    // handled when the ragged outputs are assembled.
    if (!(c.splits_dtype == DT_INT32 || c.splits_dtype == DT_INT64)) {
      return errors::InvalidArgument("Invalid ragged_split_type: ",
                                     DataTypeString(c.splits_dtype));
    }
  }
  return Status::OK();
}
|
||||
|
||||
template <typename T>
|
||||
const SmallVector<T>& GetListFromBuffer(const SparseBuffer& buffer);
|
||||
|
||||
@ -999,6 +1037,46 @@ class TensorVector {
|
||||
T* data_ = nullptr;
|
||||
};
|
||||
|
||||
void CountSparseFeatures(
|
||||
const std::vector<std::vector<SparseBuffer>>& sparse_buffers, size_t d,
|
||||
size_t* total_num_features, size_t* max_num_features) {
|
||||
for (auto& sparse_values_tmp : sparse_buffers) {
|
||||
const std::vector<size_t>& end_indices =
|
||||
sparse_values_tmp[d].example_end_indices;
|
||||
*total_num_features += end_indices.back();
|
||||
*max_num_features = std::max(*max_num_features, end_indices[0]);
|
||||
for (size_t i = 1; i < end_indices.size(); ++i) {
|
||||
size_t example_size = end_indices[i] - end_indices[i - 1];
|
||||
*max_num_features = std::max(*max_num_features, example_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Transfers the values buffered in `src` into `dst`, starting at flat element
// index `offset`.
//
// `dtype` selects both the source list inside the buffer (int64_list,
// float_list or bytes_list) and the element type `dst` is viewed as.
// Numeric values are copied; string values are moved, which leaves
// `src->bytes_list` in a moved-from state (hence the mutable `src`).
// No bounds checking is done here: the caller is expected to have sized `dst`
// so that everything in the selected list fits after `offset`.
void CopySparseBufferToTensor(DataType dtype, size_t offset, SparseBuffer* src,
                              Tensor* dst) {
  switch (dtype) {
    case DT_INT64: {
      std::copy(src->int64_list.begin(), src->int64_list.end(),
                dst->flat<int64>().data() + offset);
      break;
    }
    case DT_FLOAT: {
      std::copy(src->float_list.begin(), src->float_list.end(),
                dst->flat<float>().data() + offset);
      break;
    }
    case DT_STRING: {
      // View string tensors as tstring, the element type the inline copy
      // code this helper replaces used for DT_STRING.
      std::move(src->bytes_list.begin(), src->bytes_list.end(),
                dst->flat<tstring>().data() + offset);
      break;
    }
    default:
      // We checked that dtype was one of these three values with
      // CheckConfigDataTypes().
      DCHECK(false) << "Should not happen.";
  }
}
|
||||
|
||||
} // namespace
|
||||
|
||||
Status FastParseExample(const Config& config,
|
||||
@ -1007,18 +1085,14 @@ Status FastParseExample(const Config& config,
|
||||
thread::ThreadPool* thread_pool, Result* result) {
|
||||
DCHECK(result != nullptr);
|
||||
// Check config so we can safely CHECK(false) in switches on config.*.dtype
|
||||
for (auto& c : config.sparse) {
|
||||
TF_RETURN_IF_ERROR(CheckConfigDataType(c.dtype));
|
||||
}
|
||||
for (auto& c : config.dense) {
|
||||
TF_RETURN_IF_ERROR(CheckConfigDataType(c.dtype));
|
||||
}
|
||||
TF_RETURN_IF_ERROR(CheckConfigDataTypes(config));
|
||||
|
||||
if (config.collect_feature_stats) {
|
||||
result->feature_stats.resize(serialized.size());
|
||||
}
|
||||
|
||||
size_t config_size = config.dense.size() + config.sparse.size();
|
||||
size_t config_size =
|
||||
config.dense.size() + config.sparse.size() + config.ragged.size();
|
||||
SeededHasher hasher;
|
||||
// Build config index.
|
||||
PresizedCuckooMap<std::pair<size_t, Type>> config_index(config_size);
|
||||
@ -1032,6 +1106,10 @@ Status FastParseExample(const Config& config,
|
||||
ok &= config_index.InsertUnique(hasher(config.sparse[d].feature_name),
|
||||
{d, Type::Sparse});
|
||||
}
|
||||
for (size_t d = 0; d < config.ragged.size(); ++d) {
|
||||
ok &= config_index.InsertUnique(hasher(config.ragged[d].feature_name),
|
||||
{d, Type::Ragged});
|
||||
}
|
||||
if (ok) break;
|
||||
LOG(WARNING) << "Collision found. This should happen only if you have "
|
||||
"around 2^32 entries in your config.";
|
||||
@ -1045,7 +1123,7 @@ Status FastParseExample(const Config& config,
|
||||
}
|
||||
|
||||
// Allocate dense output for fixed length dense values
|
||||
// (variable-length dense and sparse have to be buffered).
|
||||
// (variable-length dense and sparse and ragged have to be buffered).
|
||||
std::vector<Tensor> fixed_dense_values(config.dense.size());
|
||||
for (size_t d = 0; d < config.dense.size(); ++d) {
|
||||
if (config.dense[d].variable_length) continue;
|
||||
@ -1097,10 +1175,12 @@ Status FastParseExample(const Config& config,
|
||||
// Do minibatches in parallel.
|
||||
std::vector<std::vector<SparseBuffer>> sparse_buffers(num_minibatches);
|
||||
std::vector<std::vector<SparseBuffer>> varlen_dense_buffers(num_minibatches);
|
||||
std::vector<std::vector<SparseBuffer>> ragged_buffers(num_minibatches);
|
||||
std::vector<Status> status_of_minibatch(num_minibatches);
|
||||
auto ProcessMiniBatch = [&](size_t minibatch) {
|
||||
sparse_buffers[minibatch].resize(config.sparse.size());
|
||||
varlen_dense_buffers[minibatch].resize(config.dense.size());
|
||||
ragged_buffers[minibatch].resize(config.ragged.size());
|
||||
size_t start = first_example_of_minibatch(minibatch);
|
||||
size_t end = first_example_of_minibatch(minibatch + 1);
|
||||
for (size_t e = start; e < end; ++e) {
|
||||
@ -1112,7 +1192,8 @@ Status FastParseExample(const Config& config,
|
||||
serialized[e],
|
||||
(!example_names.empty() ? example_names[e] : "<unknown>"), e, config,
|
||||
config_index, hasher, &fixed_dense_values,
|
||||
&varlen_dense_buffers[minibatch], &sparse_buffers[minibatch], stats);
|
||||
&varlen_dense_buffers[minibatch], &sparse_buffers[minibatch],
|
||||
&ragged_buffers[minibatch], stats);
|
||||
if (!status_of_minibatch[minibatch].ok()) break;
|
||||
}
|
||||
};
|
||||
@ -1132,16 +1213,8 @@ Status FastParseExample(const Config& config,
|
||||
// Loop over minibatches
|
||||
size_t total_num_features = 0;
|
||||
size_t max_num_features = 0;
|
||||
for (auto& sparse_values_tmp : sparse_buffers) {
|
||||
const std::vector<size_t>& end_indices =
|
||||
sparse_values_tmp[d].example_end_indices;
|
||||
total_num_features += end_indices.back();
|
||||
max_num_features = std::max(max_num_features, end_indices[0]);
|
||||
for (size_t i = 1; i < end_indices.size(); ++i) {
|
||||
size_t example_size = end_indices[i] - end_indices[i - 1];
|
||||
max_num_features = std::max(max_num_features, example_size);
|
||||
}
|
||||
}
|
||||
CountSparseFeatures(sparse_buffers, d, &total_num_features,
|
||||
&max_num_features);
|
||||
|
||||
TensorShape indices_shape;
|
||||
indices_shape.AddDim(total_num_features);
|
||||
@ -1161,7 +1234,7 @@ Status FastParseExample(const Config& config,
|
||||
|
||||
size_t offset = 0;
|
||||
for (size_t i = 0; i < sparse_buffers.size(); ++i) {
|
||||
const SparseBuffer& buffer = sparse_buffers[i][d];
|
||||
SparseBuffer& buffer = sparse_buffers[i][d];
|
||||
|
||||
// Update indices.
|
||||
int64* ix_p = &indices->matrix<int64>()(offset, 0);
|
||||
@ -1180,28 +1253,61 @@ Status FastParseExample(const Config& config,
|
||||
++example_index;
|
||||
}
|
||||
|
||||
// Copy values over.
|
||||
switch (config.sparse[d].dtype) {
|
||||
case DT_INT64: {
|
||||
std::copy(buffer.int64_list.begin(), buffer.int64_list.end(),
|
||||
values->flat<int64>().data() + offset);
|
||||
break;
|
||||
CopySparseBufferToTensor(config.sparse[d].dtype, offset, &buffer, values);
|
||||
offset += delta;
|
||||
}
|
||||
};
|
||||
|
||||
// Merge SparseBuffers from all minibatches for every config.ragged.
|
||||
auto MergeRaggedMinibatches = [&](size_t d) {
|
||||
// Loop over minibatches
|
||||
size_t total_num_features = 0;
|
||||
size_t max_num_features = 0;
|
||||
CountSparseFeatures(ragged_buffers, d, &total_num_features,
|
||||
&max_num_features);
|
||||
|
||||
TensorShape row_splits_shape;
|
||||
row_splits_shape.AddDim(serialized.size() + 1);
|
||||
result->ragged_splits.emplace_back(config.ragged[d].splits_dtype,
|
||||
row_splits_shape);
|
||||
Tensor* row_splits = &result->ragged_splits.back();
|
||||
if (config.ragged[d].splits_dtype == DT_INT64) {
|
||||
row_splits->flat<int64>()(0) = 0;
|
||||
} else {
|
||||
row_splits->flat<int32>()(0) = 0;
|
||||
}
|
||||
|
||||
TensorShape values_shape;
|
||||
values_shape.AddDim(total_num_features);
|
||||
result->ragged_values.emplace_back(config.ragged[d].dtype, values_shape);
|
||||
Tensor* values = &result->ragged_values.back();
|
||||
|
||||
size_t values_offset = 0;
|
||||
size_t splits_offset = 0;
|
||||
for (size_t i = 0; i < ragged_buffers.size(); ++i) {
|
||||
SparseBuffer& buffer = ragged_buffers[i][d];
|
||||
if (buffer.example_end_indices.empty()) continue;
|
||||
|
||||
// Update row_splits. row_splits are formed by concatenating the example
|
||||
// end_indices (adjusting each to start after the previous one ends).
|
||||
if (config.ragged[d].splits_dtype == DT_INT64) {
|
||||
int64* row_splits_out = &row_splits->flat<int64>()(splits_offset);
|
||||
int64 start = *row_splits_out;
|
||||
for (size_t example_end_index : buffer.example_end_indices) {
|
||||
*++row_splits_out = start + example_end_index;
|
||||
}
|
||||
case DT_FLOAT: {
|
||||
std::copy(buffer.float_list.begin(), buffer.float_list.end(),
|
||||
values->flat<float>().data() + offset);
|
||||
break;
|
||||
} else {
|
||||
int32* row_splits_out = &row_splits->flat<int32>()(splits_offset);
|
||||
int32 start = *row_splits_out;
|
||||
for (size_t example_end_index : buffer.example_end_indices) {
|
||||
*++row_splits_out = start + example_end_index;
|
||||
}
|
||||
case DT_STRING: {
|
||||
std::move(buffer.bytes_list.begin(), buffer.bytes_list.end(),
|
||||
values->flat<tstring>().data() + offset);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
LOG(FATAL) << "Should not happen.";
|
||||
}
|
||||
|
||||
offset += delta;
|
||||
CopySparseBufferToTensor(config.ragged[d].dtype, values_offset, &buffer,
|
||||
values);
|
||||
values_offset += buffer.example_end_indices.back();
|
||||
splits_offset += buffer.example_end_indices.size();
|
||||
}
|
||||
};
|
||||
|
||||
@ -1270,6 +1376,10 @@ Status FastParseExample(const Config& config,
|
||||
MergeSparseMinibatches(d);
|
||||
}
|
||||
|
||||
for (size_t d = 0; d < config.ragged.size(); ++d) {
|
||||
MergeRaggedMinibatches(d);
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -1277,12 +1387,7 @@ Status FastParseSingleExample(const Config& config,
|
||||
absl::string_view serialized, Result* result) {
|
||||
DCHECK(result != nullptr);
|
||||
// Check config so we can safely CHECK(false) in switches on config.*.dtype
|
||||
for (auto& c : config.sparse) {
|
||||
TF_RETURN_IF_ERROR(CheckConfigDataType(c.dtype));
|
||||
}
|
||||
for (auto& c : config.dense) {
|
||||
TF_RETURN_IF_ERROR(CheckConfigDataType(c.dtype));
|
||||
}
|
||||
TF_RETURN_IF_ERROR(CheckConfigDataTypes(config));
|
||||
|
||||
PerExampleFeatureStats* stats = nullptr;
|
||||
if (config.collect_feature_stats) {
|
||||
|
@ -57,8 +57,15 @@ struct FastParseExampleConfig {
|
||||
DataType dtype;
|
||||
};
|
||||
|
||||
// Configuration for one ragged feature to be parsed.
struct Ragged {
|
||||
// Feature key; matched against the feature names found in each example.
string feature_name;
|
||||
// Element type of the ragged values tensor produced for this feature.
DataType dtype;
|
||||
// Element type of the row_splits tensor produced for this feature; the
// parser's config check accepts only DT_INT32 or DT_INT64.
DataType splits_dtype;
|
||||
};
|
||||
|
||||
std::vector<Dense> dense;
|
||||
std::vector<Sparse> sparse;
|
||||
std::vector<Ragged> ragged;
|
||||
|
||||
// If `true`, `Result::feature_stats` will contain one
|
||||
// `PerExampleFeatureStats` for each serialized example in the input.
|
||||
@ -88,6 +95,8 @@ struct Result {
|
||||
std::vector<Tensor> sparse_values;
|
||||
std::vector<Tensor> sparse_shapes;
|
||||
std::vector<Tensor> dense_values;
|
||||
std::vector<Tensor> ragged_values;
|
||||
std::vector<Tensor> ragged_splits;
|
||||
|
||||
// This vector will be populated with one element per example if
|
||||
// `FastParseExampleConfig::collect_feature_stats` is set to `true`.
|
||||
|
@ -403,7 +403,18 @@ Status BatchExampleProtoToTensors(
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status ParseExampleAttrs::FinishInit() {
|
||||
Status ParseExampleAttrs::FinishInit(int op_version) {
|
||||
switch (op_version) {
|
||||
case 1:
|
||||
num_ragged = 0;
|
||||
break;
|
||||
case 2:
|
||||
num_dense = dense_types.size();
|
||||
num_ragged = ragged_value_types.size();
|
||||
break;
|
||||
default:
|
||||
return errors::InvalidArgument("Unexpected op_version", op_version);
|
||||
}
|
||||
if (static_cast<size_t>(num_sparse) != sparse_types.size()) {
|
||||
return errors::InvalidArgument("len(sparse_keys) != len(sparse_types)");
|
||||
}
|
||||
@ -413,6 +424,14 @@ Status ParseExampleAttrs::FinishInit() {
|
||||
if (static_cast<size_t>(num_dense) != dense_shapes.size()) {
|
||||
return errors::InvalidArgument("len(dense_keys) != len(dense_shapes)");
|
||||
}
|
||||
if (static_cast<size_t>(num_ragged) != ragged_value_types.size()) {
|
||||
return errors::InvalidArgument(
|
||||
"len(ragged_keys) != len(ragged_value_types)");
|
||||
}
|
||||
if (static_cast<size_t>(num_ragged) != ragged_split_types.size()) {
|
||||
return errors::InvalidArgument(
|
||||
"len(ragged_keys) != len(ragged_split_types)");
|
||||
}
|
||||
if (num_dense > std::numeric_limits<int32>::max()) {
|
||||
return errors::InvalidArgument("num_dense_ too large");
|
||||
}
|
||||
@ -422,6 +441,15 @@ Status ParseExampleAttrs::FinishInit() {
|
||||
for (const DataType& type : sparse_types) {
|
||||
TF_RETURN_IF_ERROR(CheckValidType(type));
|
||||
}
|
||||
for (const DataType& type : ragged_value_types) {
|
||||
TF_RETURN_IF_ERROR(CheckValidType(type));
|
||||
}
|
||||
for (const DataType& type : ragged_split_types) {
|
||||
if (!(type == DT_INT64 || type == DT_INT32)) {
|
||||
return errors::InvalidArgument("Invalid ragged_split_type: ",
|
||||
DataTypeString(type));
|
||||
}
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -536,4 +564,41 @@ Status ParseSingleSequenceExampleAttrs::FinishInit() {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Validates each entry of `dense_shapes` and records per-feature layout info.
//
// For every shape, in order, one entry is appended to each output vector:
//   * `variable_length`: true when the shape has at least one dimension and
//     its outermost dimension is unknown (-1), false otherwise.
//   * `elements_per_stride`: the element count of the shape with an unknown
//     outermost dimension dropped, or of the full shape when it is known.
//
// Returns InvalidArgument as soon as a shape with unknown rank, or with an
// unknown dimension other than the outermost one, is encountered. Entries
// already appended for earlier shapes are left in the output vectors.
Status GetDenseShapes(const std::vector<PartialTensorShape>& dense_shapes,
                      std::vector<bool>* variable_length,
                      std::vector<std::size_t>* elements_per_stride) {
  for (std::size_t idx = 0; idx < dense_shapes.size(); ++idx) {
    const PartialTensorShape& shape = dense_shapes[idx];
    const int rank = shape.dims();

    // Only the outermost dimension (index 0) is allowed to be unknown.
    bool inner_dims_known = (rank != -1);
    for (int d = 1; inner_dims_known && d < rank; ++d) {
      inner_dims_known = (shape.dim_size(d) != -1);
    }
    if (!inner_dims_known) {
      return errors::InvalidArgument(
          "dense_shapes[", idx,
          "] has unknown rank or unknown inner dimensions: ",
          shape.DebugString());
    }

    const bool outer_dim_unknown = rank > 0 && shape.dim_size(0) == -1;
    variable_length->push_back(outer_dim_unknown);

    TensorShape stride_shape;
    if (outer_dim_unknown) {
      // Build the per-stride shape from the inner dimensions only.
      for (int d = 1; d < rank; ++d) {
        stride_shape.AddDim(shape.dim_size(d));
      }
    } else {
      shape.AsTensorShape(&stride_shape);
    }
    elements_per_stride->push_back(stride_shape.num_elements());
  }
  return Status::OK();
}
|
||||
|
||||
} // namespace tensorflow
|
||||
|
@ -150,66 +150,60 @@ Tensor FeatureSparseCopy(const std::size_t batch, const string& key,
|
||||
int64 CopyIntoSparseTensor(const Tensor& in, const int batch,
|
||||
const int64 offset, Tensor* indices, Tensor* values);
|
||||
|
||||
// Check that each dense_shape has known rank and inner dimensions; and
|
||||
// update variable_length (whether the outer dimension is None) and
|
||||
// elements_per_stride for each dense_shape.
|
||||
Status GetDenseShapes(const std::vector<PartialTensorShape>& dense_shapes,
|
||||
std::vector<bool>* variable_length,
|
||||
std::vector<std::size_t>* elements_per_stride);
|
||||
|
||||
// Parses the attributes passed to ParseExample.
|
||||
// REQUIRES: Init must be called after construction.
|
||||
class ParseExampleAttrs {
|
||||
struct ParseExampleAttrs {
|
||||
public:
|
||||
template <typename ContextType>
|
||||
Status Init(ContextType* ctx) {
|
||||
Status Init(ContextType* ctx, int op_version = 1) {
|
||||
TF_RETURN_IF_ERROR(ctx->GetAttr("sparse_types", &sparse_types));
|
||||
TF_RETURN_IF_ERROR(ctx->GetAttr("Ndense", &num_dense));
|
||||
TF_RETURN_IF_ERROR(ctx->GetAttr("Nsparse", &num_sparse));
|
||||
TF_RETURN_IF_ERROR(ctx->GetAttr("Tdense", &dense_types));
|
||||
TF_RETURN_IF_ERROR(ctx->GetAttr("dense_shapes", &dense_shapes));
|
||||
// Temporary check until we start allowing a variable length outer
|
||||
// dimension.
|
||||
for (int i = 0; i < dense_shapes.size(); ++i) {
|
||||
bool shape_ok = true;
|
||||
if (dense_shapes[i].dims() == -1) {
|
||||
shape_ok = false;
|
||||
} else {
|
||||
for (int d = 1; d < dense_shapes[i].dims(); ++d) {
|
||||
if (dense_shapes[i].dim_size(d) == -1) {
|
||||
shape_ok = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!shape_ok) {
|
||||
return errors::InvalidArgument(
|
||||
"dense_shapes[", i,
|
||||
"] has unknown rank or unknown inner dimensions: ",
|
||||
dense_shapes[i].DebugString());
|
||||
}
|
||||
TensorShape dense_shape;
|
||||
if (dense_shapes[i].dims() > 0 && dense_shapes[i].dim_size(0) == -1) {
|
||||
variable_length.push_back(true);
|
||||
for (int d = 1; d < dense_shapes[i].dims(); ++d) {
|
||||
dense_shape.AddDim(dense_shapes[i].dim_size(d));
|
||||
}
|
||||
} else {
|
||||
variable_length.push_back(false);
|
||||
dense_shapes[i].AsTensorShape(&dense_shape);
|
||||
}
|
||||
elements_per_stride.push_back(dense_shape.num_elements());
|
||||
TF_RETURN_IF_ERROR(
|
||||
GetDenseShapes(dense_shapes, &variable_length, &elements_per_stride));
|
||||
switch (op_version) {
|
||||
case 1:
|
||||
TF_RETURN_IF_ERROR(ctx->GetAttr("Nsparse", &num_sparse));
|
||||
TF_RETURN_IF_ERROR(ctx->GetAttr("Ndense", &num_dense));
|
||||
break;
|
||||
case 2:
|
||||
TF_RETURN_IF_ERROR(
|
||||
ctx->GetAttr("ragged_value_types", &ragged_value_types));
|
||||
TF_RETURN_IF_ERROR(ctx->GetAttr("num_sparse", &num_sparse));
|
||||
TF_RETURN_IF_ERROR(
|
||||
ctx->GetAttr("ragged_split_types", &ragged_split_types));
|
||||
break;
|
||||
default:
|
||||
return errors::InvalidArgument("Unexpected op_version", op_version);
|
||||
}
|
||||
return FinishInit();
|
||||
return FinishInit(op_version);
|
||||
}
|
||||
|
||||
int64 num_sparse;
|
||||
int64 num_dense;
|
||||
int64 num_ragged;
|
||||
std::vector<DataType> sparse_types;
|
||||
std::vector<DataType> dense_types;
|
||||
std::vector<DataType> ragged_value_types;
|
||||
std::vector<DataType> ragged_split_types;
|
||||
std::vector<PartialTensorShape> dense_shapes;
|
||||
std::vector<bool> variable_length;
|
||||
std::vector<std::size_t> elements_per_stride;
|
||||
|
||||
private:
|
||||
Status FinishInit(); // for context-independent parts of Init.
|
||||
Status FinishInit(int op_version); // for context-independent parts of Init.
|
||||
};
|
||||
|
||||
// Parses the attributes passed to ParseSingleExample.
|
||||
// REQUIRES: Init must be called after construction.
|
||||
class ParseSingleExampleAttrs {
|
||||
struct ParseSingleExampleAttrs {
|
||||
public:
|
||||
template <typename ContextType>
|
||||
Status Init(ContextType* ctx) {
|
||||
@ -227,37 +221,8 @@ class ParseSingleExampleAttrs {
|
||||
sparse_keys.size(), ") and sparse_types (", sparse_types.size(), ")");
|
||||
}
|
||||
|
||||
// Temporary check until we start allowing a variable length outer
|
||||
// dimension.
|
||||
for (int i = 0; i < dense_shapes.size(); ++i) {
|
||||
bool shape_ok = true;
|
||||
if (dense_shapes[i].dims() == -1) {
|
||||
shape_ok = false;
|
||||
} else {
|
||||
for (int d = 1; d < dense_shapes[i].dims(); ++d) {
|
||||
if (dense_shapes[i].dim_size(d) == -1) {
|
||||
shape_ok = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!shape_ok) {
|
||||
return errors::InvalidArgument(
|
||||
"dense_shapes[", i,
|
||||
"] has unknown rank or unknown inner dimensions: ",
|
||||
dense_shapes[i].DebugString());
|
||||
}
|
||||
TensorShape dense_shape;
|
||||
if (dense_shapes[i].dims() > 0 && dense_shapes[i].dim_size(0) == -1) {
|
||||
variable_length.push_back(true);
|
||||
for (int d = 1; d < dense_shapes[i].dims(); ++d) {
|
||||
dense_shape.AddDim(dense_shapes[i].dim_size(d));
|
||||
}
|
||||
} else {
|
||||
variable_length.push_back(false);
|
||||
dense_shapes[i].AsTensorShape(&dense_shape);
|
||||
}
|
||||
elements_per_stride.push_back(dense_shape.num_elements());
|
||||
}
|
||||
TF_RETURN_IF_ERROR(
|
||||
GetDenseShapes(dense_shapes, &variable_length, &elements_per_stride));
|
||||
return FinishInit();
|
||||
}
|
||||
|
||||
@ -275,7 +240,7 @@ class ParseSingleExampleAttrs {
|
||||
|
||||
// Parses the attributes passed to ParseSequenceExample.
|
||||
// REQUIRES: Init must be called after construction.
|
||||
class ParseSequenceExampleAttrs {
|
||||
struct ParseSequenceExampleAttrs {
|
||||
public:
|
||||
template <typename ContextType>
|
||||
Status Init(ContextType* ctx) {
|
||||
@ -335,7 +300,7 @@ class ParseSequenceExampleAttrs {
|
||||
|
||||
// Parses the attributes passed to ParseSingleSequenceExample.
|
||||
// REQUIRES: Init must be called after construction.
|
||||
class ParseSingleSequenceExampleAttrs {
|
||||
struct ParseSingleSequenceExampleAttrs {
|
||||
public:
|
||||
template <typename ContextType>
|
||||
Status Init(ContextType* ctx) {
|
||||
|
@ -2540,6 +2540,10 @@ tf_module {
|
||||
name: "ParseExampleDataset"
|
||||
argspec: "args=[\'input_dataset\', \'num_parallel_calls\', \'dense_defaults\', \'sparse_keys\', \'dense_keys\', \'sparse_types\', \'dense_shapes\', \'output_types\', \'output_shapes\', \'sloppy\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "ParseExampleV2"
|
||||
argspec: "args=[\'serialized\', \'names\', \'sparse_keys\', \'dense_keys\', \'ragged_keys\', \'dense_defaults\', \'num_sparse\', \'sparse_types\', \'ragged_value_types\', \'ragged_split_types\', \'dense_shapes\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "ParseSequenceExample"
|
||||
argspec: "args=[\'serialized\', \'debug_name\', \'context_dense_defaults\', \'feature_list_dense_missing_assumed_empty\', \'context_sparse_keys\', \'context_dense_keys\', \'feature_list_sparse_keys\', \'feature_list_dense_keys\', \'Ncontext_sparse\', \'Ncontext_dense\', \'Nfeature_list_sparse\', \'Nfeature_list_dense\', \'context_sparse_types\', \'feature_list_dense_types\', \'context_dense_shapes\', \'feature_list_sparse_types\', \'feature_list_dense_shapes\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'0\', \'0\', \'0\', \'[]\', \'[]\', \'[]\', \'[]\', \'[]\', \'None\'], "
|
||||
|
@ -2540,6 +2540,10 @@ tf_module {
|
||||
name: "ParseExampleDataset"
|
||||
argspec: "args=[\'input_dataset\', \'num_parallel_calls\', \'dense_defaults\', \'sparse_keys\', \'dense_keys\', \'sparse_types\', \'dense_shapes\', \'output_types\', \'output_shapes\', \'sloppy\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "ParseExampleV2"
|
||||
argspec: "args=[\'serialized\', \'names\', \'sparse_keys\', \'dense_keys\', \'ragged_keys\', \'dense_defaults\', \'num_sparse\', \'sparse_types\', \'ragged_value_types\', \'ragged_split_types\', \'dense_shapes\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
|
||||
}
|
||||
member_method {
|
||||
name: "ParseSequenceExample"
|
||||
argspec: "args=[\'serialized\', \'debug_name\', \'context_dense_defaults\', \'feature_list_dense_missing_assumed_empty\', \'context_sparse_keys\', \'context_dense_keys\', \'feature_list_sparse_keys\', \'feature_list_dense_keys\', \'Ncontext_sparse\', \'Ncontext_dense\', \'Nfeature_list_sparse\', \'Nfeature_list_dense\', \'context_sparse_types\', \'feature_list_dense_types\', \'context_dense_shapes\', \'feature_list_sparse_types\', \'feature_list_dense_shapes\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'0\', \'0\', \'0\', \'[]\', \'[]\', \'[]\', \'[]\', \'[]\', \'None\'], "
|
||||
|
Loading…
Reference in New Issue
Block a user