diff --git a/RELEASE.md b/RELEASE.md
index 15e02c3b730..f1a8859c356 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -7,6 +7,9 @@
   default, simply pass the argument `state_is_tuple=False`.
 * DeviceFactory's AddDevices and CreateDevices functions now return a Status
   instead of void.
+* Int32 elements of list(type) arguments are no longer placed in host memory by
+  default. If necessary, a list(type) argument to a kernel can be placed in host
+  memory using a HostMemory annotation.
 
 # Release 0.10.0
 
diff --git a/tensorflow/core/framework/memory_types.cc b/tensorflow/core/framework/memory_types.cc
index 6e37a3aba47..923261c529e 100644
--- a/tensorflow/core/framework/memory_types.cc
+++ b/tensorflow/core/framework/memory_types.cc
@@ -61,29 +61,6 @@ MemoryType MTypeFromDType(const DataType dtype) {
   return (dtype == DT_INT32) ? HOST_MEMORY : DEVICE_MEMORY;
 }
 
-// Initialize the default memory types for type list arguments from the data
-// types. (The default can be overridden by an explicit HostMemory()
-// declaration.)
-Status SetTypeListMTypesFromDTypes(
-    const NameRangeMap& name_ranges,
-    const protobuf::RepeatedPtrField<OpDef::ArgDef>& args,
-    const DataTypeVector& dtypes, MemoryTypeVector* mtypes) {
-  for (const auto& a : args) {
-    if (!a.type_list_attr().empty()) {
-      auto it = name_ranges.find(a.name());
-      if (it == name_ranges.end()) {
-        return errors::InvalidArgument("Name range for argument ", a.name(),
-                                       " not found.");
-      }
-
-      for (int i = it->second.first; i < it->second.second; ++i) {
-        (*mtypes)[i] = MTypeFromDType(dtypes[i]);
-      }
-    }
-  }
-  return Status::OK();
-}
-
 }  // namespace
 
 Status MemoryTypesForNode(const OpRegistryInterface* op_registry,
@@ -107,12 +84,13 @@ Status MemoryTypesForNode(const OpRegistryInterface* op_registry,
   inp_mtypes->clear();
   out_mtypes->clear();
 
-  if (!status.ok()) {
-    // When there is no kernel def for this op, we can only best-effort derive
-    // the memory type from the data type. For now, we assume int32 is always
-    // on host memory and other types are always on device memory. We should
-    // do type inference over function body to derive the correct
-    // input/output memory types.
+  // For functions (which have no KernelDef) and their gradients, we can only
+  // best-effort derive the memory type from the data type. For now, we assume
+  // int32 is always on host memory and other types are always on device memory.
+  // TODO(zhifengc,phawkins): We should do type inference over function bodies
+  // to derive the correct input/output memory types. We should also split
+  // host-memory and non host-memory arguments into separate type lists.
+  if (!status.ok() || ndef.op() == "SymbolicGradient") {
     for (const auto& t : inp_dtypes) inp_mtypes->push_back(MTypeFromDType(t));
     for (const auto& t : out_dtypes) out_mtypes->push_back(MTypeFromDType(t));
     return Status::OK();
@@ -127,12 +105,6 @@ Status MemoryTypesForNode(const OpRegistryInterface* op_registry,
   inp_mtypes->resize(GetTotal(inp_names), DEVICE_MEMORY);
   out_mtypes->resize(GetTotal(out_names), DEVICE_MEMORY);
 
-  // For type list arguments, mark int32 arguments as host memory.
-  TF_RETURN_IF_ERROR(SetTypeListMTypesFromDTypes(inp_names, op_def->input_arg(),
-                                                 inp_dtypes, inp_mtypes));
-  TF_RETURN_IF_ERROR(SetTypeListMTypesFromDTypes(
-      out_names, op_def->output_arg(), out_dtypes, out_mtypes));
-
   // Fills in host memory types based on the kernel def.
   const auto& from_proto = kdef->host_memory_arg();
   std::vector<string> host_memory_args(from_proto.begin(), from_proto.end());
diff --git a/tensorflow/core/framework/memory_types_test.cc b/tensorflow/core/framework/memory_types_test.cc
index c4cd875bc4b..4704da9a119 100644
--- a/tensorflow/core/framework/memory_types_test.cc
+++ b/tensorflow/core/framework/memory_types_test.cc
@@ -63,11 +63,11 @@ TEST(MemoryTypesForNode, Simple) {
   TF_EXPECT_OK(MemoryTypesForNode(OpRegistry::Global(), DEVICE_CPU, node_def,
                                   &input, &output));
   EXPECT_EQ(MemoryTypeVector({DEVICE_MEMORY, DEVICE_MEMORY, DEVICE_MEMORY,
-                              DEVICE_MEMORY, DEVICE_MEMORY, HOST_MEMORY,
-                              DEVICE_MEMORY, HOST_MEMORY}),
+                              DEVICE_MEMORY, DEVICE_MEMORY, DEVICE_MEMORY,
+                              DEVICE_MEMORY, DEVICE_MEMORY}),
             input);
   EXPECT_EQ(MemoryTypeVector({DEVICE_MEMORY, DEVICE_MEMORY, DEVICE_MEMORY,
-                              HOST_MEMORY, DEVICE_MEMORY, HOST_MEMORY}),
+                              DEVICE_MEMORY, DEVICE_MEMORY, DEVICE_MEMORY}),
             output);
 
   TF_EXPECT_OK(MemoryTypesForNode(OpRegistry::Global(), DEVICE_GPU, node_def,
@@ -77,7 +77,7 @@ TEST(MemoryTypesForNode, Simple) {
                               HOST_MEMORY, HOST_MEMORY, HOST_MEMORY,
                               HOST_MEMORY}),
             input);
   EXPECT_EQ(MemoryTypeVector({HOST_MEMORY, HOST_MEMORY, HOST_MEMORY,
-                              HOST_MEMORY, DEVICE_MEMORY, HOST_MEMORY}),
+                              DEVICE_MEMORY, DEVICE_MEMORY, DEVICE_MEMORY}),
             output);
 }
diff --git a/tensorflow/core/kernels/function_ops.cc b/tensorflow/core/kernels/function_ops.cc
index 5a0c2acd644..56253eb64a7 100644
--- a/tensorflow/core/kernels/function_ops.cc
+++ b/tensorflow/core/kernels/function_ops.cc
@@ -109,12 +109,20 @@ REGISTER_KERNEL_BUILDER(Name("_Retval")
 
 class PassOn : public OpKernel {
  public:
-  explicit PassOn(OpKernelConstruction* ctx) : OpKernel(ctx) {}
-
-  void Compute(OpKernelContext* ctx) override {
+  explicit PassOn(OpKernelConstruction* ctx) : OpKernel(ctx) {
     OP_REQUIRES(ctx, ctx->num_inputs() == ctx->num_outputs(),
                 errors::Internal("#inputs != #outputs : ", ctx->num_inputs(),
                                  " vs. ", ctx->num_outputs()));
+    for (int i = 0; i < ctx->num_inputs(); ++i) {
+      OP_REQUIRES(
+          ctx, input_type(i) == output_type(i),
+          errors::Internal("Input and output types for position ", i,
+                           " do not match: ", DataTypeString(input_type(i)),
+                           " vs. ", DataTypeString(output_type(i))));
+    }
+  }
+
+  void Compute(OpKernelContext* ctx) override {
     for (int i = 0; i < ctx->num_inputs(); ++i) {
       ctx->set_output(i, ctx->input(i));
     }
@@ -140,12 +148,14 @@ REGISTER_GPU_KERNELS(double);
 
 REGISTER_KERNEL_BUILDER(Name("_ListToArray")
                             .Device(DEVICE_GPU)
+                            .HostMemory("input")
                             .HostMemory("output")
                             .TypeConstraint<int32>("T"),
                         PassOn);
 
 REGISTER_KERNEL_BUILDER(Name("_ArrayToList")
                             .Device(DEVICE_GPU)
                             .HostMemory("input")
+                            .HostMemory("output")
                             .TypeConstraint<int32>("T"),
                         PassOn);
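
Note for kernel authors (illustrative, not part of the patch): after this change,
int32 tensors flowing through list(type) arguments default to device memory, so a
GPU kernel that needs to read such values on the host must say so explicitly. A
minimal sketch follows; the op name "ShapeListSink", the kernel class, and the
"shapes" argument are hypothetical, while the HostMemory() annotation is the same
mechanism used in the _ListToArray/_ArrayToList registrations above.

#include "tensorflow/core/framework/op_kernel.h"

namespace tensorflow {

// Hypothetical kernel with a list(type) input "shapes" holding int32 shape
// metadata that is read on the host.
class ShapeListSinkOp : public OpKernel {
 public:
  explicit ShapeListSinkOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}

  void Compute(OpKernelContext* ctx) override {
    OpInputList shapes;
    OP_REQUIRES_OK(ctx, ctx->input_list("shapes", &shapes));
    for (int i = 0; i < shapes.size(); ++i) {
      // Host-side access is safe only because of the HostMemory("shapes")
      // annotation in the registration below.
      // ... use shapes[i].flat<int32>() ...
    }
  }
};

// Without HostMemory("shapes"), this change leaves the int32 list elements
// in device memory by default.
REGISTER_KERNEL_BUILDER(Name("ShapeListSink")
                            .Device(DEVICE_GPU)
                            .HostMemory("shapes"),
                        ShapeListSinkOp);

}  // namespace tensorflow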