diff --git a/tensorflow/contrib/data/python/ops/dataset_ops.py b/tensorflow/contrib/data/python/ops/dataset_ops.py
index fde85d8783f..5e43aab04ac 100644
--- a/tensorflow/contrib/data/python/ops/dataset_ops.py
+++ b/tensorflow/contrib/data/python/ops/dataset_ops.py
@@ -350,8 +350,7 @@ def _estimate_data_distribution(c, num_examples_per_class_seen):
     # cross-device round-trip. Just use the cached value.
     num_examples_per_class_seen = num_examples_per_class_seen.assign_add(
         math_ops.reduce_sum(
-            array_ops.one_hot(c, num_classes, dtype=dtypes.int64),
-            0))
+            array_ops.one_hot(c, num_classes, dtype=dtypes.int64), 0))
     init_prob_estimate = math_ops.truediv(
         num_examples_per_class_seen,
         math_ops.reduce_sum(num_examples_per_class_seen))
@@ -445,8 +444,8 @@ class Dataset(object):
     output_shapes = str(output_shapes).replace("'", "")
     output_types = nest.map_structure(repr, self.output_types)
     output_types = str(output_types).replace("'", "")
-    return ("<%s shapes: %s, types: %s>"
-            % (type(self).__name__, output_shapes, output_types))
+    return ("<%s shapes: %s, types: %s>" % (type(self).__name__, output_shapes,
+                                            output_types))
 
   @staticmethod
   def from_tensors(tensors):
@@ -595,9 +594,7 @@ class Dataset(object):
       dataset = dataset.repeat(num_epochs)
       if randomize_input:
         dataset = dataset.shuffle(capacity)
-      dataset = dataset.map(
-          lambda x: _parse_example(nest.flatten(x), features)
-      )
+      dataset = dataset.map(lambda x: _parse_example(nest.flatten(x), features))
       dataset = dataset.batch(batch_size)
       return dataset
 
@@ -897,7 +894,7 @@ class Dataset(object):
       A `Dataset`.
     """
     return self.flat_map(
-      map_func=lambda *args: Dataset.from_tensor_slices(args))
+        map_func=lambda *args: Dataset.from_tensor_slices(args))
 
   def filter(self, predicate):
     """Filters this dataset according to `predicate`.
@@ -1031,12 +1028,14 @@ class ZipDataset(Dataset):
   @property
   def output_shapes(self):
     return nest.pack_sequence_as(self._datasets, [
-        ds.output_shapes for ds in nest.flatten(self._datasets)])
+        ds.output_shapes for ds in nest.flatten(self._datasets)
+    ])
 
   @property
   def output_types(self):
     return nest.pack_sequence_as(self._datasets, [
-        ds.output_types for ds in nest.flatten(self._datasets)])
+        ds.output_types for ds in nest.flatten(self._datasets)
+    ])
 
 
 class RepeatDataset(Dataset):
@@ -1049,8 +1048,8 @@ class RepeatDataset(Dataset):
     if count is None:
       self._count = constant_op.constant(-1, dtype=dtypes.int64, name="count")
     else:
-      self._count = ops.convert_to_tensor(count, dtype=dtypes.int64,
-                                          name="count")
+      self._count = ops.convert_to_tensor(
+          count, dtype=dtypes.int64, name="count")
 
   def make_dataset_resource(self):
     return gen_dataset_ops.repeat_dataset(
@@ -1155,8 +1154,8 @@ class ShuffleDataset(Dataset):
     if seed2 is None:
       self._seed2 = constant_op.constant(0, dtype=dtypes.int64, name="seed2")
     else:
-      self._seed2 = ops.convert_to_tensor(seed2, dtype=dtypes.int64,
-                                          name="seed2")
+      self._seed2 = ops.convert_to_tensor(
+          seed2, dtype=dtypes.int64, name="seed2")
 
   def make_dataset_resource(self):
     return gen_dataset_ops.shuffle_dataset(
@@ -1310,12 +1309,10 @@ def _padding_value_to_tensor(value, output_type):
   """
   value = ops.convert_to_tensor(value, name="padding_value")
   if not value.shape.is_compatible_with(tensor_shape.scalar()):
-    raise ValueError(
-        "Padding value should be a scalar, but is not: %s" % value)
+    raise ValueError("Padding value should be a scalar, but is not: %s" % value)
   if value.dtype != output_type:
-    raise TypeError(
-        "Padding value tensor (%s) does not match output type: %s"
-        % (value, output_type))
+    raise TypeError("Padding value tensor (%s) does not match output type: %s" %
+                    (value, output_type))
   return value
 
 
@@ -1329,20 +1326,20 @@ class PaddedBatchDataset(Dataset):
     self._batch_size = batch_size
     padding_values = (padding_values if padding_values is not None else
                       self._default_padding(input_dataset))
-    self._padded_shapes = nest.map_structure_up_to(input_dataset.output_shapes,
-                                                   _partial_shape_to_tensor,
-                                                   padded_shapes)
-    self._padding_values = nest.map_structure_up_to(input_dataset.output_shapes,
-                                                    _padding_value_to_tensor,
-                                                    padding_values,
-                                                    input_dataset.output_types)
+    self._padded_shapes = nest.map_structure_up_to(
+        input_dataset.output_shapes, _partial_shape_to_tensor, padded_shapes)
+    self._padding_values = nest.map_structure_up_to(
+        input_dataset.output_shapes, _padding_value_to_tensor, padding_values,
+        input_dataset.output_types)
 
   def _default_padding(self, input_dataset):
+
     def make_zero(t):
       if t.base_dtype == dtypes.string:
         return ""
       else:
         return np.zeros_like(t.as_numpy_dtype())
+
     return nest.map_structure(make_zero, input_dataset.output_types)
 
   def make_dataset_resource(self):
@@ -1358,9 +1355,11 @@ class PaddedBatchDataset(Dataset):
 
   @property
   def output_shapes(self):
+
     def _padded_shape_to_batch_shape(s):
       return tensor_shape.vector(None).concatenate(
           tensor_util.constant_value_as_shape(s))
+
     return nest.map_structure(_padded_shape_to_batch_shape, self._padded_shapes)
 
   @property
@@ -1376,8 +1375,8 @@ class DenseToSparseBatchDataset(Dataset):
     super(DenseToSparseBatchDataset, self).__init__()
     if not isinstance(input_dataset.output_types, dtypes.DType):
       raise TypeError("DenseToSparseDataset requires an input whose elements "
-                      "have a single component, whereas the input has %r."
-                      % input_dataset.output_types)
+                      "have a single component, whereas the input has %r." %
+                      input_dataset.output_types)
     self._input_dataset = input_dataset
     self._batch_size = batch_size
     self._row_shape = _partial_shape_to_tensor(row_shape)
@@ -1493,22 +1492,6 @@ class GroupByWindowDataset(Dataset):
     return self._output_types
 
 
-def _most_specific_compatible_shape(s1, s2):
-  """Returns the most specific shape compatible with `s1` and `s2`."""
-  if s1.dims is None:
-    return s1
-  if s2.dims is None:
-    return s2
-  s1.assert_same_rank(s2)
-  dims = []
-  for dim1, dim2 in zip(s1, s2):
-    if dim1.value is None or dim2.value is None or dim1.value != dim2.value:
-      dims.append(tensor_shape.Dimension(None))
-    else:
-      dims.append(dim1.value)
-  return tensor_shape.TensorShape(dims)
-
-
 class MapDataset(Dataset):
   """A `Dataset` that maps a function over elements in its input."""
 
@@ -1593,9 +1576,7 @@ class FlatMapDataset(Dataset):
   """A `Dataset` that maps a function over its input and flattens the result."""
 
-  def __init__(self,
-               input_dataset,
-               map_func):
+  def __init__(self, input_dataset, map_func):
     """See `Dataset.flat_map()` for details."""
     super(FlatMapDataset, self).__init__()
     self._input_dataset = input_dataset
@@ -1799,8 +1780,11 @@ class FixedLengthRecordDataset(Dataset):
     return dtypes.string
 
 
-def rejection_resample(dataset, class_func, target_dist,
-                       initial_dist=None, seed=None):
+def rejection_resample(dataset,
+                       class_func,
+                       target_dist,
+                       initial_dist=None,
+                       seed=None):
   """Resamples this dataset to achieve a target class distribution.
 
   **NOTE** Resampling is performed via rejection sampling; some fraction
@@ -1825,36 +1809,34 @@ def rejection_resample(dataset, class_func, target_dist,
   target_dist = ops.convert_to_tensor(target_dist, name="initial_dist")
   class_values_ds = dataset.map(class_func)
   if initial_dist is not None:
-    initial_dist = ops.convert_to_tensor(
-        initial_dist, name="initial_dist")
+    initial_dist = ops.convert_to_tensor(initial_dist, name="initial_dist")
     acceptance_dist = _calculate_acceptance_probs(initial_dist, target_dist)
     initial_dist_ds = Dataset.from_tensors(initial_dist).repeat()
     acceptance_dist_ds = Dataset.from_tensors(acceptance_dist).repeat()
   else:
-    num_classes = (target_dist.shape[0].value
-                   or array_ops.shape(target_dist)[0])
+    num_classes = (target_dist.shape[0].value or
+                   array_ops.shape(target_dist)[0])
     smoothing_constant = 10
     num_examples_per_class_seen = resource_variable_ops.ResourceVariable(
-        initial_value=array_ops.fill(
-            [num_classes], np.int64(smoothing_constant)),
+        initial_value=array_ops.fill([num_classes],
+                                     np.int64(smoothing_constant)),
         trainable=False,
         name="class_count",
         dtype=dtypes.int64)
+
     def update_estimate_and_tile(c):
       return array_ops.tile(
           array_ops.expand_dims(
              _estimate_data_distribution(c, num_examples_per_class_seen), 0),
          [dist_estimation_batch_size, 1])
-    initial_dist_ds = (class_values_ds
-                       .batch(dist_estimation_batch_size)
-                       .map(update_estimate_and_tile)
-                       .unbatch())
+
+    initial_dist_ds = (class_values_ds.batch(dist_estimation_batch_size)
+                       .map(update_estimate_and_tile).unbatch())
     acceptance_dist_ds = initial_dist_ds.map(
         lambda initial: _calculate_acceptance_probs(initial, target_dist))
 
   def maybe_warn_on_large_rejection(accept_dist, initial_dist):
-    proportion_rejected = math_ops.reduce_sum(
-        (1 - accept_dist) * initial_dist)
+    proportion_rejected = math_ops.reduce_sum((1 - accept_dist) * initial_dist)
     return control_flow_ops.cond(
         math_ops.less(proportion_rejected, .5),
         lambda: accept_dist,
@@ -1864,12 +1846,10 @@ def rejection_resample(dataset, class_func, target_dist,
         summarize=100,
         first_n=10))
 
-  acceptance_dist_ds = (
-      Dataset.zip((acceptance_dist_ds, initial_dist_ds))
-      .map(maybe_warn_on_large_rejection))
+  acceptance_dist_ds = (Dataset.zip((acceptance_dist_ds, initial_dist_ds))
+                        .map(maybe_warn_on_large_rejection))
 
-  current_probabilities_ds = (Dataset
-                              .zip((acceptance_dist_ds, class_values_ds))
+  current_probabilities_ds = (Dataset.zip((acceptance_dist_ds, class_values_ds))
                               .map(array_ops.gather))
   filtered_ds = (
       Dataset.zip((class_values_ds, current_probabilities_ds, dataset))
diff --git a/tensorflow/python/framework/tensor_shape.py b/tensorflow/python/framework/tensor_shape.py
index 3aedbfef0d5..155fa4719b8 100644
--- a/tensorflow/python/framework/tensor_shape.py
+++ b/tensorflow/python/framework/tensor_shape.py
@@ -736,6 +736,36 @@ class TensorShape(object):
     if not self.is_compatible_with(other):
       raise ValueError("Shapes %s and %s are incompatible" % (self, other))
 
+  def most_specific_compatible_shape(self, other):
+    """Returns the most specific TensorShape compatible with `self` and `other`.
+
+    * TensorShape([None, 1]) is the most specific TensorShape compatible with
+      both TensorShape([2, 1]) and TensorShape([5, 1]). Note that
+      TensorShape(None) is also compatible with the above TensorShapes.
+
+    * TensorShape([1, 2, 3]) is the most specific TensorShape compatible with
+      both TensorShape([1, 2, 3]) and TensorShape([1, 2, 3]). There are other,
+      less specific TensorShapes compatible with the above TensorShapes, e.g.
+      TensorShape([1, 2, None]) and TensorShape(None).
+
+    Args:
+      other: Another `TensorShape`.
+
+    Returns:
+      A `TensorShape` which is the most specific compatible shape of `self`
+      and `other`.
+    """
+
+    other = as_shape(other)
+    if self._dims is None or other.dims is None or self.ndims != other.ndims:
+      return unknown_shape()
+
+    dims = [(Dimension(None))] * self.ndims
+    for i, (d1, d2) in enumerate(zip(self._dims, other.dims)):
+      if d1 is not None and d2 is not None and d1 == d2:
+        dims[i] = d1
+    return TensorShape(dims)
+
   def is_fully_defined(self):
     """Returns True iff `self` is fully defined in every dimension."""
     return (self._dims is not None and all(dim.value is not None
diff --git a/tensorflow/python/framework/tensor_shape_test.py b/tensorflow/python/framework/tensor_shape_test.py
index 0ae8d6b8217..fffd86c7a62 100644
--- a/tensorflow/python/framework/tensor_shape_test.py
+++ b/tensorflow/python/framework/tensor_shape_test.py
@@ -275,6 +275,26 @@ class ShapeTest(test_util.TensorFlowTestCase):
         tensor_shape.TensorShape([1, 2]).concatenate(
             tensor_shape.Dimension(3)))
 
+  def _testMostSpecificCompatibleShapeHelper(self, x, y, expected):
+    mcs = tensor_shape.TensorShape(x).most_specific_compatible_shape(
+        tensor_shape.TensorShape(y))
+    mcs_dims = mcs.dims
+    if expected is None or mcs_dims is None:
+      self.assertIs(expected, mcs_dims)
+    else:
+      self.assertEqual(expected, mcs.as_list())
+
+  def testMostSpecificCompatibleShape(self):
+    self._testMostSpecificCompatibleShapeHelper([1, 2], None, None)
+    self._testMostSpecificCompatibleShapeHelper(None, [1, 2], None)
+    self._testMostSpecificCompatibleShapeHelper([1, 2], [1, 2, 3, 4], None)
+    self._testMostSpecificCompatibleShapeHelper([1, 2, 3, 4], [1, 2], None)
+    self._testMostSpecificCompatibleShapeHelper([1, 2], [1, 2], [1, 2])
+    self._testMostSpecificCompatibleShapeHelper([None, 2, 3], [1, 1, 3],
+                                                [None, None, 3])
+    self._testMostSpecificCompatibleShapeHelper([1, 1, 3], [None, 2, 3],
+                                                [None, None, 3])
+
   def testHelpers(self):
     tensor_shape.TensorShape([]).assert_is_compatible_with(
         tensor_shape.scalar())
diff --git a/tensorflow/tools/api/golden/tensorflow.-tensor-shape.pbtxt b/tensorflow/tools/api/golden/tensorflow.-tensor-shape.pbtxt
index d5b9cb8f5ed..8e3598fb247 100644
--- a/tensorflow/tools/api/golden/tensorflow.-tensor-shape.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.-tensor-shape.pbtxt
@@ -54,6 +54,10 @@ tf_class {
   member_method {
     name: "merge_with"
     argspec: "args=[\'self\', \'other\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "most_specific_compatible_shape"
+    argspec: "args=[\'self\', \'other\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "num_elements"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
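
A quick sketch of the new public API for reviewers (not part of the diff; assumes a build that includes this change). A dimension survives only where both shapes know it and agree; a rank mismatch or an unknown operand collapses to a fully unknown shape, matching the new unit tests. This replaces the private `_most_specific_compatible_shape` helper deleted from `dataset_ops.py`:

```python
import tensorflow as tf

a = tf.TensorShape([None, 2, 3])
b = tf.TensorShape([1, 1, 3])
# Only the last dimension is known and equal in both shapes.
print(a.most_specific_compatible_shape(b))  # (?, ?, 3)

# A rank mismatch (or an unknown operand) yields a fully unknown shape.
print(tf.TensorShape([1, 2]).most_specific_compatible_shape(
    tf.TensorShape([1, 2, 3])))  # <unknown>
```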
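The `rejection_resample` hunks call `_calculate_acceptance_probs`, whose body this diff does not show. As a hedged illustration only, here is one standard formulation of that math in plain NumPy (an assumption on my part, not the helper's actual implementation): accept an example of class `i` with probability proportional to `target[i] / initial[i]`, scaled so the largest acceptance probability is 1, and the surviving examples follow the target distribution.

```python
import numpy as np

def acceptance_probs(initial_dist, target_dist):
  # Hypothetical sketch: per-class probability of keeping an example.
  ratio = target_dist / initial_dist
  return ratio / np.max(ratio)  # scale so every probability is <= 1

initial = np.array([0.7, 0.2, 0.1])  # observed class distribution
target = np.full(3, 1.0 / 3.0)       # desired uniform distribution
accept = acceptance_probs(initial, target)

# After rejection, class i occurs with probability initial[i] * accept[i],
# which renormalizes to the target distribution.
post = initial * accept
print(accept, post / post.sum())  # second array is ~[1/3, 1/3, 1/3]
```

This is also why `maybe_warn_on_large_rejection` in the diff warns once `reduce_sum((1 - accept_dist) * initial_dist)` reaches 0.5: that sum is exactly the expected fraction of input examples dropped.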