From 67548eff5989de2b0255f2af75a07f8b9d4ea12c Mon Sep 17 00:00:00 2001 From: Yanhui Liang Date: Tue, 15 Sep 2020 13:58:47 -0700 Subject: [PATCH] Remove `tf.keras.utils.HDF5Matrix` as its deprecation date is overdue. PiperOrigin-RevId: 331848144 Change-Id: I72dbb6bf9aef527edf35b6d18278a5c1cf53fcda --- .../python/keras/engine/data_adapter.py | 4 +- tensorflow/python/keras/utils/all_utils.py | 1 - tensorflow/python/keras/utils/io_utils.py | 167 ------------------ .../python/keras/utils/io_utils_test.py | 93 ---------- ...ensorflow.keras.utils.-h-d-f5-matrix.pbtxt | 29 --- .../golden/v1/tensorflow.keras.utils.pbtxt | 4 - ...ensorflow.keras.utils.-h-d-f5-matrix.pbtxt | 29 --- .../golden/v2/tensorflow.keras.utils.pbtxt | 4 - 8 files changed, 2 insertions(+), 329 deletions(-) delete mode 100644 tensorflow/tools/api/golden/v1/tensorflow.keras.utils.-h-d-f5-matrix.pbtxt delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.keras.utils.-h-d-f5-matrix.pbtxt diff --git a/tensorflow/python/keras/engine/data_adapter.py b/tensorflow/python/keras/engine/data_adapter.py index 0df15f368fa..e8759b35448 100644 --- a/tensorflow/python/keras/engine/data_adapter.py +++ b/tensorflow/python/keras/engine/data_adapter.py @@ -423,8 +423,8 @@ class TensorLikeDataAdapter(DataAdapter): class GenericArrayLikeDataAdapter(TensorLikeDataAdapter): """Adapter that handles array-like data without forcing it into memory. - As an example, this adapter handles `keras.utils.HDF5Matrix` which holds - datasets that may be too big to fully fit into memory. + This adapter handles array-like datasets that may be too big to fully + fit into memory. Specifically, this adapter handles any Python class which implements: `__get_item__`, `__len__`, `shape`, and `dtype` with the same meanings diff --git a/tensorflow/python/keras/utils/all_utils.py b/tensorflow/python/keras/utils/all_utils.py index 01ccefc2332..17b8fe98310 100644 --- a/tensorflow/python/keras/utils/all_utils.py +++ b/tensorflow/python/keras/utils/all_utils.py @@ -34,7 +34,6 @@ from tensorflow.python.keras.utils.generic_utils import deserialize_keras_object from tensorflow.python.keras.utils.generic_utils import get_custom_objects from tensorflow.python.keras.utils.generic_utils import Progbar from tensorflow.python.keras.utils.generic_utils import serialize_keras_object -from tensorflow.python.keras.utils.io_utils import HDF5Matrix from tensorflow.python.keras.utils.layer_utils import get_source_inputs from tensorflow.python.keras.utils.multi_gpu_utils import multi_gpu_model from tensorflow.python.keras.utils.np_utils import normalize diff --git a/tensorflow/python/keras/utils/io_utils.py b/tensorflow/python/keras/utils/io_utils.py index 7c3395b239c..e70f8013ef8 100644 --- a/tensorflow/python/keras/utils/io_utils.py +++ b/tensorflow/python/keras/utils/io_utils.py @@ -18,21 +18,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import collections import os import sys -import numpy as np import six -from tensorflow.python.framework import tensor_spec -from tensorflow.python.framework import type_spec -from tensorflow.python.util import deprecation -from tensorflow.python.util.tf_export import keras_export - -try: - import h5py -except ImportError: - h5py = None if sys.version_info >= (3, 6): @@ -77,162 +66,6 @@ def path_to_string(path): return _path_to_string(path) -@keras_export('keras.utils.HDF5Matrix') -class HDF5Matrix(object): - """Representation of HDF5 dataset to be used instead of a Numpy array. - - THIS CLASS IS DEPRECATED. - Training with HDF5Matrix may not be optimized for performance, and might - not work with every distribution strategy. - - We recommend using https://github.com/tensorflow/io to load your - HDF5 data into a tf.data Dataset and passing that dataset to Keras. - """ - refs = collections.defaultdict(int) - - @deprecation.deprecated('2020-05-30', 'Training with ' - 'HDF5Matrix is not optimized for performance. ' - 'Instead, we recommend using ' - 'https://github.com/tensorflow/io to load your ' - 'HDF5 data into a tf.data Dataset and passing ' - 'that dataset to Keras.') - def __init__(self, datapath, dataset, start=0, end=None, normalizer=None): - """Representation of HDF5 dataset to be used instead of a Numpy array. - - Example: - - ```python - x_data = HDF5Matrix('input/file.hdf5', 'data') - model.predict(x_data) - ``` - - Providing `start` and `end` allows use of a slice of the dataset. - - Optionally, a normalizer function (or lambda) can be given. This will - be called on every slice of data retrieved. - - Arguments: - datapath: string, path to a HDF5 file - dataset: string, name of the HDF5 dataset in the file specified - in datapath - start: int, start of desired slice of the specified dataset - end: int, end of desired slice of the specified dataset - normalizer: function to be called on data when retrieved - - Returns: - An array-like HDF5 dataset. - - Raises: - ImportError if HDF5 & h5py are not installed - """ - if h5py is None: - raise ImportError('The use of HDF5Matrix requires ' - 'HDF5 and h5py installed.') - - if datapath not in list(self.refs.keys()): - f = h5py.File(datapath) - self.refs[datapath] = f - else: - f = self.refs[datapath] - self.data = f[dataset] - self.start = start - if end is None: - self.end = self.data.shape[0] - else: - self.end = end - self.normalizer = normalizer - - def __len__(self): - return self.end - self.start - - def __getitem__(self, key): - if isinstance(key, slice): - start, stop = key.start, key.stop - if start is None: - start = 0 - if stop is None: - stop = self.shape[0] - if stop + self.start <= self.end: - idx = slice(start + self.start, stop + self.start) - else: - raise IndexError - elif isinstance(key, (int, np.integer)): - if key + self.start < self.end: - idx = key + self.start - else: - raise IndexError - elif isinstance(key, np.ndarray): - if np.max(key) + self.start < self.end: - idx = (self.start + key).tolist() - else: - raise IndexError - else: - # Assume list/iterable - if max(key) + self.start < self.end: - idx = [x + self.start for x in key] - else: - raise IndexError - if self.normalizer is not None: - return self.normalizer(self.data[idx]) - else: - return self.data[idx] - - @property - def shape(self): - """Gets a numpy-style shape tuple giving the dataset dimensions. - - Returns: - A numpy-style shape tuple. - """ - return (self.end - self.start,) + self.data.shape[1:] - - @property - def dtype(self): - """Gets the datatype of the dataset. - - Returns: - A numpy dtype string. - """ - return self.data.dtype - - @property - def ndim(self): - """Gets the number of dimensions (rank) of the dataset. - - Returns: - An integer denoting the number of dimensions (rank) of the dataset. - """ - return self.data.ndim - - @property - def size(self): - """Gets the total dataset size (number of elements). - - Returns: - An integer denoting the number of elements in the dataset. - """ - return np.prod(self.shape) - - @staticmethod - def _to_type_spec(value): - """Gets the Tensorflow TypeSpec corresponding to the passed dataset. - - Args: - value: A HDF5Matrix object. - - Returns: - A tf.TensorSpec. - """ - if not isinstance(value, HDF5Matrix): - raise TypeError('Expected value to be a HDF5Matrix, but saw: {}'.format( - type(value))) - return tensor_spec.TensorSpec(shape=value.shape, dtype=value.dtype) - - -type_spec.register_type_spec_from_value_converter(HDF5Matrix, - HDF5Matrix._to_type_spec) # pylint: disable=protected-access - - def ask_to_proceed_with_overwrite(filepath): """Produces a prompt asking about overwriting a file. diff --git a/tensorflow/python/keras/utils/io_utils_test.py b/tensorflow/python/keras/utils/io_utils_test.py index 29328e52dbc..a0ead4ee623 100644 --- a/tensorflow/python/keras/utils/io_utils_test.py +++ b/tensorflow/python/keras/utils/io_utils_test.py @@ -18,110 +18,17 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os -import shutil import sys -import numpy as np import six -from tensorflow.python import keras from tensorflow.python.keras import keras_parameterized -from tensorflow.python.keras import testing_utils from tensorflow.python.keras.utils import io_utils from tensorflow.python.platform import test -try: - import h5py # pylint:disable=g-import-not-at-top -except ImportError: - h5py = None - - -def create_dataset(h5_path='test.h5'): - x = np.random.randn(200, 10).astype('float32') - y = np.random.randint(0, 2, size=(200, 1)) - f = h5py.File(h5_path, 'w') - # Creating dataset to store features - x_dset = f.create_dataset('my_data', (200, 10), dtype='f') - x_dset[:] = x - # Creating dataset to store labels - y_dset = f.create_dataset('my_labels', (200, 1), dtype='i') - y_dset[:] = y - f.close() - class TestIOUtils(keras_parameterized.TestCase): - @keras_parameterized.run_all_keras_modes - def test_HDF5Matrix(self): - if h5py is None: - return - - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir) - - h5_path = os.path.join(temp_dir, 'test.h5') - create_dataset(h5_path) - - # Instantiating HDF5Matrix for the training set, - # which is a slice of the first 150 elements - x_train = io_utils.HDF5Matrix(h5_path, 'my_data', start=0, end=150) - y_train = io_utils.HDF5Matrix(h5_path, 'my_labels', start=0, end=150) - - # Likewise for the test set - x_test = io_utils.HDF5Matrix(h5_path, 'my_data', start=150, end=200) - y_test = io_utils.HDF5Matrix(h5_path, 'my_labels', start=150, end=200) - - # HDF5Matrix behave more or less like Numpy matrices - # with regard to indexing - self.assertEqual(y_train.shape, (150, 1)) - # But they do not support negative indices, so don't try print(x_train[-1]) - - self.assertEqual(y_train.dtype, np.dtype('i')) - self.assertEqual(y_train.ndim, 2) - self.assertEqual(y_train.size, 150) - - model = keras.models.Sequential() - model.add(keras.layers.Dense(64, input_shape=(10,), activation='relu')) - model.add(keras.layers.Dense(1, activation='sigmoid')) - model.compile( - loss='binary_crossentropy', - optimizer='sgd', - run_eagerly=testing_utils.should_run_eagerly()) - - # Note: you have to use shuffle='batch' or False with HDF5Matrix - model.fit(x_train, y_train, batch_size=32, shuffle='batch', verbose=False) - # test that evaluation and prediction - # don't crash and return reasonable results - out_pred = model.predict(x_test, batch_size=32, verbose=False) - out_eval = model.evaluate(x_test, y_test, batch_size=32, verbose=False) - - self.assertEqual(out_pred.shape, (50, 1)) - self.assertGreater(out_eval, 0) - - # test slicing for shortened array - self.assertEqual(len(x_train[0:]), len(x_train)) - - # test __getitem__ invalid use cases - with self.assertRaises(IndexError): - _ = x_train[1000] - with self.assertRaises(IndexError): - _ = x_train[1000: 1001] - with self.assertRaises(IndexError): - _ = x_train[[1000, 1001]] - with self.assertRaises(IndexError): - _ = x_train[six.moves.range(1000, 1001)] - with self.assertRaises(IndexError): - _ = x_train[np.array([1000])] - with self.assertRaises(TypeError): - _ = x_train[None] - - # test normalizer - normalizer = lambda x: x + 1 - normalized_x_train = io_utils.HDF5Matrix( - h5_path, 'my_data', start=0, end=150, normalizer=normalizer) - self.assertAllClose(normalized_x_train[0][0], x_train[0][0] + 1) - def test_ask_to_proceed_with_overwrite(self): with test.mock.patch.object(six.moves, 'input') as mock_log: mock_log.return_value = 'y' diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.-h-d-f5-matrix.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.-h-d-f5-matrix.pbtxt deleted file mode 100644 index 6b832051a97..00000000000 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.-h-d-f5-matrix.pbtxt +++ /dev/null @@ -1,29 +0,0 @@ -path: "tensorflow.keras.utils.HDF5Matrix" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "dtype" - mtype: "" - } - member { - name: "ndim" - mtype: "" - } - member { - name: "refs" - mtype: "" - } - member { - name: "shape" - mtype: "" - } - member { - name: "size" - mtype: "" - } - member_method { - name: "__init__" - argspec: "args=[\'self\', \'datapath\', \'dataset\', \'start\', \'end\', \'normalizer\'], varargs=None, keywords=None, defaults=[\'0\', \'None\', \'None\'], " - } -} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.pbtxt index 8c7d4da2a40..f5726de015b 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.utils.pbtxt @@ -8,10 +8,6 @@ tf_module { name: "GeneratorEnqueuer" mtype: "" } - member { - name: "HDF5Matrix" - mtype: "" - } member { name: "OrderedEnqueuer" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.-h-d-f5-matrix.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.-h-d-f5-matrix.pbtxt deleted file mode 100644 index 6b832051a97..00000000000 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.-h-d-f5-matrix.pbtxt +++ /dev/null @@ -1,29 +0,0 @@ -path: "tensorflow.keras.utils.HDF5Matrix" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "dtype" - mtype: "" - } - member { - name: "ndim" - mtype: "" - } - member { - name: "refs" - mtype: "" - } - member { - name: "shape" - mtype: "" - } - member { - name: "size" - mtype: "" - } - member_method { - name: "__init__" - argspec: "args=[\'self\', \'datapath\', \'dataset\', \'start\', \'end\', \'normalizer\'], varargs=None, keywords=None, defaults=[\'0\', \'None\', \'None\'], " - } -} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.pbtxt index 142221fce13..410fac46408 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.utils.pbtxt @@ -8,10 +8,6 @@ tf_module { name: "GeneratorEnqueuer" mtype: "" } - member { - name: "HDF5Matrix" - mtype: "" - } member { name: "OrderedEnqueuer" mtype: ""