83 lines
2.9 KiB
Python
83 lines
2.9 KiB
Python
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ==============================================================================
|
|
"""CIFAR10 small images classification dataset.
|
|
"""
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
import os
|
|
|
|
import numpy as np
|
|
|
|
from tensorflow.python.keras import backend as K
|
|
from tensorflow.python.keras.datasets.cifar import load_batch
|
|
from tensorflow.python.keras.utils.data_utils import get_file
|
|
from tensorflow.python.util.tf_export import keras_export
|
|
|
|
|
|
@keras_export('keras.datasets.cifar10.load_data')
def load_data():
  """Fetches the CIFAR10 image classification dataset as Numpy arrays.

  The archive is obtained through `get_file` (with `untar=True` and a
  hash check), after which the five `data_batch_*` files are read into the
  training arrays and `test_batch` into the test arrays. More details on
  the dataset itself are available on the
  [CIFAR homepage](https://www.cs.toronto.edu/~kriz/cifar.html).

  Returns:
    A pair of tuples: `(x_train, y_train), (x_test, y_test)`.

    **x_train, x_test**: uint8 image arrays. The layout is
      `(num_samples, 32, 32, 3)` when `tf.keras.backend.image_data_format()`
      returns `'channels_last'`; otherwise the channel axis stays in front,
      i.e. `(num_samples, 3, 32, 32)`. `x_train` holds 50,000 samples.

    **y_train, y_test**: uint8 label arrays, each reshaped to
      `(num_samples, 1)`.
  """
  archive_dir = get_file(
      'cifar-10-batches-py',
      origin='https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz',
      untar=True,
      file_hash=
      '6d958be074577803d12ecdefd02955f39262c83c16fe9348329d7fe0b5c001ce')

  # The training split is stored as five files of 10,000 samples each.
  batch_size = 10000
  num_batches = 5
  total_train = num_batches * batch_size

  # Pre-allocate the training buffers once and fill them slice by slice.
  x_train = np.empty((total_train, 3, 32, 32), dtype='uint8')
  y_train = np.empty((total_train,), dtype='uint8')

  for batch_idx in range(num_batches):
    start = batch_idx * batch_size
    stop = start + batch_size
    # Batch files on disk are numbered starting from 1.
    batch_file = os.path.join(archive_dir, 'data_batch_' + str(batch_idx + 1))
    images, labels = load_batch(batch_file)
    x_train[start:stop, :, :, :] = images
    y_train[start:stop] = labels

  x_test, y_test = load_batch(os.path.join(archive_dir, 'test_batch'))

  # Labels become column vectors: one row per sample.
  y_train = np.reshape(y_train, (len(y_train), 1))
  y_test = np.reshape(y_test, (len(y_test), 1))

  # Data is assembled channels-first; move the channel axis last on demand.
  if K.image_data_format() == 'channels_last':
    axes = (0, 2, 3, 1)
    x_train = np.transpose(x_train, axes)
    x_test = np.transpose(x_test, axes)

  # Make the test arrays use the same dtypes as the training arrays.
  x_test = x_test.astype(x_train.dtype)
  y_test = y_test.astype(y_train.dtype)

  return (x_train, y_train), (x_test, y_test)
|