Remove outdated examples.

PiperOrigin-RevId: 265757996
This commit is contained in:
Mark Daoust 2019-08-27 13:49:08 -07:00 committed by TensorFlower Gardener
parent c7f4fe381a
commit 7b0523809e
7 changed files with 0 additions and 717 deletions

View File

@ -1,27 +0,0 @@
# Bazel package for the Estimator regression examples.
package(
licenses = ["notice"], # Apache 2.0
)
# Make the LICENSE file referenceable by label.
exports_files(["LICENSE"])
# Single test target that bundles the example scripts together with `test.py`.
py_test(
name = "test",
size = "medium",
srcs = [
"custom_regression.py",
"dnn_regression.py",
"imports85.py",
"linear_regression_categorical.py",
"test.py",
],
python_version = "PY2",
srcs_version = "PY2AND3",
# NOTE(review): `manual` / `notap` presumably keep this target out of wildcard
# and automated test runs -- confirm against the build infrastructure.
tags = [
"manual",
"notap",
],
deps = [
"//tensorflow:tensorflow_py",
"//third_party/py/numpy",
],
)

View File

@ -1,20 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A collection of regression examples using `Estimators`."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

View File

@ -1,162 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Regression using the DNNRegressor Estimator."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import imports85 # pylint: disable=g-bad-import-order
# Number of training steps passed to `model.train` in `main`.
STEPS = 1000
# Labels are divided by this factor before training, and the reported RMSE is
# multiplied by it again before printing.
PRICE_NORM_FACTOR = 1000
def my_dnn_regression_fn(features, labels, mode, params):
  """A model function implementing DNN regression for a custom Estimator.

  Args:
    features: Input features, fed to `tf.feature_column.input_layer`.
    labels: Label tensor compared against the predictions. Not used in
      `PREDICT` mode.
    mode: One of the `tf.estimator.ModeKeys` values.
    params: Dict of hyperparameters. Reads:
      "feature_columns": (required) columns used to build the input layer.
      "hidden_units": list of hidden layer sizes, default `[20]`.
      "optimizer": optimizer class or factory, default
        `tf.train.AdamOptimizer`.
      "learning_rate": optional learning rate. When missing or `None`, the
        optimizer is constructed without arguments so that its own default
        applies.

  Returns:
    A `tf.estimator.EstimatorSpec` matching `mode`.
  """
  # Extract the input into a dense layer, according to the feature_columns.
  top = tf.feature_column.input_layer(features, params["feature_columns"])

  # Stack one densely connected ReLU layer per entry of "hidden_units".
  for units in params.get("hidden_units", [20]):
    top = tf.layers.dense(inputs=top, units=units, activation=tf.nn.relu)

  # Connect a linear output layer on top.
  output_layer = tf.layers.dense(inputs=top, units=1)

  # Drop the trailing size-1 dimension of the output layer.
  predictions = tf.squeeze(output_layer, 1)

  if mode == tf.estimator.ModeKeys.PREDICT:
    # In `PREDICT` mode we only need to return predictions.
    return tf.estimator.EstimatorSpec(
        mode=mode, predictions={"price": predictions})

  # Calculate loss using mean squared error.
  average_loss = tf.losses.mean_squared_error(labels, predictions)

  # Pre-made estimators use the total_loss instead of the average,
  # so report total_loss for compatibility.
  batch_size = tf.shape(labels)[0]
  # `tf.cast` replaces the deprecated `tf.to_float`.
  total_loss = tf.cast(batch_size, tf.float32) * average_loss

  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer_factory = params.get("optimizer", tf.train.AdamOptimizer)
    learning_rate = params.get("learning_rate")
    # Passing `None` as the learning rate would override the optimizer's own
    # default, so only forward a learning rate that was actually supplied.
    if learning_rate is None:
      optimizer = optimizer_factory()
    else:
      optimizer = optimizer_factory(learning_rate)
    # Training minimizes the average loss; the total loss is only reported.
    train_op = optimizer.minimize(
        loss=average_loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=total_loss, train_op=train_op)

  # In evaluation mode we will calculate evaluation metrics.
  assert mode == tf.estimator.ModeKeys.EVAL

  # Calculate root mean squared error and expose it as an evaluation metric.
  rmse = tf.metrics.root_mean_squared_error(labels, predictions)
  eval_metrics = {"rmse": rmse}

  return tf.estimator.EstimatorSpec(
      mode=mode,
      # Report sum of error for compatibility with pre-made estimators.
      loss=total_loss,
      eval_metric_ops=eval_metrics)
def main(argv):
  """Trains the custom DNN Estimator on imports85 and prints its test RMSE."""
  assert len(argv) == 1
  train, test = imports85.dataset()

  def normalize_price(features, labels):
    # Express prices in thousands; this helps convergence.
    scaled_labels = labels / PRICE_NORM_FACTOR
    return features, scaled_labels

  train = train.map(normalize_price)
  test = test.map(normalize_price)

  def input_train():
    # The shuffle buffer is larger than the data set, so examples are well
    # mixed. The batched data is then repeated forever.
    shuffled = train.shuffle(1000)
    batched = shuffled.batch(128)
    return batched.repeat()

  def input_test():
    # Evaluation input: a single shuffled, batched pass.
    shuffled = test.shuffle(1000)
    return shuffled.batch(128)

  # "body-style" gets one weight per listed vocabulary entry. A vocabulary
  # file (`categorical_column_with_vocabulary_file`) or, for ranges of
  # positive integers, `categorical_column_with_identity` are alternatives.
  body_style = tf.feature_column.categorical_column_with_vocabulary_list(
      key="body-style",
      vocabulary_list=[
          "hardtop", "wagon", "sedan", "hatchback", "convertible"
      ])
  # "make" is hashed into 50 buckets.
  make = tf.feature_column.categorical_column_with_hash_bucket(
      key="make", hash_bucket_size=50)

  # A DNN needs dense inputs, so each sparse categorical column is wrapped.
  curb_weight = tf.feature_column.numeric_column(key="curb-weight")
  highway_mpg = tf.feature_column.numeric_column(key="highway-mpg")
  # One-hot encoding of the body style.
  body_style_one_hot = tf.feature_column.indicator_column(body_style)
  # Trainable 3-dimensional vector per "make" bucket.
  make_embedding = tf.feature_column.embedding_column(make, dimension=3)
  feature_columns = [
      curb_weight, highway_mpg, body_style_one_hot, make_embedding
  ]

  # `params` is handed through unchanged to the `model_fn`.
  estimator_params = {
      "feature_columns": feature_columns,
      "learning_rate": 0.001,
      "optimizer": tf.train.AdamOptimizer,
      "hidden_units": [20, 20],
  }
  model = tf.estimator.Estimator(
      model_fn=my_dnn_regression_fn, params=estimator_params)

  # Fit the model, then score it on the held-out data.
  model.train(input_fn=input_train, steps=STEPS)
  eval_result = model.evaluate(input_fn=input_test)

  # Undo the label scaling so the RMSE is printed in dollars.
  rmse_dollars = PRICE_NORM_FACTOR * eval_result["rmse"]
  print("\n" + 80 * "*")
  print("\nRMS error for the test set: ${:.0f}".format(rmse_dollars))
  print()


if __name__ == "__main__":
  # Surface the "INFO" logs that the Estimator emits periodically.
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.app.run(main=main)

View File

@ -1,105 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Regression using the DNNRegressor Estimator."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import imports85 # pylint: disable=g-bad-import-order
# Number of training steps passed to `model.train` in `main`.
STEPS = 5000
# Labels are divided by this factor before training, and the reported RMSE is
# multiplied by it again before printing.
PRICE_NORM_FACTOR = 1000
def main(argv):
  """Trains a `DNNRegressor` on imports85 and prints its test RMSE."""
  assert len(argv) == 1
  train, test = imports85.dataset()

  def normalize_price(features, labels):
    # Labels in units of thousands converge better.
    return features, labels / PRICE_NORM_FACTOR

  train = train.map(normalize_price)
  test = test.map(normalize_price)

  def input_train():
    # Shuffle with a buffer larger than the data set, batch, and loop forever.
    batches = train.shuffle(1000).batch(128)
    return batches.repeat()

  def input_test():
    # One shuffled, batched pass over the evaluation data.
    batches = test.shuffle(1000).batch(128)
    return batches

  # Categorical inputs. "body-style" uses an explicit vocabulary list; a
  # vocabulary file (`categorical_column_with_vocabulary_file`) or
  # `categorical_column_with_identity` (for ranges of positive integers)
  # are alternatives. "make" is hashed into 50 buckets.
  body_style_vocab = ["hardtop", "wagon", "sedan", "hatchback", "convertible"]
  body_style = tf.feature_column.categorical_column_with_vocabulary_list(
      key="body-style", vocabulary_list=body_style_vocab)
  make = tf.feature_column.categorical_column_with_hash_bucket(
      key="make", hash_bucket_size=50)

  # Two numeric columns, followed by the dense versions of the categorical
  # columns that a DNN requires.
  feature_columns = [
      tf.feature_column.numeric_column(key=numeric_key)
      for numeric_key in ("curb-weight", "highway-mpg")
  ]
  # One-hot vector for the body style.
  feature_columns.append(tf.feature_column.indicator_column(body_style))
  # Trainable 3-dimensional embedding for the make.
  feature_columns.append(
      tf.feature_column.embedding_column(make, dimension=3))

  # Two hidden layers of 20 units each on top of the columns above.
  model = tf.estimator.DNNRegressor(
      hidden_units=[20, 20], feature_columns=feature_columns)

  # Fit the model, then score it on the held-out data.
  model.train(input_fn=input_train, steps=STEPS)
  eval_result = model.evaluate(input_fn=input_test)

  # "average_loss" holds the Mean Squared Error; its square root, scaled back
  # by the normalization factor, is the RMSE in dollars.
  mean_squared_error = eval_result["average_loss"]
  rmse_dollars = PRICE_NORM_FACTOR * mean_squared_error**0.5
  print("\n" + 80 * "*")
  print("\nRMS error for the test set: ${:.0f}".format(rmse_dollars))
  print()


if __name__ == "__main__":
  # Surface the "INFO" logs that the Estimator emits periodically.
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.app.run(main=main)

View File

@ -1,204 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A dataset loader for imports85.data."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import numpy as np
import tensorflow as tf
try:
import pandas as pd # pylint: disable=g-import-not-at-top
except ImportError:
pass
# Location of the raw imports-85 data file that `_get_imports85` downloads.
URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data"
# Order is important for the csv-readers, so we use an OrderedDict here.
# Each value is a one-element list holding the column's default; the Python
# type of that default also determines the column's entry in `types`.
defaults = collections.OrderedDict([
("symboling", [0]),
("normalized-losses", [0.0]),
("make", [""]),
("fuel-type", [""]),
("aspiration", [""]),
("num-of-doors", [""]),
("body-style", [""]),
("drive-wheels", [""]),
("engine-location", [""]),
("wheel-base", [0.0]),
("length", [0.0]),
("width", [0.0]),
("height", [0.0]),
("curb-weight", [0.0]),
("engine-type", [""]),
("num-of-cylinders", [""]),
("engine-size", [0.0]),
("fuel-system", [""]),
("bore", [0.0]),
("stroke", [0.0]),
("compression-ratio", [0.0]),
("horsepower", [0.0]),
("peak-rpm", [0.0]),
("city-mpg", [0.0]),
("highway-mpg", [0.0]),
("price", [0.0])
]) # pyformat: disable
# Maps each column name to the Python type of its default value, in the same
# column order as `defaults`.
types = collections.OrderedDict((key, type(value[0]))
for key, value in defaults.items())
def _get_imports85():
  """Fetches the imports85 data file and returns the path `get_file` reports.

  The file name is the last path component of `URL`. `tf.keras.utils.get_file`
  is used instead of the deprecated `tf.contrib.keras` alias, which is not
  available once `tf.contrib` is removed.

  Returns:
    The path returned by `tf.keras.utils.get_file`.
  """
  file_name = URL.split("/")[-1]
  return tf.keras.utils.get_file(file_name, URL)
def dataset(y_name="price", train_fraction=0.7):
  """Load the imports85 data as a (train, test) pair of `Dataset`.

  Each dataset generates (features_dict, label) pairs. Lines containing a
  question mark are dropped. The train/test split is decided per line from a
  hash of the line text, so it is the same on every run.

  Args:
    y_name: The name of the column to use as the label.
    train_fraction: A float, the fraction of data to use for training. The
      remainder will be used for evaluation.

  Returns:
    A (train, test) pair of `Datasets`.
  """
  # Download and cache the data.
  path = _get_imports85()

  def decode_line(line):
    """Convert a csv line into a (features_dict, label) pair."""
    # Decode the line to a tuple of items based on the types of
    # `defaults.values()`.
    items = tf.decode_csv(line, list(defaults.values()))

    # Pair every column name with its decoded item.
    features_dict = dict(zip(defaults.keys(), items))

    # Remove the label from the features_dict.
    label = features_dict.pop(y_name)

    return features_dict, label

  def has_no_question_marks(line):
    """Returns True if the line of text has no question marks."""
    # Split the line into an array of characters.
    chars = tf.string_split(line[tf.newaxis], "").values
    # For each character check if it is a question mark.
    is_question = tf.equal(chars, "?")
    any_question = tf.reduce_any(is_question)
    # `~` rather than `not`: this is a symbolic tensor, not a Python boolean.
    no_question = ~any_question

    return no_question

  def in_training_set(line):
    """Returns a boolean tensor, true if the line is in the training set."""
    # If you randomly split the dataset you won't get the same split in both
    # sessions if you stop and restart training later. Also a simple
    # random split won't work with a dataset that's too big to `.cache()` as
    # we are doing here.
    num_buckets = 1000000
    bucket_id = tf.string_to_hash_bucket_fast(line, num_buckets)
    # Use the hash bucket id as a random number that's deterministic per
    # example.
    return bucket_id < int(train_fraction * num_buckets)

  def in_test_set(line):
    """Returns a boolean tensor, true if the line is in the test set."""
    # Items not in the training set are in the test set.
    # This line must use `~` instead of `not` because `not` only works on
    # python booleans but we are dealing with symbolic tensors.
    return ~in_training_set(line)

  base_dataset = (
      tf.data
      # Get the lines from the file.
      .TextLineDataset(path)
      # Drop lines with question marks.
      .filter(has_no_question_marks))

  train = (
      base_dataset
      # Take only the training-set lines.
      .filter(in_training_set)
      # Decode each line into a (features_dict, label) pair.
      .map(decode_line)
      # Cache data so you only decode the file once.
      .cache())

  # Do the same for the test-set. The cache goes after the decode step, as in
  # the training pipeline, so the decoded pairs are what gets cached.
  test = (
      base_dataset
      .filter(in_test_set)
      .map(decode_line)
      .cache())

  return train, test
def raw_dataframe():
  """Load the imports85 data as a pd.DataFrame.

  Columns are named and typed according to `types`; "?" entries are read as
  missing values.

  Returns:
    A pandas DataFrame with one row per line of the data file.

  Raises:
    ImportError: If pandas is not installed.
  """
  # The module-level pandas import is optional and swallows failures, which
  # would leave `pd` undefined here. Importing locally reports a missing
  # pandas as an ImportError instead of a NameError.
  import pandas as pd  # pylint: disable=g-import-not-at-top

  # Download and cache the data.
  path = _get_imports85()

  # Load it into a pandas dataframe.
  df = pd.read_csv(path, names=types.keys(), dtype=types, na_values="?")

  return df
def load_data(y_name="price", train_fraction=0.7, seed=None):
  """Get the imports85 data set.

  A description of the data is available at:
    https://archive.ics.uci.edu/ml/datasets/automobile

  The data itself can be found at:
    https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data

  Args:
    y_name: the column to return as the label.
    train_fraction: the fraction of the dataset to use for training.
    seed: The random seed to use when shuffling the data. `None` generates a
      unique shuffle every run.

  Returns:
    a pair of pairs where the first pair is the training data, and the second
    is the test data:
    `(x_train, y_train), (x_test, y_test) = load_data(...)`
    `x` contains a pandas DataFrame of features, while `y` contains the label
    array.
  """
  # Load the raw data columns.
  data = raw_dataframe()

  # Delete rows with unknowns.
  data = data.dropna()

  # Use a private random state instead of reseeding NumPy's global generator,
  # so calling this function does not disturb other users of `np.random`.
  # An integer seed selects the same rows as passing the seed directly.
  rng = np.random.RandomState(seed)

  # Split the data into train/test subsets.
  x_train = data.sample(frac=train_fraction, random_state=rng)
  x_test = data.drop(x_train.index)

  # Extract the label from the features dataframe.
  y_train = x_train.pop(y_name)
  y_test = x_test.pop(y_name)

  return (x_train, y_train), (x_test, y_test)

View File

@ -1,110 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Linear regression with categorical features."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import imports85 # pylint: disable=g-bad-import-order
# Number of training steps passed to `model.train` in `main`.
STEPS = 1000
# Labels are divided by this factor before training, and the reported RMSE is
# multiplied by it again before printing.
PRICE_NORM_FACTOR = 1000
def main(argv):
  """Trains a `LinearRegressor` on imports85 and prints its test RMSE."""
  assert len(argv) == 1
  train, test = imports85.dataset()

  def normalize_price(features, labels):
    # Work with prices in thousands; this helps convergence.
    normalized = labels / PRICE_NORM_FACTOR
    return features, normalized

  train = train.map(normalize_price)
  test = test.map(normalize_price)

  def input_train():
    # Buffer larger than the data set => well-mixed examples; repeat forever.
    mixed = train.shuffle(1000)
    return mixed.batch(128).repeat()

  def input_test():
    # Validation input: one shuffled, batched pass.
    mixed = test.shuffle(1000)
    return mixed.batch(128)

  # Two ways of feeding categorical inputs to a model via `feature_columns`.
  #
  # 1. A vocabulary column gives every listed category its own weight; values
  #    outside the vocabulary receive a weight of zero. The vocabulary could
  #    also come from a file (`categorical_column_with_vocabulary_file`) or
  #    be a range of positive integers (`categorical_column_with_identity`).
  body_style_column = (
      tf.feature_column.categorical_column_with_vocabulary_list(
          key="body-style",
          vocabulary_list=[
              "hardtop", "wagon", "sedan", "hatchback", "convertible"
          ]))

  # 2. A hashed column suits an unspecified vocabulary. It keeps a fixed
  #    number of weights and assigns each input category to one of them.
  #    Because the assignment is pseudo-random, some weights may be shared
  #    between categories while others stay unused.
  make_column = tf.feature_column.categorical_column_with_hash_bucket(
      key="make", hash_bucket_size=50)

  # Two numeric features plus the two categorical columns, which adjust the
  # price based on "make" and "body-style".
  curb_weight_column = tf.feature_column.numeric_column(key="curb-weight")
  highway_mpg_column = tf.feature_column.numeric_column(key="highway-mpg")
  feature_columns = [
      curb_weight_column,
      highway_mpg_column,
      body_style_column,
      make_column,
  ]

  # Build the Estimator.
  model = tf.estimator.LinearRegressor(feature_columns=feature_columns)

  # Fit the model (Estimators log output every 100 steps by default), then
  # score it on the held-out data.
  model.train(input_fn=input_train, steps=STEPS)
  eval_result = model.evaluate(input_fn=input_test)

  # "average_loss" holds the Mean Squared Error; its square root, scaled back
  # by the normalization factor, is the RMSE in dollars.
  mean_squared_error = eval_result["average_loss"]
  rmse_dollars = PRICE_NORM_FACTOR * mean_squared_error**0.5
  print("\n" + 80 * "*")
  print("\nRMS error for the test set: ${:.0f}".format(rmse_dollars))
  print()


if __name__ == "__main__":
  # Surface the "INFO" logs that the Estimator emits periodically.
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.app.run(main=main)

View File

@ -1,89 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A simple smoke test that runs these examples for 1 training iteration."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import pandas as pd
from six.moves import StringIO
import tensorflow.examples.get_started.regression.imports85 as imports85
sys.modules["imports85"] = imports85
# pylint: disable=g-bad-import-order,g-import-not-at-top
import tensorflow.data as data
import tensorflow.examples.get_started.regression.dnn_regression as dnn_regression
import tensorflow.examples.get_started.regression.linear_regression_categorical as linear_regression_categorical
import tensorflow.examples.get_started.regression.custom_regression as custom_regression
from tensorflow.python.platform import googletest
from tensorflow.python.platform import test
# pylint: enable=g-bad-import-order,g-import-not-at-top
# pylint: disable=line-too-long
# Four sample rows in the imports-85 csv layout, joined with newlines. The
# first and last rows contain a "?" placeholder in their second field.
FOUR_LINES = "\n".join([
"1,?,alfa-romero,gas,std,two,hatchback,rwd,front,94.50,171.20,65.50,52.40,2823,ohcv,six,152,mpfi,2.68,3.47,9.00,154,5000,19,26,16500",
"2,164,audi,gas,std,four,sedan,fwd,front,99.80,176.60,66.20,54.30,2337,ohc,four,109,mpfi,3.19,3.40,10.00,102,5500,24,30,13950",
"2,164,audi,gas,std,four,sedan,4wd,front,99.40,176.60,66.40,54.30,2824,ohc,five,136,mpfi,3.19,3.40,8.00,115,5500,18,22,17450",
"2,?,audi,gas,std,two,sedan,fwd,front,99.80,177.30,66.30,53.10,2507,ohc,five,136,mpfi,3.19,3.40,8.50,110,5500,19,25,15250",
])
# pylint: enable=line-too-long
def four_lines_dataframe():
  """Parses `FOUR_LINES` into a DataFrame using the imports85 column types."""
  column_types = imports85.types
  csv_buffer = StringIO(FOUR_LINES)
  frame = pd.read_csv(
      csv_buffer,
      names=column_types.keys(),
      dtype=column_types,
      na_values="?")
  return frame
def four_lines_dataset(*args, **kwargs):
  """Returns a dataset of the `FOUR_LINES` rows, ignoring all arguments.

  The tests patch this in for `TextLineDataset`, which is why it accepts and
  discards arbitrary arguments.
  """
  del args, kwargs  # Accepted only for signature compatibility.
  sample_lines = FOUR_LINES.split("\n")
  return data.Dataset.from_tensor_slices(sample_lines)
class RegressionTest(googletest.TestCase):
  """Test the regression examples in this directory."""

  def _run_example(self, example_module):
    """Runs `example_module.main` for one step on the four sample lines.

    While the example runs, `TextLineDataset` is replaced by
    `four_lines_dataset`, `imports85._get_imports85` by a function returning
    None, and the example's `STEPS` by 1.
    """
    fake_reader = test.mock.patch.dict(
        data.__dict__, {"TextLineDataset": four_lines_dataset})
    fake_download = test.mock.patch.dict(
        imports85.__dict__, {"_get_imports85": (lambda: None)})
    single_step = test.mock.patch.dict(
        example_module.__dict__, {"STEPS": 1})
    with fake_reader, fake_download, single_step:
      example_module.main([""])

  def test_linear_regression_categorical(self):
    self._run_example(linear_regression_categorical)

  def test_dnn_regression(self):
    self._run_example(dnn_regression)

  def test_custom_regression(self):
    self._run_example(custom_regression)


if __name__ == "__main__":
  googletest.main()