Remove outdated examples.
PiperOrigin-RevId: 265757996
This commit is contained in: parent c7f4fe381a, commit 7b0523809e
tensorflow/examples/get_started/regression/BUILD
@@ -1,27 +0,0 @@
package(
    licenses = ["notice"],  # Apache 2.0
)

exports_files(["LICENSE"])

py_test(
    name = "test",
    size = "medium",
    srcs = [
        "custom_regression.py",
        "dnn_regression.py",
        "imports85.py",
        "linear_regression_categorical.py",
        "test.py",
    ],
    python_version = "PY2",
    srcs_version = "PY2AND3",
    tags = [
        "manual",
        "notap",
    ],
    deps = [
        "//tensorflow:tensorflow_py",
        "//third_party/py/numpy",
    ],
)
tensorflow/examples/get_started/regression/__init__.py
@@ -1,20 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""A collection of regression examples using `Estimators`."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
tensorflow/examples/get_started/regression/custom_regression.py
@@ -1,162 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Regression using a custom Estimator with a DNN model function."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

import imports85  # pylint: disable=g-bad-import-order

STEPS = 1000
PRICE_NORM_FACTOR = 1000


def my_dnn_regression_fn(features, labels, mode, params):
  """A model function implementing DNN regression for a custom Estimator."""

  # Extract the input into a dense layer, according to the feature_columns.
  top = tf.feature_column.input_layer(features, params["feature_columns"])

  # Iterate over the "hidden_units" list of layer sizes, default is [20].
  for units in params.get("hidden_units", [20]):
    # Add a hidden layer, densely connected on top of the previous layer.
    top = tf.layers.dense(inputs=top, units=units, activation=tf.nn.relu)

  # Connect a linear output layer on top.
  output_layer = tf.layers.dense(inputs=top, units=1)

  # Reshape the output layer to a 1-dim Tensor to return predictions.
  predictions = tf.squeeze(output_layer, 1)

  if mode == tf.estimator.ModeKeys.PREDICT:
    # In `PREDICT` mode we only need to return predictions.
    return tf.estimator.EstimatorSpec(
        mode=mode, predictions={"price": predictions})

  # Calculate loss using mean squared error.
  average_loss = tf.losses.mean_squared_error(labels, predictions)

  # Pre-made estimators use the total_loss instead of the average,
  # so report total_loss for compatibility.
  batch_size = tf.shape(labels)[0]
  total_loss = tf.to_float(batch_size) * average_loss

  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = params.get("optimizer", tf.train.AdamOptimizer)
    optimizer = optimizer(params.get("learning_rate", None))
    train_op = optimizer.minimize(
        loss=average_loss, global_step=tf.train.get_global_step())

    return tf.estimator.EstimatorSpec(
        mode=mode, loss=total_loss, train_op=train_op)

  # In evaluation mode we will calculate evaluation metrics.
  assert mode == tf.estimator.ModeKeys.EVAL

  # Calculate root mean squared error.
  rmse = tf.metrics.root_mean_squared_error(labels, predictions)

  # Add the rmse to the collection of evaluation metrics.
  eval_metrics = {"rmse": rmse}

  return tf.estimator.EstimatorSpec(
      mode=mode,
      # Report sum of error for compatibility with pre-made estimators.
      loss=total_loss,
      eval_metric_ops=eval_metrics)


def main(argv):
  """Builds, trains, and evaluates the model."""
  assert len(argv) == 1
  (train, test) = imports85.dataset()

  # Switch the labels to units of thousands for better convergence.
  def normalize_price(features, labels):
    return features, labels / PRICE_NORM_FACTOR

  train = train.map(normalize_price)
  test = test.map(normalize_price)

  # Build the training input_fn.
  def input_train():
    return (
        # Shuffling with a buffer larger than the data set ensures
        # that the examples are well mixed.
        train.shuffle(1000).batch(128)
        # Repeat forever.
        .repeat())

  # Build the validation input_fn.
  def input_test():
    return test.shuffle(1000).batch(128)

  # Assign a unique weight to each category. To do this you must specify the
  # category's vocabulary (values outside this specification will receive a
  # weight of zero). Here we specify the vocabulary using a list of options.
  # The vocabulary can also be specified with a vocabulary file (using
  # `categorical_column_with_vocabulary_file`). For features covering a
  # range of positive integers use `categorical_column_with_identity`.
  body_style_vocab = ["hardtop", "wagon", "sedan", "hatchback", "convertible"]
  body_style = tf.feature_column.categorical_column_with_vocabulary_list(
      key="body-style", vocabulary_list=body_style_vocab)
  make = tf.feature_column.categorical_column_with_hash_bucket(
      key="make", hash_bucket_size=50)

  feature_columns = [
      tf.feature_column.numeric_column(key="curb-weight"),
      tf.feature_column.numeric_column(key="highway-mpg"),
      # Since this is a DNN model, convert categorical columns from sparse
      # to dense. Wrap them in an `indicator_column` to create a
      # one-hot vector from the input.
      tf.feature_column.indicator_column(body_style),
      # Or use an `embedding_column` to create a trainable vector for each
      # index.
      tf.feature_column.embedding_column(make, dimension=3),
  ]

  # Build a custom Estimator, using the model_fn.
  # `params` is passed through to the `model_fn`.
  model = tf.estimator.Estimator(
      model_fn=my_dnn_regression_fn,
      params={
          "feature_columns": feature_columns,
          "learning_rate": 0.001,
          "optimizer": tf.train.AdamOptimizer,
          "hidden_units": [20, 20]
      })

  # Train the model.
  model.train(input_fn=input_train, steps=STEPS)

  # Evaluate how the model performs on data it has not yet seen.
  eval_result = model.evaluate(input_fn=input_test)

  # Print the Root Mean Square Error (RMSE).
  print("\n" + 80 * "*")
  print("\nRMS error for the test set: ${:.0f}"
        .format(PRICE_NORM_FACTOR * eval_result["rmse"]))

  print()


if __name__ == "__main__":
  # The Estimator periodically generates "INFO" logs; make these logs visible.
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.app.run(main=main)
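Note that `main` above never exercises the `PREDICT` branch of `my_dnn_regression_fn`. A minimal sketch of how it could be driven after training, assuming the `model` and `test` objects from `main`; `input_predict` is a hypothetical helper, not part of the deleted file:

def input_predict():
  # Prediction input needs no shuffling or repetition.
  return test.batch(128)

for pred in model.predict(input_fn=input_predict):
  # Each element is the predictions dict built in the model_fn, so the
  # estimated price (in thousands) sits under the "price" key.
  print(PRICE_NORM_FACTOR * pred["price"])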
tensorflow/examples/get_started/regression/dnn_regression.py
@@ -1,105 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Regression using the DNNRegressor Estimator."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

import imports85  # pylint: disable=g-bad-import-order

STEPS = 5000
PRICE_NORM_FACTOR = 1000


def main(argv):
  """Builds, trains, and evaluates the model."""
  assert len(argv) == 1
  (train, test) = imports85.dataset()

  # Switch the labels to units of thousands for better convergence.
  def normalize_price(features, labels):
    return features, labels / PRICE_NORM_FACTOR

  train = train.map(normalize_price)
  test = test.map(normalize_price)

  # Build the training input_fn.
  def input_train():
    return (
        # Shuffling with a buffer larger than the data set ensures
        # that the examples are well mixed.
        train.shuffle(1000).batch(128)
        # Repeat forever.
        .repeat())

  # Build the validation input_fn.
  def input_test():
    return test.shuffle(1000).batch(128)

  # Assign a unique weight to each category. To do this you must specify the
  # category's vocabulary (values outside this specification will receive a
  # weight of zero). Here we specify the vocabulary using a list of options.
  # The vocabulary can also be specified with a vocabulary file (using
  # `categorical_column_with_vocabulary_file`). For features covering a
  # range of positive integers use `categorical_column_with_identity`.
  body_style_vocab = ["hardtop", "wagon", "sedan", "hatchback", "convertible"]
  body_style = tf.feature_column.categorical_column_with_vocabulary_list(
      key="body-style", vocabulary_list=body_style_vocab)
  make = tf.feature_column.categorical_column_with_hash_bucket(
      key="make", hash_bucket_size=50)

  feature_columns = [
      tf.feature_column.numeric_column(key="curb-weight"),
      tf.feature_column.numeric_column(key="highway-mpg"),
      # Since this is a DNN model, convert categorical columns from sparse
      # to dense. Wrap them in an `indicator_column` to create a
      # one-hot vector from the input.
      tf.feature_column.indicator_column(body_style),
      # Or use an `embedding_column` to create a trainable vector for each
      # index.
      tf.feature_column.embedding_column(make, dimension=3),
  ]

  # Build a DNNRegressor, with 2x20-unit hidden layers, with the feature
  # columns defined above as input.
  model = tf.estimator.DNNRegressor(
      hidden_units=[20, 20], feature_columns=feature_columns)

  # Train the model.
  model.train(input_fn=input_train, steps=STEPS)

  # Evaluate how the model performs on data it has not yet seen.
  eval_result = model.evaluate(input_fn=input_test)

  # The evaluation returns a Python dictionary. The "average_loss" key holds
  # the Mean Squared Error (MSE).
  average_loss = eval_result["average_loss"]

  # Convert MSE to Root Mean Square Error (RMSE).
  print("\n" + 80 * "*")
  print("\nRMS error for the test set: ${:.0f}"
        .format(PRICE_NORM_FACTOR * average_loss**0.5))

  print()


if __name__ == "__main__":
  # The Estimator periodically generates "INFO" logs; make these logs visible.
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.app.run(main=main)
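For comparison with the custom Estimator above, a hedged sketch of pulling per-example predictions out of the canned `DNNRegressor` (again assuming `model` and `test` from `main`, and `input_predict` as a hypothetical helper); in TF 1.x the canned regression heads report each value under a "predictions" key holding a length-1 array:

def input_predict():
  return test.batch(128)  # mirrors input_test, without shuffling

for pred in model.predict(input_fn=input_predict):
  # "predictions" holds a length-1 array for single-target regression.
  print(PRICE_NORM_FACTOR * pred["predictions"][0])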
tensorflow/examples/get_started/regression/imports85.py
@@ -1,204 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A dataset loader for imports85.data."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections

import numpy as np
import tensorflow as tf

try:
  import pandas as pd  # pylint: disable=g-import-not-at-top
except ImportError:
  pass


URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data"

# Order is important for the csv-readers, so we use an OrderedDict here.
defaults = collections.OrderedDict([
    ("symboling", [0]),
    ("normalized-losses", [0.0]),
    ("make", [""]),
    ("fuel-type", [""]),
    ("aspiration", [""]),
    ("num-of-doors", [""]),
    ("body-style", [""]),
    ("drive-wheels", [""]),
    ("engine-location", [""]),
    ("wheel-base", [0.0]),
    ("length", [0.0]),
    ("width", [0.0]),
    ("height", [0.0]),
    ("curb-weight", [0.0]),
    ("engine-type", [""]),
    ("num-of-cylinders", [""]),
    ("engine-size", [0.0]),
    ("fuel-system", [""]),
    ("bore", [0.0]),
    ("stroke", [0.0]),
    ("compression-ratio", [0.0]),
    ("horsepower", [0.0]),
    ("peak-rpm", [0.0]),
    ("city-mpg", [0.0]),
    ("highway-mpg", [0.0]),
    ("price", [0.0])
])  # pyformat: disable


types = collections.OrderedDict((key, type(value[0]))
                                for key, value in defaults.items())


def _get_imports85():
  path = tf.contrib.keras.utils.get_file(URL.split("/")[-1], URL)
  return path


def dataset(y_name="price", train_fraction=0.7):
  """Load the imports85 data as a (train,test) pair of `Dataset`.

  Each dataset generates (features_dict, label) pairs.

  Args:
    y_name: The name of the column to use as the label.
    train_fraction: A float, the fraction of data to use for training. The
        remainder will be used for evaluation.
  Returns:
    A (train,test) pair of `Datasets`.
  """
  # Download and cache the data.
  path = _get_imports85()

  # Define how the lines of the file should be parsed.
  def decode_line(line):
    """Convert a csv line into a (features_dict,label) pair."""
    # Decode the line to a tuple of items based on the types of
    # csv_header.values().
    items = tf.decode_csv(line, list(defaults.values()))

    # Convert the keys and items to a dict.
    pairs = zip(defaults.keys(), items)
    features_dict = dict(pairs)

    # Remove the label from the features_dict.
    label = features_dict.pop(y_name)

    return features_dict, label

  def has_no_question_marks(line):
    """Returns True if the line of text has no question marks."""
    # Split the line into an array of characters.
    chars = tf.string_split(line[tf.newaxis], "").values
    # For each character check if it is a question mark.
    is_question = tf.equal(chars, "?")
    any_question = tf.reduce_any(is_question)
    no_question = ~any_question

    return no_question

  def in_training_set(line):
    """Returns a boolean tensor, true if the line is in the training set."""
    # If you randomly split the dataset you won't get the same split in both
    # sessions if you stop and restart training later. Also a simple
    # random split won't work with a dataset that's too big to `.cache()` as
    # we are doing here.
    num_buckets = 1000000
    bucket_id = tf.string_to_hash_bucket_fast(line, num_buckets)
    # Use the hash bucket id as a random number that's deterministic per
    # example.
    return bucket_id < int(train_fraction * num_buckets)

  def in_test_set(line):
    """Returns a boolean tensor, true if the line is in the test set."""
    # Items not in the training set are in the test set.
    # This line must use `~` instead of `not` because `not` only works on
    # Python booleans but we are dealing with symbolic tensors.
    return ~in_training_set(line)

  base_dataset = (
      tf.data
      # Get the lines from the file.
      .TextLineDataset(path)
      # Drop lines with question marks.
      .filter(has_no_question_marks))

  train = (base_dataset
           # Take only the training-set lines.
           .filter(in_training_set)
           # Decode each line into a (features_dict, label) pair.
           .map(decode_line)
           # Cache data so you only decode the file once.
           .cache())

  # Do the same for the test-set.
  test = (base_dataset.filter(in_test_set).cache().map(decode_line))

  return train, test


def raw_dataframe():
  """Load the imports85 data as a pd.DataFrame."""
  # Download and cache the data.
  path = _get_imports85()

  # Load it into a pandas DataFrame.
  df = pd.read_csv(path, names=types.keys(), dtype=types, na_values="?")

  return df


def load_data(y_name="price", train_fraction=0.7, seed=None):
  """Get the imports85 data set.

  A description of the data is available at:
    https://archive.ics.uci.edu/ml/datasets/automobile

  The data itself can be found at:
    https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data

  Args:
    y_name: the column to return as the label.
    train_fraction: the fraction of the dataset to use for training.
    seed: The random seed to use when shuffling the data. `None` generates a
        unique shuffle every run.
  Returns:
    a pair of pairs where the first pair is the training data, and the second
    is the test data:
    `(x_train, y_train), (x_test, y_test) = get_imports85_dataset(...)`
    `x` contains a pandas DataFrame of features, while `y` contains the label
    array.
  """
  # Load the raw data columns.
  data = raw_dataframe()

  # Delete rows with unknowns.
  data = data.dropna()

  # Shuffle the data.
  np.random.seed(seed)

  # Split the data into train/test subsets.
  x_train = data.sample(frac=train_fraction, random_state=seed)
  x_test = data.drop(x_train.index)

  # Extract the label from the features dataframe.
  y_train = x_train.pop(y_name)
  y_test = x_test.pop(y_name)

  return (x_train, y_train), (x_test, y_test)
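The hash-bucket split in `in_training_set` is the detail most worth keeping: because the bucket id is a pure function of the CSV line, every example lands on the same side of the split across runs and restarts, with no state to store. A rough pure-Python analogue, using `hashlib` in place of `tf.string_to_hash_bucket_fast` (the helper name is ours, not the file's):

import hashlib

def in_training_set_py(line, train_fraction=0.7, num_buckets=1000000):
  """Deterministically assigns a CSV line to the training set."""
  # Hash the raw line to a bucket id that is stable across runs.
  digest = hashlib.md5(line.encode("utf-8")).hexdigest()
  bucket_id = int(digest, 16) % num_buckets
  return bucket_id < int(train_fraction * num_buckets)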
tensorflow/examples/get_started/regression/linear_regression_categorical.py
@@ -1,110 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Linear regression with categorical features."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

import imports85  # pylint: disable=g-bad-import-order

STEPS = 1000
PRICE_NORM_FACTOR = 1000


def main(argv):
  """Builds, trains, and evaluates the model."""
  assert len(argv) == 1
  (train, test) = imports85.dataset()

  # Switch the labels to units of thousands for better convergence.
  def normalize_price(features, labels):
    return features, labels / PRICE_NORM_FACTOR

  train = train.map(normalize_price)
  test = test.map(normalize_price)

  # Build the training input_fn.
  def input_train():
    return (
        # Shuffling with a buffer larger than the data set ensures
        # that the examples are well mixed.
        train.shuffle(1000).batch(128)
        # Repeat forever.
        .repeat())

  # Build the validation input_fn.
  def input_test():
    return test.shuffle(1000).batch(128)

  # The following code demonstrates two of the ways that `feature_columns` can
  # be used to build a model with categorical inputs.

  # The first way assigns a unique weight to each category. To do this, you
  # must specify the category's vocabulary (values outside this specification
  # will receive a weight of zero).
  # Alternatively, you can define the vocabulary in a file (by calling
  # `categorical_column_with_vocabulary_file`) or as a range of positive
  # integers (by calling `categorical_column_with_identity`).
  body_style_vocab = ["hardtop", "wagon", "sedan", "hatchback", "convertible"]
  body_style_column = tf.feature_column.categorical_column_with_vocabulary_list(
      key="body-style", vocabulary_list=body_style_vocab)

  # The second way, appropriate for an unspecified vocabulary, is to create a
  # hashed column. It will create a fixed-length list of weights, and
  # automatically assign each input category to a weight. Due to the
  # pseudo-randomness of the process, some weights may be shared between
  # categories, while others will remain unused.
  make_column = tf.feature_column.categorical_column_with_hash_bucket(
      key="make", hash_bucket_size=50)

  feature_columns = [
      # This model uses the same two numeric features as `linear_regressor.py`.
      tf.feature_column.numeric_column(key="curb-weight"),
      tf.feature_column.numeric_column(key="highway-mpg"),
      # This model adds two categorical columns that will adjust the price
      # based on "make" and "body-style".
      body_style_column,
      make_column,
  ]

  # Build the Estimator.
  model = tf.estimator.LinearRegressor(feature_columns=feature_columns)

  # Train the model.
  # By default, the Estimators log output every 100 steps.
  model.train(input_fn=input_train, steps=STEPS)

  # Evaluate how the model performs on data it has not yet seen.
  eval_result = model.evaluate(input_fn=input_test)

  # The evaluation returns a Python dictionary. The "average_loss" key holds
  # the Mean Squared Error (MSE).
  average_loss = eval_result["average_loss"]

  # Convert MSE to Root Mean Square Error (RMSE).
  print("\n" + 80 * "*")
  print("\nRMS error for the test set: ${:.0f}"
        .format(PRICE_NORM_FACTOR * average_loss**0.5))

  print()


if __name__ == "__main__":
  # The Estimator periodically generates "INFO" logs; make these logs visible.
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.app.run(main=main)
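To see concretely what a vocabulary column feeds a model, here is a small sketch of our own (not from the deleted file) that densifies `body_style_column` through an `indicator_column` for two rows; vocabulary lookups are table-backed in TF 1.x, so `tf.tables_initializer` must run first:

features = {"body-style": tf.constant([["sedan"], ["hatchback"]])}
dense = tf.feature_column.input_layer(
    features, [tf.feature_column.indicator_column(body_style_column)])
with tf.Session() as sess:
  sess.run(tf.tables_initializer())  # initialize the vocabulary lookup table
  print(sess.run(dense))  # one row of one-hots per input, ordered by the vocab list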
tensorflow/examples/get_started/regression/test.py
@@ -1,89 +0,0 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A simple smoke test that runs these examples for 1 training iteration."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys

import pandas as pd

from six.moves import StringIO

import tensorflow.examples.get_started.regression.imports85 as imports85

sys.modules["imports85"] = imports85

# pylint: disable=g-bad-import-order,g-import-not-at-top
import tensorflow.data as data

import tensorflow.examples.get_started.regression.dnn_regression as dnn_regression
import tensorflow.examples.get_started.regression.linear_regression_categorical as linear_regression_categorical
import tensorflow.examples.get_started.regression.custom_regression as custom_regression

from tensorflow.python.platform import googletest
from tensorflow.python.platform import test
# pylint: enable=g-bad-import-order,g-import-not-at-top


# pylint: disable=line-too-long
FOUR_LINES = "\n".join([
    "1,?,alfa-romero,gas,std,two,hatchback,rwd,front,94.50,171.20,65.50,52.40,2823,ohcv,six,152,mpfi,2.68,3.47,9.00,154,5000,19,26,16500",
    "2,164,audi,gas,std,four,sedan,fwd,front,99.80,176.60,66.20,54.30,2337,ohc,four,109,mpfi,3.19,3.40,10.00,102,5500,24,30,13950",
    "2,164,audi,gas,std,four,sedan,4wd,front,99.40,176.60,66.40,54.30,2824,ohc,five,136,mpfi,3.19,3.40,8.00,115,5500,18,22,17450",
    "2,?,audi,gas,std,two,sedan,fwd,front,99.80,177.30,66.30,53.10,2507,ohc,five,136,mpfi,3.19,3.40,8.50,110,5500,19,25,15250",
])
# pylint: enable=line-too-long


def four_lines_dataframe():
  text = StringIO(FOUR_LINES)

  return pd.read_csv(
      text, names=imports85.types.keys(), dtype=imports85.types, na_values="?")


def four_lines_dataset(*args, **kwargs):
  del args, kwargs
  return data.Dataset.from_tensor_slices(FOUR_LINES.split("\n"))


class RegressionTest(googletest.TestCase):
  """Test the regression examples in this directory."""

  @test.mock.patch.dict(data.__dict__, {"TextLineDataset": four_lines_dataset})
  @test.mock.patch.dict(imports85.__dict__, {"_get_imports85": (lambda: None)})
  @test.mock.patch.dict(linear_regression_categorical.__dict__, {"STEPS": 1})
  def test_linear_regression_categorical(self):
    linear_regression_categorical.main([""])

  @test.mock.patch.dict(data.__dict__, {"TextLineDataset": four_lines_dataset})
  @test.mock.patch.dict(imports85.__dict__, {"_get_imports85": (lambda: None)})
  @test.mock.patch.dict(dnn_regression.__dict__, {"STEPS": 1})
  def test_dnn_regression(self):
    dnn_regression.main([""])

  @test.mock.patch.dict(data.__dict__, {"TextLineDataset": four_lines_dataset})
  @test.mock.patch.dict(imports85.__dict__, {"_get_imports85": (lambda: None)})
  @test.mock.patch.dict(custom_regression.__dict__, {"STEPS": 1})
  def test_custom_regression(self):
    custom_regression.main([""])


if __name__ == "__main__":
  googletest.main()
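The tests above lean on `mock.patch.dict`, which swaps entries in a module's `__dict__` (here `STEPS`, `TextLineDataset`, and `_get_imports85`) only for the duration of one test and restores them afterwards. A standalone toy sketch of the same mechanism, with names of our own:

from unittest import mock

config = {"STEPS": 5000}

with mock.patch.dict(config, {"STEPS": 1}):
  assert config["STEPS"] == 1   # patched inside the context
assert config["STEPS"] == 5000  # restored on exit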