Error in prediction on Cloud model #65
My code:

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time

import tensorflow as tf

tf.logging.set_verbosity(tf.logging.ERROR)
print("Using Tensorflow version %s" % tf.__version__)
CATEGORICAL_COLUMNS = ["workclass", "education", "marital_status", "occupation",
                       "relationship", "race", "gender", "native_country"]
# Columns of the input csv file
COLUMNS = ["age", "workclass", "fnlwgt", "education", "education_num",
           "marital_status", "occupation", "relationship", "race", "gender",
           "capital_gain", "capital_loss", "hours_per_week", "native_country",
           "income_bracket"]

# Feature columns for input into the model
FEATURE_COLUMNS = ["age", "workclass", "fnlwgt", "education", "education_num",
                   "marital_status", "occupation", "relationship", "race",
                   "gender", "capital_gain", "capital_loss", "hours_per_week",
                   "native_country"]
BATCH_SIZE = 40
def generate_input_fn(filename, batch_size=BATCH_SIZE):
    def _input_fn():
        filename_queue = tf.train.string_input_producer([filename])
        reader = tf.TextLineReader()
        # reads out batch_size number of lines
        key, value = reader.read_up_to(filename_queue,
                                       num_records=batch_size)
        # record_defaults should match the datatypes of each respective column
        record_defaults = [[0], [" "], [0], [" "], [0],
                           [" "], [" "], [" "], [" "], [" "],
                           [0], [0], [0], [" "], [" "]]
        # decode csv data that was just read out
        columns = tf.decode_csv(value, record_defaults=record_defaults)
        # features is a dictionary that maps from column names to tensors;
        # income_bracket is the last column of the data and serves as the label
        all_columns = dict(zip(COLUMNS, columns))
        # Save the income_bracket column as our labels
        # dict.pop returns the popped array of income_bracket values
        income_bracket = all_columns.pop('income_bracket')
        # removes the fnlwgt key, which is not used
        all_columns.pop('fnlwgt', "fnlwgt key not found")
        # the remaining columns are our features
        features = all_columns
        # Sparse categorical features must be represented with an additional
        # dimension. There is no additional work needed for the continuous
        # columns; see docs for tf.SparseTensor for more info
        for feature_name in CATEGORICAL_COLUMNS:
            features[feature_name] = tf.expand_dims(features[feature_name], -1)
        # convert " >50K" to 1 and " <=50K" to 0
        labels = tf.to_int32(tf.equal(income_bracket, " >50K"))
        return features, labels
    return _input_fn
print('input function configured')
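# A quick sanity check of the queue-based input function (a sketch added for
# illustration; it assumes adult.data.csv is already present locally): pull
# one batch through the pipeline and inspect one feature and the labels.
check_features, check_labels = generate_input_fn("adult.data.csv")()
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    age_batch, label_batch = sess.run([check_features["age"], check_labels])
    print(age_batch[:5], label_batch[:5])
    coord.request_stop()
    coord.join(threads)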
# sparse base columns
gender = tf.contrib.layers.sparse_column_with_keys(column_name="gender",
                                                   keys=["female", "male"])
race = tf.contrib.layers.sparse_column_with_keys(column_name="race",
                                                 keys=["Amer-Indian-Eskimo",
                                                       "Asian-Pac-Islander",
                                                       "Black", "Other",
                                                       "White"])
education = tf.contrib.layers.sparse_column_with_hash_bucket(
    "education", hash_bucket_size=1000)
marital_status = tf.contrib.layers.sparse_column_with_hash_bucket(
    "marital_status", hash_bucket_size=100)
workclass = tf.contrib.layers.sparse_column_with_hash_bucket(
    "workclass", hash_bucket_size=100)
relationship = tf.contrib.layers.sparse_column_with_hash_bucket(
    "relationship", hash_bucket_size=100)
occupation = tf.contrib.layers.sparse_column_with_hash_bucket(
    "occupation", hash_bucket_size=1000)
native_country = tf.contrib.layers.sparse_column_with_hash_bucket(
    "native_country", hash_bucket_size=1000)
print('Sparse columns configured')
age = tf.contrib.layers.real_valued_column("age")
education_num = tf.contrib.layers.real_valued_column("education_num")
capital_gain = tf.contrib.layers.real_valued_column("capital_gain")
capital_loss = tf.contrib.layers.real_valued_column("capital_loss")
hours_per_week = tf.contrib.layers.real_valued_column("hours_per_week")
print('Continuous columns configured')
wide_columns = [gender, race, native_country,
                education, occupation, workclass,
                marital_status, relationship]
                # age_buckets,
                # education_occupation,
                # age_race_occupation, country_occupation
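# The commented-out wide columns above would need definitions along these
# lines (an illustrative sketch; the bucket boundaries and hash sizes are
# assumptions, not values from the original post):
age_buckets = tf.contrib.layers.bucketized_column(
    age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
education_occupation = tf.contrib.layers.crossed_column(
    [education, occupation], hash_bucket_size=int(1e4))
age_race_occupation = tf.contrib.layers.crossed_column(
    [age_buckets, race, occupation], hash_bucket_size=int(1e6))
country_occupation = tf.contrib.layers.crossed_column(
    [native_country, occupation], hash_bucket_size=int(1e4))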
deep_columns = [
    tf.contrib.layers.embedding_column(workclass, dimension=8),
    tf.contrib.layers.embedding_column(education, dimension=8),
    tf.contrib.layers.embedding_column(marital_status, dimension=8),
    tf.contrib.layers.embedding_column(gender, dimension=8),
    tf.contrib.layers.embedding_column(relationship, dimension=8),
    tf.contrib.layers.embedding_column(race, dimension=8),
    tf.contrib.layers.embedding_column(native_country, dimension=8),
    tf.contrib.layers.embedding_column(occupation, dimension=8),
    age,
    education_num,
    capital_gain,
    capital_loss,
    hours_per_week
]
print('wide and deep columns configured')
def create_model_dir(model_type):
    return 'models/model_' + model_type + '_' + str(int(time.time()))

# If new_model = False, pass in the desired model_dir
def get_model(model_type, new_model=False, model_dir=None):
    if new_model or model_dir is None:
        model_dir = create_model_dir(model_type)
    print("Model directory = %s" % model_dir)
    m = None
    # Linear classifier
    if model_type == 'WIDE':
        m = tf.contrib.learn.LinearClassifier(
            model_dir=model_dir,
            feature_columns=wide_columns)
    # Deep neural net classifier
    if model_type == 'DEEP':
        m = tf.contrib.learn.DNNClassifier(
            model_dir=model_dir,
            feature_columns=deep_columns,
            # hidden_units is required by DNNClassifier; the original post
            # omitted it, so the layer sizes here mirror the combined model
            hidden_units=[100, 70, 50, 25])
    # Combined linear and deep classifier
    if model_type == 'WIDE_AND_DEEP':
        m = tf.contrib.learn.DNNLinearCombinedClassifier(
            model_dir=model_dir,
            linear_feature_columns=wide_columns,
            dnn_feature_columns=deep_columns,
            dnn_hidden_units=[100, 70, 50, 25])
    print('estimator built')
    return m, model_dir
MODEL_TYPE = 'WIDE_AND_DEEP'
#MODEL_TYPE = 'DEEP'
model_dir = create_model_dir(model_type=MODEL_TYPE)
m, model_dir = get_model(model_type=MODEL_TYPE, model_dir=model_dir)
# gsutil cp gs://cloudml-public/census/data/adult.data.csv C:\Users\vikas\PycharmProjects\TensorflowUScensusData
# gsutil cp gs://cloudml-public/census/data/adult.test.csv C:\Users\vikas\PycharmProjects\TensorflowUScensusData
train_file = "adult.data.csv"
train_steps = 1000
m.fit(input_fn=generate_input_fn(train_file, BATCH_SIZE),
      steps=train_steps)
print('fit done')
test_file = "adult.test.csv"
results = m.evaluate(input_fn=generate_input_fn(test_file), steps=100)
print("evaluate done")
print(results)
print('Accuracy: %s' % results['accuracy'])
from tensorflow.contrib.learn.python.learn.utils import input_fn_utils

def column_to_dtype(column):
    if column in CATEGORICAL_COLUMNS:
        return tf.string
    else:
        return tf.float32

def serving_input_fn():
    feature_placeholders = {
        column: tf.placeholder(column_to_dtype(column), [None])
        for column in FEATURE_COLUMNS
    }
    # DNNLinearCombinedClassifier expects rank-2 tensors, but the input
    # should be rank 1 so that we can provide scalars to the server
    features = {
        key: tf.expand_dims(tensor, -1)
        for key, tensor in feature_placeholders.items()
    }
    return input_fn_utils.InputFnOps(
        features,              # input into graph
        None,                  # no labels at serving time
        feature_placeholders   # tensor input converted from request
    )
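# Note: generate_input_fn parses age, education_num, capital_gain,
# capital_loss and hours_per_week as int32 (record_defaults of [0]), while
# column_to_dtype declares every non-categorical column as tf.float32. If
# the served graph actually expects integers, a hypothetical variant that
# mirrors record_defaults could look like this (a sketch, not a confirmed
# fix for the error reported below):
#
#   INT_COLUMNS = ["age", "education_num", "capital_gain",
#                  "capital_loss", "hours_per_week"]
#
#   def column_to_dtype(column):
#       if column in CATEGORICAL_COLUMNS:
#           return tf.string
#       return tf.int32 if column in INT_COLUMNS else tf.float32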
export_folder = m.export_savedmodel(
    export_dir_base=model_dir + "/export/",
    input_fn=serving_input_fn
)
print('model exported successfully to {}'.format(export_folder))
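One way to double-check which inputs and dtypes the exported serving signature actually expects (assuming a TensorFlow installation that ships the saved_model_cli tool) is to inspect the timestamped SavedModel directory that export_savedmodel printed:

saved_model_cli show --dir <export_folder> --all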
gcloud ml-engine predict --model Deep_Wide --version v10 --json-instances data/test.json

where data/test.json contains:

{"age": 25, "workclass": " Private", "education": " 11th", "education_num": 7, "marital_status": " Never-married", "occupation": " Machine-op-inspct", "relationship": " Own-child", "race": " Black", "gender": " Male", "capital_gain": 12, "capital_loss": 3, "hours_per_week": 40, "native_country": " United-States"}
Have you been able to get this to work? If so, what steps?
No, it has not worked.
Just got it to work. Had to remove line-breaks from the json file. Everything has to be contained on a single line. Does that help?
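For example, the instance shown above stays valid for --json-instances only when it is kept on a single line:

{"age": 25, "workclass": " Private", "education": " 11th", "education_num": 7, "marital_status": " Never-married", "occupation": " Machine-op-inspct", "relationship": " Own-child", "race": " Black", "gender": " Male", "capital_gain": 12, "capital_loss": 3, "hours_per_week": 40, "native_country": " United-States"}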
I am getting the following error when running prediction after deploying the model in the cloud.

On my local machine:
C:\Program Files (x86)\Google\Cloud SDK>gcloud ml-engine predict --model Deep_Wide --version v4 --json-instances C:\Users\vikas\PycharmProjects\TensorflowUScensusData\test.json
{
  "error": "Prediction failed: Error processing input: Incompatible types: 0 vs. float64"
}
The same happens if I run it in the cloud:
vkg_vikas_gupta@vikas-sapref:~$ gcloud ml-engine predict --model Deep_Wide --version v5 --json-instances data/test.json
{
  "error": "Prediction failed: Error processing input: Incompatible types: 0 vs. float64"
}