1. 数字类型的特征列（numeric columns）
# Numeric feature columns, one per named numeric input feature.
education_num, capital_gain, capital_loss, hours_per_week = (
    tf.feature_column.numeric_column(feature_name)
    for feature_name in (
        'education_num',
        'capital_gain',
        'capital_loss',
        'hours_per_week',
    )
)

# NOTE(review): `age` is not defined in this section; it is expected to
# already exist as a feature column -- confirm against the earlier notes.
my_numeric_columns = [
    age,
    education_num,
    capital_gain,
    capital_loss,
    hours_per_week,
]

# Run one batch through the numeric columns and convert the result to a
# NumPy array (presumably evaluated for display in a notebook cell).
fc.input_layer(
    feature_batch,
    my_numeric_columns,
).numpy()
2. Categorical columns with a known vocabulary list
# Categorical column for 'relationship' whose possible string values are
# listed explicitly.
relationship = fc.categorical_column_with_vocabulary_list(
    key='relationship',
    vocabulary_list=[
        'Husband',
        'Not-in-family',
        'Wife',
        'Own-child',
        'Unmarried',
        'Other-relative',
    ],
)

# The categorical column is wrapped in an indicator column before it is
# passed to input_layer together with `age`.
fc.input_layer(
    feature_batch,
    [age, fc.indicator_column(relationship)],
)
3. Categorical columns with a hash bucket, for when the set of possible values is not known in advance
# Categorical column for 'occupation' that hashes each value into one of
# 1000 buckets instead of relying on a fixed vocabulary.
occupation = tf.feature_column.categorical_column_with_hash_bucket(
    'occupation', hash_bucket_size=1000)

# Print every raw occupation value in the batch. The items are decoded
# before printing, so they are expected to be bytes objects.
for item in feature_batch['occupation'].numpy():
    print(item.decode())

# Feed the batch through the hashed column (wrapped in an indicator
# column) and inspect the shape of the resulting array.
occupation_result = fc.input_layer(feature_batch, [fc.indicator_column(occupation)])
occupation_result.numpy().shape
4. 综合：同时使用数值列和类别列训练线性分类器
# NOTE(review): `education`, `marital_status` and `workclass` are not defined
# in this section; they are expected to exist already -- confirm.
my_categorical_columns = [relationship, occupation, education, marital_status, workclass]

# Train a linear classifier on the numeric and categorical columns together,
# then evaluate it on the test input function.
classifier = tf.estimator.LinearClassifier(
    feature_columns=my_numeric_columns + my_categorical_columns)
classifier.train(train_inpf)
result = classifier.evaluate(test_inpf)

# NOTE(review): clear_output is presumably IPython.display.clear_output,
# used to drop the training log from the notebook cell -- confirm.
clear_output()

# Print the evaluation metrics in alphabetical order of their names.
for key, value in sorted(result.items()):
    print('%s: %s' % (key, value))
5. 连续数值的分桶（bucketized column）
# Bucketized version of the `age` column, split at the listed boundaries.
age_buckets = tf.feature_column.bucketized_column(
    source_column=age,
    boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65],
)

# Run one batch through both the raw and the bucketized age columns and
# convert the result to a NumPy array.
fc.input_layer(
    feature_batch,
    [age, age_buckets],
).numpy()
6. 组合多个列（crossed column，特征交叉）
# Cross of the 'education' and 'occupation' features, hashed into 1000 buckets.
education_x_occupation = tf.feature_column.crossed_column(
    keys=['education', 'occupation'],
    hash_bucket_size=1000,
)

# Three-way cross that also includes the bucketized age column.
age_buckets_x_education_x_occupation = tf.feature_column.crossed_column(
    keys=[age_buckets, 'education', 'occupation'],
    hash_bucket_size=1000,
)
7. 总之，有以下几种特征列类型：
CategoricalColumn
NumericColumn
BucketizedColumn
CrossedColumn
import tempfile
# Plain (non-crossed) feature columns used by the model.
base_columns = [
    education,
    marital_status,
    relationship,
    workclass,
    occupation,
    age_buckets,
]

# Crossed feature columns, each hashed into 1000 buckets.
crossed_columns = [
    tf.feature_column.crossed_column(keys=column_keys, hash_bucket_size=1000)
    for column_keys in (
        ['education', 'occupation'],
        [age_buckets, 'education', 'occupation'],
    )
]

# Linear classifier over all columns, writing to a fresh temporary
# directory and optimized with FTRL at a learning rate of 0.1.
model = tf.estimator.LinearClassifier(
    feature_columns=base_columns + crossed_columns,
    model_dir=tempfile.mkdtemp(),
    optimizer=tf.train.FtrlOptimizer(learning_rate=0.1),
)