1 特征封装
SparseFeature
1.1 namedtuple 使用
from collections import namedtuple

# Build a lightweight record type with three named fields. The class is bound
# to ``Users`` while its type name (the one shown in reprs) is "User".
Users = namedtuple("User", ["name", "sex", "age"])
user = Users(name="harry", sex="male", age=22)
# Fields are read by attribute name instead of by tuple index.
print(user.name, user.sex, user.age)
harry male 22
1.2 SparseFeat 和 VarLenSparseFeat
SparseFeat 继承 namedtuple,并在 __new__ 中为各字段设置默认值。
from collections import namedtuple

import tensorflow as tf
from tensorflow import keras
# Disable TensorFlow eager execution via the TF1 compatibility API.
tf.compat.v1.disable_eager_execution()
class SparseFeat(
    namedtuple(
        "SparseFeat",
        [
            "name",
            "vocabulary_size",
            "embedding_dim",
            "use_hash",
            "dtype",
            "embedding_initializer",
            "embedding_name",
            "group_name",
            "trainable",
        ],
    )
):
    """Immutable description of one sparse (categorical) feature.

    The class is a ``namedtuple`` whose ``__new__`` fills in defaults, so
    instances stay plain tuples with named, read-only fields.

    Args:
        name: Feature name; also the value the instance hashes on.
        vocabulary_size: Number used to derive the embedding width when
            ``embedding_dim`` is ``"auto"``.
        embedding_dim: Embedding width, or the string ``"auto"`` to use
            ``6 * int(vocabulary_size ** 0.25)``.
        use_hash: Stored as given (default ``False``).
        dtype: Stored as given (default ``"int32"``).
        embedding_initializer: Initializer object; when ``None`` a
            ``keras.initializers.RandomNormal(mean=0.0, stddev=0.0001,
            seed=2022)`` is created.
        embedding_name: Stored as given; falls back to ``name`` when ``None``.
        group_name: Stored as given (default ``"default_group"``).
        trainable: Stored as given (default ``True``).
    """

    # No per-instance __dict__: the tuple fields are the only state.
    __slots__ = ()

    def __new__(
        cls,
        name,
        vocabulary_size,
        embedding_dim,
        use_hash=False,
        dtype="int32",
        embedding_initializer=None,
        embedding_name=None,
        group_name="default_group",
        trainable=True,
    ):
        """Create the tuple after resolving ``"auto"`` and ``None`` defaults."""
        if embedding_dim == "auto":
            # Width grows with the fourth root of the vocabulary size.
            embedding_dim = 6 * int(pow(vocabulary_size, 0.25))
        if embedding_initializer is None:
            embedding_initializer = keras.initializers.RandomNormal(
                mean=0.0, stddev=0.0001, seed=2022
            )
        if embedding_name is None:
            embedding_name = name
        return super().__new__(
            cls,
            name,
            vocabulary_size,
            embedding_dim,
            use_hash,
            dtype,
            embedding_initializer,
            embedding_name,
            group_name,
            trainable,
        )

    def __hash__(self):
        """Hash on the feature name only, ignoring the other fields."""
        return hash(self.name)
class VarLenSparseFeat(
    namedtuple(
        "VarLenSparseFeat",
        ["sparsefeat", "maxlen", "combiner", "length_name", "weight_name", "weight_norm"],
    )
):
    """Immutable description of a variable-length sparse feature.

    Wraps another feature description (``sparsefeat``) and adds the
    sequence-specific settings. The wrapped feature's attributes are exposed
    as read-only properties so that ``name`` (used by ``__hash__``) and the
    other per-feature settings can be read directly from this object.

    Args:
        sparsefeat: The wrapped feature description; must expose the
            attributes that the properties below forward to.
        maxlen: Stored as given.
        combiner: Stored as given (default ``"mean"``).
        length_name: Stored as given (default ``None``).
        weight_name: Stored as given (default ``None``).
        weight_norm: Stored as given (default ``True``).
    """

    # No per-instance __dict__: the tuple fields are the only state.
    __slots__ = ()

    def __new__(
        cls,
        sparsefeat,
        maxlen,
        combiner="mean",
        length_name=None,
        weight_name=None,
        weight_norm=True,
    ):
        """Create the tuple, applying the keyword defaults."""
        return super().__new__(
            cls, sparsefeat, maxlen, combiner, length_name, weight_name, weight_norm
        )

    # The tuple itself has no ``name`` field, so the properties below forward
    # to the wrapped feature. Without ``name``, ``__hash__`` would raise
    # AttributeError.
    @property
    def name(self):
        """Name of the wrapped feature."""
        return self.sparsefeat.name

    @property
    def vocabulary_size(self):
        """``vocabulary_size`` of the wrapped feature."""
        return self.sparsefeat.vocabulary_size

    @property
    def embedding_dim(self):
        """``embedding_dim`` of the wrapped feature."""
        return self.sparsefeat.embedding_dim

    @property
    def use_hash(self):
        """``use_hash`` of the wrapped feature."""
        return self.sparsefeat.use_hash

    @property
    def dtype(self):
        """``dtype`` of the wrapped feature."""
        return self.sparsefeat.dtype

    @property
    def embedding_initializer(self):
        """``embedding_initializer`` of the wrapped feature."""
        return self.sparsefeat.embedding_initializer

    @property
    def embedding_name(self):
        """``embedding_name`` of the wrapped feature."""
        return self.sparsefeat.embedding_name

    @property
    def group_name(self):
        """``group_name`` of the wrapped feature."""
        return self.sparsefeat.group_name

    @property
    def trainable(self):
        """``trainable`` of the wrapped feature."""
        return self.sparsefeat.trainable

    def __hash__(self):
        """Hash on the wrapped feature's name only."""
        return hash(self.name)
2 特征封装应用
将需要的类别特征封装在 SparseFeat 和 VarLenSparseFeat 中,例如:
# Per-feature value passed to SparseFeat as ``vocabulary_size``.
feature_max_idx = {
    "user_id": 4,
    "movie_id": 208,
    "gender": 3,
    "age": 4,
    "occupation": 4,
    "zip": 4,
}
embedding_dim = 16

# User-side columns: one SparseFeat per single-valued feature, in this order.
user_feature_columns = [
    SparseFeat(feat, feature_max_idx[feat], embedding_dim)
    for feat in ("user_id", "gender", "age", "occupation", "zip")
]
# The movie history is a variable-length feature. Its embedding_name is
# "movie_id", the same value the item-side SparseFeat below gets by default.
# NOTE(review): presumably so both use one embedding table; confirm in the
# model code.
user_feature_columns.append(
    VarLenSparseFeat(
        SparseFeat(
            "hist_movie_id",
            feature_max_idx["movie_id"],
            embedding_dim,
            embedding_name="movie_id",
        ),
        maxlen=50,
        combiner="mean",
        length_name="hist_len",
    )
)

# Item-side columns: only the movie id.
item_feature_columns = [
    SparseFeat("movie_id", feature_max_idx["movie_id"], embedding_dim),
]