  • An elegant way to prepare input data for an HMM model, from a real project

    In the real project, this is how I did it:

    import os
    import sys
    import pickle
    import numpy as np
    from hmmlearn import hmm
    from sklearn.preprocessing import LabelEncoder

    def mining_ue_procedures_behavior(seq, lengths, imsi_list):
        print("seq 3:", seq[:3], "lengths 3:", lengths[:3])
        # Encode the raw procedure/event labels as integers for hmmlearn.
        fitter = LabelEncoder().fit(seq)

        # Model size and iteration count are selected via command-line indices.
        n_components = [5, 10, 20, 30][int(sys.argv[1])]
        n_iter = [10, 30, 50, 100][int(sys.argv[2])]

        # Cache the fitted model on disk so repeated runs skip retraining.
        model_file = 'hmm_model_{}_{}.pkl'.format(n_components, n_iter)
        if os.path.exists(model_file):
            with open(model_file, 'rb') as input_file:
                model = pickle.load(input_file)
        else:
            model = hmm.MultinomialHMM(n_components=n_components, n_iter=n_iter)
            # hmmlearn expects a column vector of symbols plus per-sequence lengths.
            seq2 = fitter.transform(seq)
            model.fit(np.array([seq2]).T, lengths)
            with open(model_file, 'wb') as output_file:
                pickle.dump(model, output_file)
        print("model.startprob_:", model.startprob_)
        print("model.transmat_:", model.transmat_)
        print("model.emissionprob_:", model.emissionprob_)
        ## example emissionprob_ output:
        ## [[  1.11111111e-01   2.22222222e-01   6.66666667e-01]
        ##  [  5.55555556e-01   4.44444444e-01   6.27814351e-28]]

        # Score each UE's sub-sequence separately, using `lengths` to split
        # the flat `seq` back into per-IMSI sequences.
        start = 0
        ans = []
        for i, l in enumerate(lengths):
            s = seq[start:start + l]
            score = model.score(np.array([[d] for d in fitter.transform(s)]))
            ans.append([score, imsi_list[i], s])
            start += l

        # The lower the log-likelihood, the more anomalous the behaviour.
        ans.sort(key=lambda x: x[0])
        score_index = 0
        malicious_ue = []
        for i, item in enumerate(ans):
            if item[score_index] < Config.HMMBaseScore:  # project-level score threshold
                malicious_ue.append(item)
            print(item)
        return malicious_ue
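
    For context, here is a minimal sketch of how the three inputs above could be built; the per-UE event names and IMSI values are illustrative assumptions, not data from the original project:

    # Hypothetical sketch: build the flat `seq` / `lengths` / `imsi_list` triple
    # that mining_ue_procedures_behavior() expects from per-UE event lists.
    per_ue_events = {
        "46000xxxxxxxxx1": ["attach", "auth", "tau", "detach"],
        "46000xxxxxxxxx2": ["attach", "auth", "service_req", "service_req", "detach"],
    }

    seq, lengths, imsi_list = [], [], []
    for imsi, events in per_ue_events.items():
        seq.extend(events)           # all events concatenated into one flat list
        lengths.append(len(events))  # per-UE sequence length, used later to split seq back
        imsi_list.append(imsi)

    # Then, with the two model-size indices passed on the command line:
    # mining_ue_procedures_behavior(seq, lengths, imsi_list)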
    

      

    The input data preparation follows the elegant approach below:

    import random
    import numpy as np
    from hmmlearn import hmm

    # Build 100 random observation sequences of length 5-10, concatenated into
    # one flat list `seq` plus a `lengths` list, which is the input format
    # hmmlearn expects for multiple sequences.
    seq = []
    lengths = []
    for _ in range(100):
        length = random.randint(5, 10)
        lengths.append(length)
        for _ in range(length):
            r = random.random()
            if r < .2:
                seq.append(0)
            elif r < .6:
                seq.append(1)
            else:
                seq.append(2)
    # Reshape into a single column vector of symbols before fitting.
    seq = np.array([seq]).T
    # Assumed for this snippet: a MultinomialHMM with 2 hidden states
    # (the original did not show the model construction).
    model = hmm.MultinomialHMM(n_components=2, n_iter=100)
    model = model.fit(seq, lengths)
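
    Once the model above is fitted, the same lengths list can be used to split the flat seq back into individual sequences for scoring or decoding. A minimal sketch (my addition, reusing seq, lengths and model from the snippet above):

    # Score and decode each generated sequence separately; `lengths` recovers
    # the per-sequence boundaries from the flat column vector `seq`.
    start = 0
    for length in lengths:
        sub = seq[start:start + length]   # shape (length, 1) column of symbols
        print("log-likelihood:", model.score(sub))
        print("most likely hidden states:", model.predict(sub))
        start += length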
    

    In addition, on continued, incremental training of an HMM model:

    # Problem 3, the learning problem: given only X, estimate the model parameters
    # with the Baum-Welch algorithm, which is essentially an EM-based solver.
    # Solving it needs a reasonable amount of data in X; training is done via
    # model.fit(X, lengths=None), which produces a fitted model by itself.
    # There is no need to set model.startprob_, model.transmat_ or
    # model.emissionprob_ manually. For example:

    import numpy as np
    from hmmlearn import hmm

    states = ["Rainy", "Sunny"]              ## hidden states
    n_states = len(states)                   ## number of hidden states

    observations = ["walk", "shop", "clean"] ## observable symbols
    n_observations = len(observations)       ## number of observable symbols

    model = hmm.MultinomialHMM(n_components=n_states, n_iter=1000, tol=0.01)

    X = np.array([[2, 0, 1, 1, 2, 0], [0, 0, 1, 1, 2, 0], [2, 1, 2, 1, 2, 0]])
    model.fit(X)
    print(model.startprob_)
    print(model.transmat_)
    print(model.emissionprob_)
    # [[  1.11111111e-01   2.22222222e-01   6.66666667e-01]
    #  [  5.55555556e-01   4.44444444e-01   6.27814351e-28]]
    print(model.score(X))

    model.fit(X)
    print(model.startprob_)
    print(model.transmat_)
    print(model.emissionprob_)
    # The row order differs from that of the first fit(X):
    # [[  5.55555556e-01   4.44444444e-01   9.29759770e-28]
    #  [  1.11111111e-01   2.22222222e-01   6.66666667e-01]]
    print(model.score(X))

    model.fit(X)
    print(model.startprob_)
    print(model.transmat_)
    print(model.emissionprob_)
    print(model.score(X))
    # You can fit several times and keep the model with the highest score,
    # then use that model for prediction.

    # bob_Actions: an encoded observation sequence (defined elsewhere in the original post).
    print(model.predict(bob_Actions, lengths=None))
    # Predict the most likely hidden state sequence, e.g.:
    # [0 1 0 0 0 1]
    print(model.predict_proba(bob_Actions, lengths=None))  # posterior probability of each hidden state
    # e.g.:
    # [[ 0.82770645  0.17229355]
    #  [ 0.27361913  0.72638087]
    #  [ 0.58700959  0.41299041]
    #  [ 0.69861348  0.30138652]
    #  [ 0.81799813  0.18200187]
    #  [ 0.24723966  0.75276034]]

    # From the fitted model, you can also randomly sample hidden states Z and observations X.
    X, Z = model.sample(n_samples=5, random_state=None)
    print("Bob Actions:", ", ".join(observations[int(x)] for x in X.ravel()))
    print("weathers:", ", ".join(states[int(z)] for z in Z))


    # Save the model
    import pickle
    output_file = open(r'D:\xxx\data1111.pkl', 'wb')
    pickle.dump(model, output_file)
    output_file.close()
    # Load the model
    input_file = open(r'D:\xxx\data.pkl', 'rb')
    model = pickle.load(input_file)
    input_file.close()
    model.predict(X)
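
    The comment above about fitting several times and keeping the highest-scoring model can be made concrete. Below is a minimal sketch (my addition, not from the original post): it reuses X from the example, reshaped into the column-vector-plus-lengths form shown earlier, and assumes the older hmmlearn API in which MultinomialHMM models discrete symbols; the 10 restarts are arbitrary:

    import numpy as np
    from hmmlearn import hmm

    X = np.array([[2, 0, 1, 1, 2, 0], [0, 0, 1, 1, 2, 0], [2, 1, 2, 1, 2, 0]])
    X_col = X.reshape(-1, 1)             # one column vector of symbols
    lengths = [X.shape[1]] * X.shape[0]  # three sequences of length 6

    # Baum-Welch only finds a local optimum, so restart the fit with different
    # random initialisations and keep the model with the best log-likelihood.
    best_model, best_score = None, float("-inf")
    for seed in range(10):
        candidate = hmm.MultinomialHMM(n_components=2, n_iter=1000,
                                       tol=0.01, random_state=seed)
        candidate.fit(X_col, lengths)
        score = candidate.score(X_col, lengths)
        if score > best_score:
            best_model, best_score = candidate, score

    print("best log-likelihood:", best_score)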
    

      

  • Original post: https://www.cnblogs.com/bonelee/p/10860978.html