zoukankan      html  css  js  c++  java
  • 时间序列相似度分析算法

    时间序列的相似性分析的理论和原理:

    可按照以下几篇博客进行学习和查看分析即可

    https://www.jianshu.com/p/e8e02cdc43d5?from=groupmessage

    https://zhuanlan.zhihu.com/p/39450321

    https://wenku.baidu.com/view/58dfefbc2b160b4e777fcf77.html

    https://github.com/wannesm/dtaidistance

    https://blog.csdn.net/xsdxs/article/details/86648605?utm_medium=distribute.pc_relevant.none-task-blog-baidujs-2

    时间序列分析不同算法的实现:主要是进行相同时间段的数据的时序形状对比输出展示

    #欧氏距离公式计算时间序列的相似度
    import numpy as np
    import matplotlib.pyplot as plt

    def Oushidistance(s1,s2):
    m=len(s1)
    n=len(s2)
    plt.plot(s1, "r", s2, "g")
    plt.show()
    s1 = (s1 - np.mean(s1)) / np.std(s1)
    s2 = (s2 - np.mean(s2)) / np.std(s2)
    distance=0
    for i in range(m):
    distance+=(s1[i]-s2[i])**2
    return np.sqrt(distance)

    if __name__ == '__main__':
    s1 = np.array([1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1])
    s2 = np.array([0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2])
    s3 = np.array([0.8, 1.5, 0, 1.2, 0, 0, 0.6, 1, 1.2, 0, 0, 1, 0.2, 2.4, 0.5, 0.4])
    s4 = np.array([1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1.5])
    s5 = np.array([x+1 for x in s1])
    print(Oushidistance(s1,s2))
    print(Oushidistance(s1,s3))
    print(Oushidistance(s1,s4))
    print(Oushidistance(s1,s5))
    #DWT算法的实现和对比分析展示
    import numpy as np
    import matplotlib.pyplot as plt
    float_formatter = lambda x: "%.2f" % x
    np.set_printoptions(formatter={'float_kind': float_formatter})

    def TimeSeriesSimilarity(s1, s2):
    l1 = len(s1)
    l2 = len(s2)
    plt.plot(s1, "r", s2, "g")
    plt.show()
    s1 = (s1 - np.mean(s1)) / np.std(s1)
    s2 = (s2 - np.mean(s2)) / np.std(s2)
    paths = np.full((l1 + 1, l2 + 1), np.inf) # 全部赋予无穷大
    paths[0, 0] = 0
    for i in range(l1):
    for j in range(l2):
    d = s1[i] - s2[j]
    cost = d ** 2
    paths[i + 1, j + 1] = cost + min(paths[i, j + 1], paths[i + 1, j], paths[i, j])

    paths = np.sqrt(paths)
    s = paths[l1, l2]
    return s, paths.T


    if __name__ == '__main__':
    s1 = [1, 2, 0, 1, 1, 2]
    s2 = [1, 0, 1]
    s2 = [3,5,1,3,3,5]
    plt.plot(s1,"r",s2,"g")
    plt.show()
    print(s1,s2)
    distance, paths = TimeSeriesSimilarity(s1, s2)
    print(distance)

    s1 = np.array([1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1])
    s2 = np.array([0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2])
    s3 = np.array([0.8, 1.5, 0, 1.2, 0, 0, 0.6, 1, 1.2, 0, 0, 1, 0.2, 2.4, 0.5, 0.4])
    s4 = np.array([1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1.5])
    s5 = np.array([x + 1 for x in s1])
    distance, paths = TimeSeriesSimilarity(s1, s2)
    print(distance)
    distance, paths = TimeSeriesSimilarity(s1, s3)
    print(distance)
    distance, paths = TimeSeriesSimilarity(s1, s4)
    print(distance)
    distance, paths = TimeSeriesSimilarity(s1, s5)
    print(distance)
    #基于形态相似度计算的时间序列相似度计算
    import numpy as np
    import matplotlib.pyplot as plt

    def Oushidistance(s1,s2):
    m=len(s1)
    n=len(s2)
    plt.plot(s1, "r", s2, "g")
    plt.show()
    s1 = (s1 - np.mean(s1)) / np.std(s1)
    s2 = (s2 - np.mean(s2)) / np.std(s2)
    distance=np.sqrt(np.sum((s1-s2)**2))
    ASD=abs(np.sum(s1-s2))
    SAD=np.sum(abs(s1-s2))
    if SAD==0:
    return 0
    else:
    distance1=distance*(2-ASD/SAD)
    return distance1


    if __name__ == '__main__':
    s1 = np.array([1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1])
    s2 = np.array([0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2])
    s3 = np.array([0.8, 1.5, 0, 1.2, 0, 0, 0.6, 1, 1.2, 0, 0, 1, 0.2, 2.4, 0.5, 0.4])
    s4 = np.array([1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1.5])
    s5 = np.array([x + 1 for x in s1])
    print(Oushidistance(s1,s2))
    print(Oushidistance(s1,s3))
    print(Oushidistance(s1,s4))
    print(Oushidistance(s1,s5))
    #改进版本的DWT算法1实现展示
    import numpy as np
    import math

    def get_common_seq(best_path, threshold=1):
    com_ls = []
    pre = best_path[0]
    length = 1
    for i, element in enumerate(best_path):
    if i == 0:
    continue
    cur = best_path[i]
    if cur[0] == pre[0] + 1 and cur[1] == pre[1] + 1:
    length = length + 1
    else:
    com_ls.append(length)
    length = 1
    pre = cur
    com_ls.append(length)
    return list(filter(lambda num: True if threshold < num else False, com_ls))


    def calculate_attenuate_weight(seqLen, com_ls):
    weight = 0
    for comlen in com_ls:
    weight = weight + (comlen * comlen) / (seqLen * seqLen)
    return 1 - math.sqrt(weight)


    def best_path(paths):
    """Compute the optimal path from the nxm warping paths matrix."""
    i, j = int(paths.shape[0] - 1), int(paths.shape[1] - 1)
    p = []
    if paths[i, j] != -1:
    p.append((i - 1, j - 1))
    while i > 0 and j > 0:
    c = np.argmin([paths[i - 1, j - 1], paths[i - 1, j], paths[i, j - 1]])
    if c == 0:
    i, j = i - 1, j - 1
    elif c == 1:
    i = i - 1
    elif c == 2:
    j = j - 1
    if paths[i, j] != -1:
    p.append((i - 1, j - 1))
    p.pop()
    p.reverse()
    return p


    def TimeSeriesSimilarity(s1, s2):
    l1 = len(s1)
    l2 = len(s2)
    paths = np.full((l1 + 1, l2 + 1), np.inf) # 全部赋予无穷大
    paths[0, 0] = 0
    s1 = (s1 - np.mean(s1)) / np.std(s1)
    s2 = (s2 - np.mean(s2)) / np.std(s2)
    for i in range(l1):
    for j in range(l2):
    d = s1[i] - s2[j]
    cost = d ** 2
    paths[i + 1, j + 1] = cost + min(paths[i, j + 1], paths[i + 1, j], paths[i, j])

    paths = np.sqrt(paths)
    s = paths[l1, l2]
    return s, paths.T


    if __name__ == '__main__':
    # 测试数据
    s1 = np.array([1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1])
    s2 = np.array([0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2])
    s3 = np.array([0.8, 1.5, 0, 1.2, 0, 0, 0.6, 1, 1.2, 0, 0, 1, 0.2, 2.4, 0.5, 0.4])

    # 原始算法
    distance12, paths12 = TimeSeriesSimilarity(s1, s2)
    distance13, paths13 = TimeSeriesSimilarity(s1, s3)

    print("更新前s1和s2距离:" + str(distance12))
    print("更新前s1和s3距离:" + str(distance13))

    best_path12 = best_path(paths12)
    best_path13 = best_path(paths13)

    # 衰减系数
    com_ls1 = get_common_seq(best_path12)
    com_ls2 = get_common_seq(best_path13)

    # print(len(best_path12), com_ls1)
    # print(len(best_path13), com_ls2)
    weight12 = calculate_attenuate_weight(len(best_path12), com_ls1)
    weight13 = calculate_attenuate_weight(len(best_path13), com_ls2)

    # 更新距离
    print("更新后s1和s2距离:" + str(distance12 * weight12))
    print("更新后s1和s3距离:" + str(distance13 * weight13))

    #改进版本的DWT算法实现第二种方式
    import numpy as np

    float_formatter = lambda x: "%.2f" % x
    np.set_printoptions(formatter={'float_kind': float_formatter})


    def TimeSeriesSimilarityImprove(s1, s2):
    # 取较大的标准差
    sdt = np.std(s1, ddof=1) if np.std(s1, ddof=1) > np.std(s2, ddof=1) else np.std(s2, ddof=1)
    # print("两个序列最大标准差:" + str(sdt))
    l1 = len(s1)
    l2 = len(s2)
    paths = np.full((l1 + 1, l2 + 1), np.inf) # 全部赋予无穷大
    sub_matrix = np.full((l1, l2), 0) # 全部赋予0
    max_sub_len = 0
    s1 = (s1 - np.mean(s1)) / np.std(s1)
    s2 = (s2 - np.mean(s2)) / np.std(s2)
    paths[0, 0] = 0
    for i in range(l1):
    for j in range(l2):
    d = s1[i] - s2[j]
    cost = d ** 2
    paths[i + 1, j + 1] = cost + min(paths[i, j + 1], paths[i + 1, j], paths[i, j])
    if np.abs(s1[i] - s2[j]) < sdt:
    if i == 0 or j == 0:
    sub_matrix[i][j] = 1
    else:
    sub_matrix[i][j] = sub_matrix[i - 1][j - 1] + 1
    max_sub_len = sub_matrix[i][j] if sub_matrix[i][j] > max_sub_len else max_sub_len

    paths = np.sqrt(paths)
    s = paths[l1, l2]
    #return s, paths.T, [max_sub_len]

    weight = 0
    for comlen in [max_sub_len]:
    weight = weight + comlen / len(s1) * comlen / len(s2)
    a=1 - weight
    distance=s*a
    return distance


    if __name__ == '__main__':
    # 测试数据
    s1 = np.array([1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1])
    s2 = np.array([0, 1, 1, 2, 0, 1, 1.7, 2, 0, 1, 1, 2, 0, 1, 1, 2])
    s3 = np.array([0.8, 1.5, 0, 1.2, 0, 0, 0.6, 1, 1.2, 0, 0, 1, 0.2, 2.4, 0.5, 0.4])
    s4 = np.array([1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 2])
    s5 = np.array([x + 1 for x in s1])
    print(TimeSeriesSimilarityImprove(s1, s2))
    print(TimeSeriesSimilarityImprove(s1, s3))
    print(TimeSeriesSimilarityImprove(s1, s4))
    print(TimeSeriesSimilarityImprove(s1, s5))

     时间序列分析对比展示图像

  • 相关阅读:
    c++重载operator的示例 非原创
    L1-2 倒数第N个字符串 (15 分)真坑
    error C2955: “std::xx”: 使用 类 模板 需要 模板 参数列表
    时间超限问题处理(c++)
    C语言实验1
    心理魔术
    闰年作业
    20180425
    Labview学习笔记-条件结构的两个问题
    判断文件目录或者文件是否存在
  • 原文地址:https://www.cnblogs.com/Yanjy-OnlyOne/p/13337026.html
Copyright © 2011-2022 走看看