给定两组序列数据,求两者的线性相关系数。
1:使用numpy
import random

import numpy as np

# Two sample sequences: 20 random integers each, drawn from [0, 10].
a = [random.randint(0, 10) for _ in range(20)]
b = [random.randint(0, 10) for _ in range(20)]

# Build a matrix first: one row per sequence (shape 2 x 20). np.cov and
# np.corrcoef treat each row as one variable by default.
ab = np.array([a, b])

# Covariance matrix (2 x 2): variances on the diagonal, cov(a, b) off it.
print(np.cov(ab))

# Correlation-coefficient matrix (2 x 2): the off-diagonal entry is the
# linear (Pearson) correlation between a and b.
print(np.corrcoef(ab))
2:使用pandas
import pandas as pd

# Compute the covariance and correlation coefficient with pandas.
# `ab` is the 2 x 20 NumPy array built in the NumPy example (section 1).
# A DataFrame is used as the data structure; the matrix is transposed so
# that each sequence becomes one column.
dfab = pd.DataFrame(ab.T, columns=['A', 'B'])

# Covariance of columns A and B
print(dfab.A.cov(dfab.B))

# Correlation coefficient of columns A and B
print(dfab.A.corr(dfab.B))
3:使用原生函数
import random
import math

# Two sample sequences: 20 random integers each, drawn from [0, 10].
a = [random.randint(0, 10) for _ in range(20)]
b = [random.randint(0, 10) for _ in range(20)]


def mean(x):
    """Return the arithmetic mean of the sequence *x*."""
    return sum(x) / len(x)


def de_mean(x):
    """Return a list with the deviation of every item of *x* from the mean."""
    x_bar = mean(x)
    return [x_i - x_bar for x_i in x]


def dot(v, w):
    """Return the dot product of *v* and *w* (pairs are formed with zip)."""
    return sum(v_i * w_i for v_i, w_i in zip(v, w))


def sum_of_squares(v):
    """Return the sum of the squared items of *v*."""
    return dot(v, v)


def variance(x):
    """Return the sample variance of *x* (divides by n - 1)."""
    n = len(x)
    deviations = de_mean(x)
    return sum_of_squares(deviations) / (n - 1)


def standard_deviation(x):
    """Return the sample standard deviation of *x*."""
    return math.sqrt(variance(x))


def covariance(x, y):
    """Return the sample covariance of *x* and *y* (divides by n - 1).

    Raises:
        ValueError: if *x* and *y* differ in length. Without this check
            zip() would silently drop the surplus items while the divisor
            was still derived from len(x), giving a meaningless result.
    """
    n = len(x)
    if len(y) != n:
        raise ValueError("x and y must have the same length")
    return dot(de_mean(x), de_mean(y)) / (n - 1)


def correlation(x, y):
    """Return the linear correlation coefficient of *x* and *y*.

    Returns 0 when either sequence has zero standard deviation, because
    the coefficient is undefined in that case.
    """
    stdev_x = standard_deviation(x)
    stdev_y = standard_deviation(y)
    if stdev_x > 0 and stdev_y > 0:
        return covariance(x, y) / stdev_x / stdev_y
    else:
        return 0


print(a)
print(b)
print(standard_deviation(a))
print(standard_deviation(b))
print(correlation(a, b))
4:使用 R、SPSS、Excel