1 用通俗的语言介绍下线性回归->逻辑回归->SVM之间的区别和联系。
2 聚类算法的应用场景,以及 k-means 中的 k 值怎么确定。

def center(data, previous=None):
    """Return the centroid of every cluster in *data*.

    Args:
        data: A list of clusters; each cluster is a list of points and each
            point is a sequence of numeric coordinates.
        previous: Optional list of the centroids used to build *data*, in
            the same order as the clusters. When given, an empty cluster
            keeps its previous centroid.

    Returns:
        A list with one centroid (a list of floats) per cluster, in the
        same order as *data*. An empty cluster with no *previous* fallback
        yields ``[0.0, 0.0]``.
    """
    centers = []
    for index, cluster in enumerate(data):
        if not cluster:
            # An empty cluster has no mean; keep the old centroid when the
            # caller supplied one instead of dividing by zero.
            if previous is not None:
                centers.append(list(previous[index]))
            else:
                centers.append([0.0, 0.0])
            continue
        # Average over the points of THIS cluster, not the cluster count.
        size = float(len(cluster))
        centers.append([sum(axis) / size for axis in zip(*cluster)])
    return centers


def distance(one, two):
    """Return the Euclidean distance between points *one* and *two*.

    Args:
        one: A sequence of numeric coordinates.
        two: A sequence of numeric coordinates, at least as long as *one*.

    Returns:
        The square root of the summed squared coordinate differences,
        taken over the coordinates of *one*.
    """
    total = 0
    for index, value in enumerate(one):
        total += (value - two[index]) ** 2
    return total ** 0.5


def update(data, kcenter):
    """Assign every point in *data* to its nearest centroid.

    Args:
        data: A list of points.
        kcenter: A list of centroids.

    Returns:
        A list of clusters, one per centroid and in the same order, each
        holding the points closest to that centroid. Ties go to the
        centroid that appears first in *kcenter*.
    """
    clusters = [[] for _ in kcenter]
    for point in data:
        distances = [distance(point, centroid) for centroid in kcenter]
        clusters[distances.index(min(distances))].append(point)
    return clusters


if __name__ == '__main__':
    data = [(1, 2), (2, 3), (1, 6), (8, 9)]
    kcenter = [[0.2, 1.2], [2, 3]]
    error = 0.0000001

    # Alternate assignment and centroid update until the centroids move by
    # less than `error` in total.
    while True:
        rt = update(data, kcenter)
        tmp = center(rt, kcenter)
        sume = sum(distance(new, old) for new, old in zip(tmp, kcenter))
        if sume < error:
            print(rt)
            break
        kcenter = tmp
3 协同过滤中评分矩阵中的元素怎么确定。大矩阵怎么分解。
4 文本挖掘怎么处理。