zoukankan      html  css  js  c++  java
  • 大作业

    一、boston房价预测

     # Boston housing price prediction: a plain multiple linear regression
     # followed by a degree-2 polynomial regression on the same split.
     #
     # NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed
     # in 1.2, so this script presumably needs scikit-learn < 1.2 -- confirm.
     from sklearn.datasets import load_boston
     from sklearn.linear_model import LinearRegression
     # The metric functions are imported from the public sklearn.metrics
     # namespace; the private sklearn.metrics.regression submodule used before
     # is not available in current scikit-learn releases.
     from sklearn.metrics import mean_absolute_error, mean_squared_error
     from sklearn.model_selection import train_test_split
     from sklearn.preprocessing import PolynomialFeatures

     # Boston housing data
     data = load_boston()

     # Split into training and test sets (30% held out for testing)
     x_train, x_test, y_train, y_test = train_test_split(
         data.data, data.target, test_size=0.3
     )

     # ---- Multiple linear regression ----
     mlr = LinearRegression()
     mlr.fit(x_train, y_train)
     w = mlr.coef_
     b = mlr.intercept_
     print("系数", w, " 截距", b)

     # Evaluate the linear model on the held-out test set
     y_predict = mlr.predict(x_test)
     print("预测的均方误差:", mean_squared_error(y_test, y_predict))
     print("预测的平均绝对误差:", mean_absolute_error(y_test, y_predict))
     # Model score as returned by LinearRegression.score
     print("模型的分数:", mlr.score(x_test, y_test))

     # ---- Multiple polynomial regression ----
     # Expand the features to degree 2; the expansion is fitted on the training
     # set only and then re-applied to the test set.
     poly2 = PolynomialFeatures(degree=2)
     x_poly_train = poly2.fit_transform(x_train)
     x_poly_test = poly2.transform(x_test)

     # Fit a linear model on the expanded features
     mlrp = LinearRegression()
     mlrp.fit(x_poly_train, y_train)

     # Evaluate the polynomial model on the held-out test set
     y_predict2 = mlrp.predict(x_poly_test)
     print("预测的均方差:", mean_squared_error(y_test, y_predict2))
     print("预测的平均绝对误差:", mean_absolute_error(y_test, y_predict2))
     # Model score as returned by LinearRegression.score
     print("模型的分数:", mlrp.score(x_poly_test, y_test))

    多元线性回归模型结果:

    多元多项式回归模型结果:

    二、中文文本分类

     import os

     import jieba

     # Root directory of the news corpus. Raw string literals are required
     # here: in a normal literal "\U" starts a unicode escape (a SyntaxError on
     # Python 3) and "\036" would be read as an octal escape.
     path = r'C:\Users\Administrator\Desktop\中文文本分类\0369data'

     # Load the stop-word list, one word per line. A set keeps the
     # "token not in stopwords" membership test O(1) per token.
     # NOTE(review): the separators in this path were lost in the published
     # listing; it is reconstructed to sit next to the corpus directory --
     # confirm the real location of stopsCN.txt.
     with open(r'C:\Users\Administrator\Desktop\中文文本分类\stopsCN.txt', encoding='utf-8') as stop_file:
         stopwords = set(stop_file.read().split('\n'))
    def processing(tokens):
        """Clean one raw document and return its words joined by single spaces.

        Characters for which ``str.isalpha()`` is false are dropped, the
        remaining text is segmented with jieba in full mode
        (``cut_all=True``), and only words that are at least two characters
        long and not in the module-level ``stopwords`` are kept.
        """
        # Keep alphabetic characters only (str.isalpha is also true for
        # Chinese characters).
        letters_only = "".join(filter(str.isalpha, tokens))

        kept_words = []
        for word in jieba.cut(letters_only, cut_all=True):
            # Drop one-character words and stop words.
            if len(word) >= 2 and word not in stopwords:
                kept_words.append(word)
        return " ".join(kept_words)
    18 
    import pandas
    from sklearn.model_selection import train_test_split
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.naive_bayes import GaussianNB,MultinomialNB
    from sklearn.model_selection import cross_val_score
    from sklearn.metrics import classification_report

    contentList = []   # processed text of every document
    classifyList = []  # news category of every document

    # Walk the corpus, read every file and record its processed text together
    # with its category label.
    for root, dirs, files in os.walk(path):
        for file_name in files:
            fp = os.path.join(root, file_name)
            with open(fp, encoding='utf-8') as doc:
                content = doc.read()
            # The label is the name of the directory that directly contains
            # the file; os.path.basename avoids hard-coding the "\" separator.
            classify = os.path.basename(root)
            classifyList.append(classify)
            contentList.append(processing(content))

    datas = pandas.DataFrame({
        'classifyList': classifyList,
        'contentList': contentList,
    })
    print(datas)

    # Stratified 70/30 split so every category keeps its share in both sets
    x_train, x_test, y_train, y_test = train_test_split(
        contentList, classifyList, test_size=0.3, stratify=classifyList
    )

    # Turn the documents into TF-IDF feature vectors; the vocabulary is
    # learned from the training set only and re-used for the test set.
    tfv = TfidfVectorizer()
    X_train = tfv.fit_transform(x_train)
    X_test = tfv.transform(x_test)

    # Multinomial Naive Bayes classifier
    mnb = MultinomialNB()
    module = mnb.fit(X_train, y_train)

    # Predict the held-out test set
    y_pred = module.predict(X_test)

    # NOTE(review): cross_val_score re-fits clones of the estimator on folds
    # of the data it is given, so this number is a 5-fold score computed
    # inside the test split, not the accuracy of the fitted `module` --
    # confirm this is the intended metric.
    scores = cross_val_score(mnb, X_test, y_test, cv=5)
    print("Accuracy:%.3f"%scores.mean())

    # classification_report expects (y_true, y_pred); passing them the other
    # way round swaps precision and recall in the report.
    print("classification_report:\n", classification_report(y_test, y_pred))

     

  • 相关阅读:
    IOTest-InputStream-OutputStream
    JSP
    java链表
    区块链
    MySQL常用命令
    jQuery
    javascript
    Nginx
    Linux
    Hive
  • 原文地址:https://www.cnblogs.com/hodafu/p/10131274.html
Copyright © 2011-2022 走看看