zoukankan      html  css  js  c++  java
  • R语言-变量聚类

    > ######变量聚类
    > setwd("/Users/yaozhilin/Downloads/R_edu/data")
    > accepts<-read.csv("accepts.csv")
    > #导入ClustOfVar包——>用hclustvar对变量进行层次聚类,并用stability评估各聚类数的稳定性以确定聚类数量——>
    > #再用cutreevar按stability的结果把hclustvar得到的树切割成指定的类数
    > library(ClustOfVar)
    > orgData<-accepts[,c(-1,-2,-3,-5,-6)]
    > tree<-hclustvar(orgData)
    > plot(tree)
    

    > st<-stability(tree,B=30)#用30次自助抽样(bootstrap)评估各聚类数下分割的稳定性
    

    > part <- cutreevar(tree,7,matsim = TRUE)#聚成7类查看
    > print(part)
    
    Call:
    cutreevar(obj = tree, k = 7, matsim = TRUE)
    
    
    
     name       description                                                                    
     "$var"     "list of variables in each cluster"                                            
     "$sim"     "similarity matrix in each cluster"                                            
     "$cluster" "cluster memberships"                                                          
     "$wss"     "within-cluster sum of squares"                                                
     "$E"       "gain in cohesion (in %)"                                                      
     "$size"    "size of each cluster"                                                         
     "$scores"  "synthetic score of each cluster"                                              
     "$coef"    "coef of the linear combinations defining the synthetic scores of each cluster"
    > summary(part)
    
    Call:
    cutreevar(obj = tree, k = 7, matsim = TRUE)
    
    
    
    Data: 
       number of observations:  5845
       number of variables:  19
       number of clusters:  7
    
    Cluster  1 : 
                 squared loading correlation
    purch_price             0.90       -0.95
    loan_amt                0.90       -0.95
    msrp                    0.90       -0.95
    vehicle_year            0.16       -0.40
    
    
    Cluster  2 : 
               squared loading correlation
    fico_score            0.69        0.84
    tot_derog             0.60       -0.78
    rev_util              0.31       -0.56
    
    
    Cluster  3 : 
                  squared loading correlation
    tot_rev_tr               0.70        0.84
    tot_rev_line             0.61        0.79
    tot_rev_debt             0.61        0.79
    tot_open_tr              0.61        0.83
    tot_tr                   0.55        0.74
    age_oldest_tr            0.26        0.51
    
    
    Cluster  4 : 
             squared loading correlation
    down_pyt            0.73        0.85
    ltv                 0.73       -0.85
    
    
    Cluster  5 : 
    squared loading     correlation 
                  1               1 
    
    
    Cluster  6 : 
    squared loading     correlation 
                  1               1 
    
    
    Cluster  7 : 
                squared loading correlation
    used_ind               0.79       -0.89
    veh_mileage            0.79       -0.89
    
    
    Gain in cohesion (in %):  59.07
    
    > part$sim                            #查看各类内变量间的相似度矩阵(数值变量间为相关系数的平方,而非相关系数本身)
    $cluster1
                 vehicle_year purch_price       msrp   loan_amt
    vehicle_year   1.00000000  0.05961112 0.08725898 0.05578607
    purch_price    0.05961112  1.00000000 0.74949104 0.80971082
    msrp           0.08725898  0.74949104 1.00000000 0.75508338
    loan_amt       0.05578607  0.80971082 0.75508338 1.00000000
    
    $cluster2
                tot_derog   rev_util fico_score
    tot_derog  1.00000000 0.02360717 0.21185851
    rev_util   0.02360717 1.00000000 0.06931344
    fico_score 0.21185851 0.06931344 1.00000000
    
    $cluster3
                     tot_tr age_oldest_tr tot_open_tr tot_rev_tr tot_rev_debt tot_rev_line
    tot_tr        1.0000000    0.22550267   0.2348436 0.19762456   0.14830133    0.3122191
    age_oldest_tr 0.2255027    1.00000000   0.0301182 0.04633906   0.05585033    0.1761412
    tot_open_tr   0.2348436    0.03011820   1.0000000 0.59139623   0.28310367    0.1651928
    tot_rev_tr    0.1976246    0.04633906   0.5913962 1.00000000   0.43259981    0.2627265
    tot_rev_debt  0.1483013    0.05585033   0.2831037 0.43259981   1.00000000    0.3411766
    tot_rev_line  0.3122191    0.17614119   0.1651928 0.26272651   0.34117657    1.0000000
    
    $cluster4
              down_pyt       ltv
    down_pyt 1.0000000 0.2086998
    ltv      0.2086998 1.0000000
    
    $cluster5
              loan_term
    loan_term         1
    
    $cluster6
               tot_income
    tot_income          1
    
    $cluster7
                veh_mileage  used_ind
    veh_mileage   1.0000000 0.3313023
    used_ind      0.3313023 1.0000000
    
    > part$var   
    $cluster1
                 squared loading correlation
    purch_price        0.9047017  -0.9511581
    loan_amt           0.9046381  -0.9511247
    msrp               0.8983588  -0.9478394
    vehicle_year       0.1562745  -0.3953246
    
    $cluster2
               squared loading correlation
    fico_score       0.6927142   0.8380766
    tot_derog        0.6003257  -0.7763639
    rev_util         0.3121919  -0.5587414
    
    $cluster3
                  squared loading correlation
    tot_rev_tr          0.6994471   0.8409839
    tot_rev_line        0.6125771   0.7859612
    tot_rev_debt        0.6116821   0.7853868
    tot_open_tr         0.6094718   0.8271140
    tot_tr              0.5475506   0.7399666
    age_oldest_tr       0.2552606   0.5052449
    
    $cluster4
             squared loading correlation
    down_pyt       0.7284184   0.8534743
    ltv            0.7284184  -0.8534817
    
    $cluster5
    squared loading     correlation 
                  1               1 
    
    $cluster6
    squared loading     correlation 
                  1               1 
    
    $cluster7
                squared loading correlation
    used_ind          0.7877943  -0.8875778
    veh_mileage       0.7877943  -0.8876091
    
    > part <- cutreevar(tree,16,matsim = TRUE)#聚成16类查看
    > print(part)
    
    Call:
    cutreevar(obj = tree, k = 16, matsim = TRUE)
    
    
    
     name       description                                                                    
     "$var"     "list of variables in each cluster"                                            
     "$sim"     "similarity matrix in each cluster"                                            
     "$cluster" "cluster memberships"                                                          
     "$wss"     "within-cluster sum of squares"                                                
     "$E"       "gain in cohesion (in %)"                                                      
     "$size"    "size of each cluster"                                                         
     "$scores"  "synthetic score of each cluster"                                              
     "$coef"    "coef of the linear combinations defining the synthetic scores of each cluster"
    > summary(part)
    
    Call:
    cutreevar(obj = tree, k = 16, matsim = TRUE)
    
    
    
    Data: 
       number of observations:  5845
       number of variables:  19
       number of clusters:  16
    
    Cluster  1 : 
    squared loading     correlation 
                  1               1 
    
    
    Cluster  2 : 
    squared loading     correlation 
                  1               1 
    
    
    Cluster  3 : 
    squared loading     correlation 
                  1               1 
    
    
    Cluster  4 : 
    squared loading     correlation 
                  1               1 
    
    
    Cluster  5 : 
                squared loading correlation
    tot_open_tr            0.88        0.96
    tot_rev_tr             0.88        0.94
    
    
    Cluster  6 : 
    squared loading     correlation 
                  1               1 
    
    
    Cluster  7 : 
    squared loading     correlation 
                  1               1 
    
    
    Cluster  8 : 
    squared loading     correlation 
                  1               1 
    
    
    Cluster  9 : 
    squared loading     correlation 
                  1               1 
    
    
    Cluster  10 : 
                squared loading correlation
    loan_amt               0.93        0.96
    purch_price            0.93        0.96
    msrp                   0.90        0.95
    
    
    Cluster  11 : 
    squared loading     correlation 
                  1               1 
    
    
    Cluster  12 : 
    squared loading     correlation 
                  1               1 
    
    
    Cluster  13 : 
    squared loading     correlation 
                  1               1 
    
    
    Cluster  14 : 
    squared loading     correlation 
                  1               1 
    
    
    Cluster  15 : 
    squared loading     correlation 
                  1               1 
    
    
    Cluster  16 : 
    squared loading     correlation 
                  1               1 
    
    
    Gain in cohesion (in %):  96.85
    > part$sim                            #查看各类内变量间的相似度矩阵(数值变量间为相关系数的平方)
    $cluster1
                 vehicle_year
    vehicle_year            1
    
    $cluster2
              tot_derog
    tot_derog         1
    
    $cluster3
           tot_tr
    tot_tr      1
    
    $cluster4
                  age_oldest_tr
    age_oldest_tr             1
    
    $cluster5
                tot_open_tr tot_rev_tr
    tot_open_tr   1.0000000  0.5913962
    tot_rev_tr    0.5913962  1.0000000
    
    $cluster6
                 tot_rev_debt
    tot_rev_debt            1
    
    $cluster7
                 tot_rev_line
    tot_rev_line            1
    
    $cluster8
             rev_util
    rev_util        1
    
    $cluster9
               fico_score
    fico_score          1
    
    $cluster10
                purch_price      msrp  loan_amt
    purch_price   1.0000000 0.7494910 0.8097108
    msrp          0.7494910 1.0000000 0.7550834
    loan_amt      0.8097108 0.7550834 1.0000000
    
    $cluster11
             down_pyt
    down_pyt        1
    
    $cluster12
              loan_term
    loan_term         1
    
    $cluster13
        ltv
    ltv   1
    
    $cluster14
               tot_income
    tot_income          1
    
    $cluster15
                veh_mileage
    veh_mileage           1
    
    $cluster16
             used_ind
    used_ind        1
    
    > part$var   
    $cluster1
    squared loading     correlation 
                  1               1 
    
    $cluster2
    squared loading     correlation 
                  1               1 
    
    $cluster3
    squared loading     correlation 
                  1               1 
    
    $cluster4
    squared loading     correlation 
                  1               1 
    
    $cluster5
                squared loading correlation
    tot_open_tr       0.8845115   0.9597241
    tot_rev_tr        0.8845115   0.9404847
    
    $cluster6
    squared loading     correlation 
                  1               1 
    
    $cluster7
    squared loading     correlation 
                  1               1 
    
    $cluster8
    squared loading     correlation 
                  1               1 
    
    $cluster9
    squared loading     correlation 
                  1               1 
    
    $cluster10
                squared loading correlation
    loan_amt          0.9275256   0.9630813
    purch_price       0.9253048   0.9619276
    msrp              0.9036108   0.9506082
    
    $cluster11
    squared loading     correlation 
                  1               1 
    
    $cluster12
    squared loading     correlation 
                  1               1 
    
    $cluster13
    squared loading     correlation 
                  1               1 
    
    $cluster14
    squared loading     correlation 
                  1               1 
    
    $cluster15
    squared loading     correlation 
                  1               1 
    
    $cluster16
    squared loading     correlation 
                  1               1 
    
  • 相关阅读:
    Go语言入门
    简述cookies 和 session
    Linux inode 理解
    BZOJ 1012 最大数maxnumber
    BZOJ 1087 互不侵犯king
    CSS从大图中抠取小图完整教程(background-position应用)
    javascript中i++与++i
    脱离文档流分析
    在Windows上以zip压缩包方式安装mysql
    centos7 python2.7下安装paramiko模块
  • 原文地址:https://www.cnblogs.com/ye20190812/p/13910797.html
Copyright © 2011-2022 走看看