zoukankan      html  css  js  c++  java
  • 决策树也可以做特征分析啦

    决策树也可以做特征分析啦

    这个代码用于建模初期,是为了大概了解变量的基本特征而写的,得到的并不是最优分组:代码会把每个变量最多分为12组,分这么多组只是为了更好地观察特征。如果你觉得组数太多,可以调整树的深度(MAXDEPTH)等参数。至于变量特征怎么看,这里就不展开说了。

    /*---------------------------------------------------------------------
      zhandapao: exploratory single-variable binning with a decision tree.

      For every numeric column of &data. (except the target and the key)
      a one-input tree is grown with PROC SPLIT, the data are scored, and
      one summary row per tree leaf is appended to WORK.VARNAME_TOTAL:
        varname, interval_1 (min), interval_2 (max), interval (text),
        bad_num, total_num, num_rate, bad_rate, group.
      Rows with a missing value of the analysed variable are reported
      separately as interval 'null' (interval_1 = interval_2 = -9999).

      Parameters
        data : input dataset, LIB.MEMBER or a one-level WORK member name
        DVAR : binary target variable (summed to obtain the bad count)
        id   : key variable, excluded from the analysis
        dir  : output directory prefix. File names are appended to it
               verbatim, so it must end with a path separator.

      Side effects
        - recreates WORK.VARNAME_TOTAL on every call
        - writes treecode_tic_<var>.sas and treerule_tic_<var>.txt to &dir.
        - sets the global macro variables VAR_LIST and VAR_NUM
    ---------------------------------------------------------------------*/
    %macro zhandapao(data,DVAR,id,dir);
        %local i lib dname cur total is_null;
        %global var_list var_num;

        /* Start from a clean result table; NOWARN keeps the log quiet
           when the table does not exist yet. */
        proc datasets lib=work nolist nowarn;
            delete varname_total;
        quit;

        /* Split LIB.MEMBER. The delimiter is a bare dot: inside %SCAN,
           quote characters would be treated as extra delimiters. */
        %if %index(&data.,.) > 0 %then %do;
            %let lib=%upcase(%scan(&data.,1,.));
            %let dname=%upcase(%scan(&data.,2,.));
        %end;
        %else %do;
            %let lib=WORK;
            %let dname=%upcase(&data.);
        %end;

        /* Reset first: INTO leaves the macro variable untouched when the
           query returns no rows. */
        %let var_list=;
        %let var_num=0;

        proc sql noprint;
            select name into :var_list separated by ' '
            from sashelp.vcolumn
            where libname="&lib."
              and memname="&dname."
              and type="num"
              and lowcase(name)^=lowcase("&DVAR.")
              /* lower-case both sides, otherwise a mixed-case id is never excluded */
              and lowcase(name)^=lowcase("&id.");
        quit;
        %let var_num=&sqlobs.;

        %put NOTE: zhandapao found &var_num. numeric variable(s): &var_list.;

        %do i=1 %to &var_num.;
            %let cur=%scan(&var_list.,&i.,%str( ));
            %put NOTE: zhandapao processing variable &i. of &var_num.: &cur.;

            /* One-input tree for the current variable. */
            proc split data=&data. splitsize=300
                maxbranch=2
                maxdepth=5 nsurrs=5
                assess=lift criterion=gini;
                input &cur. / level=interval;
                target &DVAR. / level=binary;
                score data=&data. out=_zdp_scored;
                code file="&dir.treecode_tic_&cur..sas";
                describe file="&dir.treerule_tic_&cur..txt";
            run;

            /* Re-apply the generated score code and keep the predicted
               probability of class 1 as the leaf identifier. Fixed scratch
               names avoid the 32-character limit on derived names. */
            data _zdp_leaf;
                set _zdp_scored;
                %include "&dir.treecode_tic_&cur..sas";
                rename p_&DVAR.1=_zdp_p;
            run;

            /* Total row count and number of rows with a missing value. */
            proc sql noprint;
                select count(*), nmiss(&cur.)
                    into :total, :is_null
                from _zdp_leaf;
            quit;
            %let total=&total.;      /* strip leading blanks */
            %let is_null=&is_null.;

            proc sql noprint;
                create table _zdp_total as
                /* Non-missing rows only, so missing rows are not counted
                   both in a leaf and in the 'null' row below. */
                select "&cur." as varname length=32,
                       min(&cur.) as interval_1,
                       max(&cur.) as interval_2,
                       compress(put(min(round(&cur.,0.0001)),best32.))||'-'||compress(put(max(round(&cur.,0.0001)),best32.)) as interval length=65,
                       sum(&DVAR.) as bad_num,
                       count(*) as total_num,
                       count(*)/&total. as num_rate,
                       sum(&DVAR.)/count(*) as bad_rate
                from _zdp_leaf(where=(not missing(&cur.)))
                group by _zdp_p
                %if &is_null. > 0 %then %do;
                union all
                select "&cur." as varname length=32,
                       -9999 as interval_1,
                       -9999 as interval_2,
                       'null' as interval length=65,
                       sum(&DVAR.) as bad_num,
                       count(*) as total_num,
                       count(*)/&total. as num_rate,
                       sum(&DVAR.)/count(*) as bad_rate
                from _zdp_leaf(where=(missing(&cur.)))
                group by _zdp_p
                %end;
                order by interval_1;
            quit;

            /* Number the groups in ascending order of interval_1. */
            data _zdp_one;
                set _zdp_total;
                group=_n_;
            run;

            /* varname and interval have fixed lengths, so FORCE never
               truncates values from later variables. */
            proc append base=varname_total data=_zdp_one force;
            run;

            /* Remove only this macro's scratch tables. _namedat is deleted
               as in the original; NOTE(review): presumably a PROC SPLIT
               by-product - confirm. */
            proc datasets lib=work nolist nowarn;
                delete _zdp_scored _zdp_leaf _zdp_total _zdp_one _namedat;
            quit;
        %end;
    %mend zhandapao;

    解释一下这个代码怎么用,这个宏已经是封装好了的,直接填入参数就可以用了:

    %zhandapao(data,DVAR,id,dir);

    data:填入你的数据集

    DVAR:填入你的因变量

    id:填入你的数据集的主键

    dir:这个你需要填一个已存在的目录路径,用来存放决策树的规则文件和打分代码,决策树的规则文件你看不懂没关系。注意宏会把文件名直接拼接在dir后面,所以路径要以分隔符结尾,填个类似“F:/DD/”的路径就可以了。

    例子:%zhandapao(DD.TEST_DATA,y,CUSTOMER_id,D:\test_1\);

    结果图就是这样子:

    那么今天的更新就到这里啦

     

  • 相关阅读:
    DevExpress VCL for Delphi 各版本收集下载
    Delphi XE 5,Rad Studio XE 5 官方下载(附破解),更新 Update 1,Help Update 1
    PostMessage 向Windows窗口发送Alt组合键
    Windows XP UDF 2.5 补丁,播放蓝光ISO光盘必备
    60个开发者不容错过的免费资源库
    [转]游戏多开的原理
    Delphi加载驱动
    窗口截图
    Drectx 3D窗口后台截图
    利用进程ID获取主线程ID
  • 原文地址:https://www.cnblogs.com/amengduo/p/9586227.html
Copyright © 2011-2022 走看看