zoukankan      html  css  js  c++  java
  • 决策表快速排序

    一、说明。

    所谓决策表,类似于关系数据库的二维数据表,形如:
    4 3 0
    1 0 1
    8 1 0
    1 2 0
    1 2 1
    7 3 1
    7 4 0

    排序后输出:

    1 0 1
    1 2 0
    1 2 1
    4 3 0
    7 3 1
    7 4 0
    8 1 0

    二、问题由来。
    决策表约简是粗糙集的一个经典问题。
    关于如何解释粗糙集约简问题,我有一个很简单的解释,不过不会在这里写出。
    简而言之约简就是在保持原有数据集分类能力的前提下删除冗余属性。
    粗糙集的创始者Pawlak有着一个近乎偏执的理念:知识就是分类。

    完成分类是进一步完成粗糙集约简的基础。
    所以针对如何分类就有了各种各样的解法。

    蛮力算法就是两两比较,完成分类,这个复杂度很高。
    在这种情况下,先排序再分类是一个进步的方法。
    当然排序的方法也很多,基数排序、快速排序都是排序,也的确都有人进行过尝试。

    我这里的这个排序方法来自于《计算机学报》上的一篇《属性序下的快速约简算法》。
    文章的作者当时发了两篇文章,这篇约简的文章建立在另外一篇《二维表快速排序的复杂度分析》之上。

    这里我只是简单实现了原文算法。

    三、实现代码,只是想重复这个实验,然后用我的方法与此相比较。

      1 #include <stdlib.h>
      2 #include <string.h>
      3 #include <stdio.h>
      4 #include <math.h>
      5 #include <time.h>
      6 #include <windows.h>
      7 #include "decTable.h"
      8 
      9 const int AttOrderTerminator = -1; // Sentinel that ends an attribute-order array: partitionMatrix stops when attOrder[stage] equals it.
     10 
     11 typedef struct tagConditionClass{ // One run of consecutive tblIdx positions whose condition rows compare equal.
     12     int deputyRowNO; // First position (inclusive) of the run, as an index into tblIdx.
     13     int terminalRowNO; // Last position (inclusive) of the run, as an index into tblIdx.
     14     bool available; // True while every row of the run has the same decision value; false once two rows differ.
     15 }ConditionClass;
     16 
     17 struct tagDecisionTableEX{ // A decision table plus a row index and the index range partitionMatrix works on.
     18     DecisionTable * table; // Table whose dataCenter / extCdnAttribCount are read.
     19     int * tblIdx; // Row index array; entries are row numbers into the table and are permuted in place.
     20     int from; // First tblIdx position (inclusive) of the current range.
     21     int to; // Last tblIdx position (inclusive) of the current range.
     22 };
     23 typedef tagDecisionTableEX DecisionTableEX; // C++-style alias (no 'struct' keyword on the tag).
     24 
     25 int partition(DecisionTable * table, int * tblIdx, int stage, int low, int high); // Splits tblIdx[low..high] around the column-'stage' value of row tblIdx[low]; returns the last index of the left part.
     26 int TDQuicksort(DecisionTable * table, int * tblIdx, int stage, int low, int high); // Sorts tblIdx[low..high] by column 'stage', then by the following columns; always returns 0.
     27 int loadDecisionTablePositiveRegion(DecisionTable * table, int * tblIdx, bool * tblPositiveRegion); // Sets tblPositiveRegion[row] for every row from runs of equal condition rows in tblIdx order; always returns 0.
     28 int partitionMatrix(DecisionTableEX * tex, int * attOrder, int stage, int * nonEmptyLabel, bool * tblPositiveRegion); // Recursively splits tex's [from, to] range by attOrder[stage..]; sets nonEmptyLabel[stage] = 1 for stages that split it.
     29 int attOrderReduction(); // Interactive driver: reads a file name, loads the table and sorts its row index.
     30 
     31 int partition(DecisionTable * table, int * tblIdx, int stage, int low, int high){
     32     TableElement * s = table->dataCenter;
     33     int ext = table->extCdnAttribCount;
     34     int t;
     35 
     36     int mid = low;
     37     int hiEnd = mid+1;
     38     int counter = 0;
     39     for(int i=low+1; i<=high; i++){
     40         int ref = s[tblIdx[low] * ext + stage];
     41         int element = s[tblIdx[i] * ext + stage];
     42         if (element < ref){
     43             mid++;
     44             hiEnd++;
     45             t = tblIdx[mid];
     46             tblIdx[mid] = tblIdx[i];
     47             tblIdx[i] = t;
     48         }
     49         if(element == ref){
     50             t = tblIdx[i];
     51             tblIdx[i] = tblIdx[hiEnd];
     52             tblIdx[hiEnd] = t;
     53             hiEnd++;
     54             counter++;
     55         }
     56     }
     57 
     58     t = tblIdx[low];
     59     tblIdx[low] = tblIdx[mid];
     60     tblIdx[mid] = t;
     61 
     62     if (mid == low) return mid + counter;
     63 
     64     return mid-1;
     65 }
     66 
     67 int TDQuicksort(DecisionTable * table, int * tblIdx, int stage, int low, int high){
     68     TableElement * s = table->dataCenter;
     69     int ext = table->extCdnAttribCount;
     70     
     71     if (stage > table->cdnAttributeCount) return 0;
     72     if (low >= high) return 0;
     73 
     74     bool NextDemension = false;
     75     for (int i=low+1; i<=high; i++)
     76     if ( s[tblIdx[i] * ext + stage] != s[tblIdx[low] * ext + stage]){
     77         NextDemension = true;
     78         break;
     79     }
     80 
     81     if (NextDemension){
     82         int mid = partition(table, tblIdx, stage, low, high);
     83         TDQuicksort(table, tblIdx, stage, low, mid);
     84         TDQuicksort(table, tblIdx, stage, mid+1, high);
     85     }
     86 
     87     if (!NextDemension){
     88         TDQuicksort(table, tblIdx, stage+1, low, high);
     89     }
     90 
     91     return 0;
     92 }
     93 
     94 int loadDecisionTablePositiveRegion(DecisionTable * table, int * tblIdx, bool * tblPositiveRegion){
     95     int CdnEquClsNO = table->elementCount+2;
     96     int cdnClsPointer = 0;
     97 
     98     int cdn = table->cdnAttributeCount;
     99     int ext = table->extCdnAttribCount;
    100     int tfsi = table->elementCount;
    101     
    102     ConditionClass * cdnCls = NULL;
    103     HANDLE heap = NULL;
    104 
    105     int cc = table->cdnCmp;
    106 
    107     heap = HeapCreate(HEAP_NO_SERIALIZE|HEAP_GENERATE_EXCEPTIONS, 1024*1024, 0);
    108     if (heap != NULL){
    109         cdnCls = (ConditionClass * )HeapAlloc(heap, 0, CdnEquClsNO * sizeof(ConditionClass));
    110     }
    111     MakeSure(cdnCls != NULL);
    112     SecureZeroMemory(cdnCls, CdnEquClsNO * sizeof(ConditionClass));
    113 
    114     int from = 0;
    115     while(from < tfsi){
    116         int duplicate = 0;
    117         int i=from;
    118         BigInt * src64 = (BigInt *)(table->dataCenter + tblIdx[from] * ext);
    119 
    120         cdnCls[cdnClsPointer].deputyRowNO = from; // index NO. in tblIdx
    121         cdnCls[cdnClsPointer].available = true;
    122 
    123         // while (Line[from] == Line[i]) {...}
    124         while (true){
    125             bool bird = true;
    126             if (i == tfsi) break;
    127             BigInt * dst64 = (BigInt *)(table->dataCenter + tblIdx[i] * ext);
    128             for(int m=0; m<cc; m++)
    129             if(src64[m]^dst64[m]){
    130                 bird = false;
    131                 break;
    132             }
    133             if (!bird) break;
    134 
    135             if (cdnCls[cdnClsPointer].available == true)
    136             if (table->dcnElement[tblIdx[i]] != table->dcnElement[tblIdx[from]]){
    137                 cdnCls[cdnClsPointer].available = false;
    138             }
    139             
    140             i++;
    141             duplicate++;
    142         }
    143 
    144         from += duplicate;
    145         cdnCls[cdnClsPointer].terminalRowNO = from - 1;
    146         cdnClsPointer++;
    147     }
    148 
    149     for (int i=0; i<cdnClsPointer; i++){
    150         int start = cdnCls[i].deputyRowNO;
    151         int terminal = cdnCls[i].terminalRowNO;
    152         if (cdnCls[i].available){
    153             for (int m=start; m<=terminal; m++) tblPositiveRegion[tblIdx[m]] = true;
    154         }
    155         if (!cdnCls[i].available){
    156             for (int m=start; m<=terminal; m++) tblPositiveRegion[tblIdx[m]] = false;
    157         }
    158     }
    159 
    160     HeapFree(heap, 0, cdnCls);
    161     HeapDestroy(heap);
    162 
    163     return 0;
    164 }
    165 
    166 int partitionMatrix(DecisionTableEX * tex, int * attOrder, int stage, int * nonEmptyLabel, bool * tblPositiveRegion){
    167     DecisionTable * table = tex->table;
    168     int * tblIdx = tex->tblIdx;
    169     int from = tex->from;
    170     int to = tex->to;
    171     
    172     while (attOrder[stage] != AttOrderTerminator){
    173         if (from >= to) return 0;
    174         
    175         bool noPRelement = true;
    176         for (int i=from; i<=to; i++)
    177         if (tblPositiveRegion[tblIdx[i]]){
    178             noPRelement = false;
    179             break;
    180         }
    181         if(noPRelement) return 0;
    182 
    183         bool cannotDistinguishInStage = true;
    184         int ext = table->extCdnAttribCount;
    185         TableElement * s = table->dataCenter;        
    186         for (int i=from; i<=to; i++)
    187         if (s[tblIdx[i] * ext + attOrder[stage]] != s[tblIdx[from] * ext + attOrder[stage]]){
    188             cannotDistinguishInStage = false;
    189             break;
    190         }
    191         if (cannotDistinguishInStage) partitionMatrix(tex, attOrder, stage+1, nonEmptyLabel, tblPositiveRegion);
    192 
    193         if (cannotDistinguishInStage == false){
    194             nonEmptyLabel[stage] = 1;
    195             int sum = 0;
    196             double avg = 0;
    197             for (int i=from; i<=to; i++) sum += s[tblIdx[i] * ext + attOrder[stage]];
    198             avg = ((double)sum) / (to - from + 1);
    199             int mid = from -1;
    200             for (int i=from; i<=to; i++){
    201                 if (s[tblIdx[i] * ext + attOrder[stage]] <= avg){
    202                     mid++;
    203                     int t = tblIdx[mid];
    204                     tblIdx[mid] = tblIdx[i];
    205                     tblIdx[i] = t;
    206                 }
    207             }
    208             //if (mid==from || mid==to) __debugbreak();
    209             //int mid = partition(table, tblIdx, attOrder[stage], from, to);
    210 
    211             tex->from = from;
    212             tex->to = mid;
    213             partitionMatrix(tex, attOrder, stage, nonEmptyLabel, tblPositiveRegion);
    214         
    215             tex->from = mid+1;
    216             tex->to = to;
    217             partitionMatrix(tex, attOrder, stage, nonEmptyLabel, tblPositiveRegion);
    218         }
    219 
    220         //printf("%d stage completed.
    ", stage);
    221         stage++;
    222     }
    223 
    224     return 0;
    225 }
    226 
    227 int attOrderReduction(){
    228     DecisionTable table;
    229     time_t timeBegin;
    230     time_t timeEnd;
    231     char fileName[MAX_STR];
    232 
    233     beginDecisionTable(&table);
    234     
    235     printf("
    Input data file name : ");
    236     scanf_s("%s", fileName, MAX_STR);
    237     strcat_s(fileName, MAX_STR, ".txt");
    238     timeBegin = clock();
    239     fillTableWithFile(&table, fileName);
    240     timeEnd = clock();
    241     printf("
    %f(s) consumed in reading from file", (double)(timeEnd-timeBegin)/CLOCKS_PER_SEC);
    242     printf("
    ");
    243 
    244     //reduction main
    245     timeBegin = clock();
    246     int * tblIdx = (int *)malloc(table.elementCount * sizeof(int));
    247     bool * tblPositiveRegion = (bool *)malloc(table.elementCount * sizeof(bool));
    248     for (int i=0; i<table.elementCount; i++) tblIdx[i]=i;
    249     
    250     TDQuicksort(&table, tblIdx, 0, 0, table.elementCount-1);
    251     //TDQuicksort test code
    252     //FILE * fp;
    253     //fopen_s(&fp, "r8.txt", "w+");
    254     //for (int i=0; i<table.elementCount; i++){
    255     //    for (int m=0; m<table.cdnAttributeCount; m++)
    256     //        fprintf(fp, "%8d", table.dataCenter[ tblIdx[i]*table.extCdnAttribCount + m ]);
    257     //    fprintf(fp, "
    ");
    258     //}
    259     //fclose(fp);
    260     
    261     return 0;
    262 }
    263 
    264 int main(){
    265     attOrderReduction();
    266 
    267     return 0;
    268 }
    View Code

    四、实验结果摘录
    所有实验数据来自UCI数据库。
    实验机器: i3 2100 + 4G + win7 32bit
    VS 2012 32bit Release Mode

    Forest CoverType
    581012 条数据,每条数据 54 个条件属性。
    0.234s

    Poker Hand
    1025010 条数据,每条数据 10 个条件属性。
    0.359s

  • 相关阅读:
    【BIEE】01_下载安装BIEE(Business Intelligence)11g 11.1.1.9.0
    【Excle数据透视表】如何按照地区交替填充背景颜色
    【Excle数据透视表】如何利用图标集将销售数据划分为五个等级
    【Excle数据透视表】如何将价格小于5000的显示为红色“不达标”
    【Excle数据透视表】如何让字段标题不显示“求和项”
    【Excle】如何隐藏数据透视表中的错误值
    使用虚拟机运行Ubuntu时,主机与宿主机共享文件的方法。
    mount命令汇总
    虚拟机网络模式
    linux(虚拟机中)与windows共享文件两种方法
  • 原文地址:https://www.cnblogs.com/servo/p/3321572.html
Copyright © 2011-2022 走看看