- 基本Apriori算法:
主要思想是:连接步+Apriori性质
- 先统计每个单项的支持度,得到频繁1项集,再利用连接步生成候选2项集;
- 对2项采取Apriori性质剪枝,得到可能的2项;
- 再在原始事务集中统计这些2项集的支持度,并剪去达不到最小支持度的项。
- 重复上述步骤,直到不能再产生新的频繁项集。
因为从网上找的程序都不太好调试,特贴出自己编写的程序,当然这个针对的是固定的一个对象,通过修改能达到同样的目的,欢迎指出错误!
主程序:
#include <iostream> #include <string> #include <math.h> #include "Apri.h" int main(){ Apri aa(2); First_Iteam First_set[9]; First_set[0].name = "T100"; First_set[0].Iteams[0] = "I1"; First_set[0].Iteams[1] = "I2"; First_set[0].Iteams[2] = "I5"; First_set[1].name = "T200"; First_set[1].Iteams[0] = "I2"; First_set[1].Iteams[1] = "I4"; First_set[2].name = "T300"; First_set[2].Iteams[0] = "I2"; First_set[2].Iteams[1] = "I3"; First_set[3].name = "T400"; First_set[3].Iteams[0] = "I1"; First_set[3].Iteams[1] = "I2"; First_set[3].Iteams[2] = "I4"; First_set[4].name = "T500"; First_set[4].Iteams[0] = "I1"; First_set[4].Iteams[1] = "I3"; First_set[5].name = "T600"; First_set[5].Iteams[0] = "I2"; First_set[5].Iteams[1] = "I3"; First_set[6].name = "T700"; First_set[6].Iteams[0] = "I1"; First_set[6].Iteams[1] = "I3"; First_set[7].name = "T800"; First_set[7].Iteams[0] = "I1"; First_set[7].Iteams[1] = "I2"; First_set[7].Iteams[2] = "I3"; First_set[7].Iteams[3] = "I5"; First_set[8].name = "T900"; First_set[8].Iteams[0] = "I1"; First_set[8].Iteams[1] = "I2"; First_set[8].Iteams[2] = "I3"; aa.setFirstSet(First_set); aa.Find_frequent_1_itemsets(); aa.print(); aa.apriori_gen(); cin.get(); return 0; }
Apriori类
.h文件:
#ifndef APRI_H
#define APRI_H

#include <iostream>
#include <string>

// NOTE(review): a using-directive in a header leaks into every includer.
// It is kept here only because the existing main program and Apri.cpp use
// unqualified names (cin, cout, string) that depend on it.
using namespace std;

// One transaction: an identifier plus up to five item names.
// Unused item slots are left as empty strings; the empty string acts as the
// end-of-list marker everywhere in the implementation.
struct First_Iteam {
    string name;
    string Iteams[5];
};

// One itemset together with its counter.
// Items holds up to five item names (empty string = unused slot).
// confident stores the number of transactions counted for this itemset; it
// is compared against the minimum support threshold.
struct Ci {
    string Items[5];
    int confident;

    // Start with a zero counter so that temporaries which are copied or
    // passed by value never carry an indeterminate int.
    Ci() : confident(0) {}
};

// Apriori miner over a fixed-size, in-memory transaction table.
class Apri {
private:
    First_Iteam First_set[9];  // the transactions (stands in for a database read); fixed at 9
    Ci Ck_Items[100];          // itemsets of the current (k-1) level with their counts
    Ci Cl_Items[100];          // newly generated itemsets of level k
    int Items_k;               // number of valid entries in Ck_Items
    int Items_l;               // number of valid entries in Cl_Items
    int Gen;                   // current level (itemset size); starts at 1
    int min_sup;               // minimum support threshold

public:
    // Creates a miner; the argument is stored as the minimum support.
    Apri(int);

    // Prints the stored transactions.
    void print();

    // Copies nine transactions from the given array into the miner.
    void setFirstSet(First_Iteam*);

    // Builds the level-1 itemsets and their counts from the transactions.
    void Find_frequent_1_itemsets();

    // Looks up a single item among the level-1 entries: increments its
    // counter when present, otherwise appends it with a count of 1.
    void Find_ck(string item);

    // Join step: repeatedly combines level k-1 itemsets into level k
    // itemsets, printing each level, until no itemsets remain.
    void apriori_gen();

    // Returns true when the given (k-1)-itemset is present in Ck_Items.
    bool Apriori_check(Ci);

    // Returns true when the two itemsets contain the same items.
    bool Ci_Equal(Ci, Ci);

    // Returns how many stored transactions contain every item of the itemset.
    int ContainCi(Ci);
};

#endif  // APRI_H
.cpp文件:
#include "Apri.h"

// Capacity limits. They must match the array sizes declared in Apri.h.
static const int kTransactionCount = 9;  // First_set[9]
static const int kMaxItems = 5;          // Iteams[5] / Items[5]
static const int kMaxCandidates = 100;   // Ck_Items[100] / Cl_Items[100]

// Counts the leading non-empty strings of a kMaxItems-slot array without
// ever reading past the last slot (a completely full array has no empty
// terminator).
static int countItems(const std::string* items) {
    int n = 0;
    while (n < kMaxItems && items[n] != "")
        n++;
    return n;
}

// Stores the minimum support and puts every counter into a defined state so
// that calling apriori_gen() before Find_frequent_1_itemsets() is a no-op
// instead of reading uninitialised members.
Apri::Apri(int a) : Items_k(0), Items_l(0), Gen(1), min_sup(a) {
}

// Prints every stored transaction as "<name>: <item> <item> ...".
void Apri::print() {
    std::cout << "原始项目集:" << std::endl;
    for (int i = 0; i < kTransactionCount; i++) {
        std::cout << First_set[i].name << ": ";
        for (int j = 0; j < kMaxItems; j++)
            if (First_set[i].Iteams[j] != "")
                std::cout << First_set[i].Iteams[j] << " ";
        std::cout << std::endl;
    }
}

// Copies kTransactionCount transactions from a into the miner.
// The caller must supply an array with at least that many elements.
void Apri::setFirstSet(First_Iteam a[]) {
    for (int i = 0; i < kTransactionCount; i++)
        First_set[i] = a[i];
}

// Builds level 1: counts every distinct item over all transactions, then
// drops the items whose count is below min_sup so that Ck_Items really holds
// the *frequent* 1-itemsets.
void Apri::Find_frequent_1_itemsets() {
    Gen = 1;
    Items_k = 0;
    for (int i = 0; i < kTransactionCount; i++) {
        // Bounded scan: a transaction with all slots used has no "" marker.
        for (int j = 0; j < kMaxItems && First_set[i].Iteams[j] != ""; j++)
            Find_ck(First_set[i].Iteams[j]);
    }

    // Prune infrequent items, compacting the survivors to the front while
    // keeping their first-seen order (the join step relies on a consistent
    // item order).
    int kept = 0;
    for (int i = 0; i < Items_k; i++) {
        if (Ck_Items[i].confident >= min_sup) {
            if (kept != i)
                Ck_Items[kept] = Ck_Items[i];
            kept++;
        }
    }
    // Wipe the vacated tail so no stale item names survive.
    for (int i = kept; i < Items_k; i++) {
        for (int j = 0; j < kMaxItems; j++)
            Ck_Items[i].Items[j] = "";
        Ck_Items[i].confident = 0;
    }
    Items_k = kept;
}

// Counts one occurrence of item in the level-1 table: increments the
// counter of an existing entry, or appends a new entry with count 1.
void Apri::Find_ck(string item) {
    for (int i = 0; i < Items_k; i++) {
        if (item == Ck_Items[i].Items[0]) {
            Ck_Items[i].confident++;
            return;
        }
    }

    if (Items_k >= kMaxCandidates) {
        // The fixed table is full; report instead of writing out of bounds.
        std::cerr << "Apri: more than " << kMaxCandidates
                  << " distinct items, ignoring " << item << std::endl;
        return;
    }

    Ci& slot = Ck_Items[Items_k];
    slot.Items[0] = item;
    for (int j = 1; j < kMaxItems; j++)
        slot.Items[j] = "";
    slot.confident = 1;
    Items_k++;
}

// Level-wise loop. For each level it
//   1. prints the current frequent itemsets (Ck_Items),
//   2. joins pairs that share their first Gen-1 items into candidates,
//   3. discards candidates that have a Gen-sized subset missing from
//      Ck_Items (Apriori property),
//   4. counts the support of the remaining candidates in the transactions,
//   5. keeps those reaching min_sup as the next level.
// The loop ends when a level is empty.
void Apri::apriori_gen() {
    while (Items_k != 0) {
        std::cout << "-----------------" << "L" << Gen << "------------------" << std::endl;
        for (int i = 0; i < Items_k; i++) {
            for (int j = 0; j < kMaxItems; j++)
                if (Ck_Items[i].Items[j] != "")
                    std::cout << Ck_Items[i].Items[j] << " ";
            std::cout << Ck_Items[i].confident << std::endl;
        }

        Items_l = 0;
        // A candidate of the next level needs Gen+1 slots; once the itemsets
        // already fill the fixed array there is nothing left to join.
        const bool canGrow = Gen < kMaxItems;
        bool overflowed = false;

        for (int i = 0; canGrow && !overflowed && i < Items_k - 1; i++) {
            for (int j = i + 1; j < Items_k; j++) {
                // Join condition: the first Gen-1 items agree and the last
                // item differs.
                bool samePrefix = true;
                for (int m = 0; m < Gen - 1; m++) {
                    if (Ck_Items[i].Items[m] != Ck_Items[j].Items[m]) {
                        samePrefix = false;
                        break;
                    }
                }
                if (!samePrefix ||
                    Ck_Items[i].Items[Gen - 1] == Ck_Items[j].Items[Gen - 1])
                    continue;

                // Build the joined candidate: all of i plus the last item of j.
                Ci candidate;
                candidate.confident = 0;
                for (int n = 0; n < Gen; n++)
                    candidate.Items[n] = Ck_Items[i].Items[n];
                candidate.Items[Gen] = Ck_Items[j].Items[Gen - 1];

                // Apriori pruning: every subset obtained by leaving out one
                // item must itself be present in the current level.
                bool allSubsetsFrequent = true;
                for (int skip = 0; skip <= Gen && allSubsetsFrequent; skip++) {
                    Ci subset;
                    subset.confident = 0;
                    int filled = 0;
                    for (int h = 0; h <= Gen; h++) {
                        if (h != skip)
                            subset.Items[filled++] = candidate.Items[h];
                    }
                    allSubsetsFrequent = Apriori_check(subset);
                }
                if (!allSubsetsFrequent)
                    continue;

                if (Items_l >= kMaxCandidates) {
                    // Fixed candidate table exhausted; stop generating rather
                    // than overrun Cl_Items.
                    std::cerr << "Apri: more than " << kMaxCandidates
                              << " candidates at level " << (Gen + 1)
                              << ", remaining candidates dropped" << std::endl;
                    overflowed = true;
                    break;
                }
                Cl_Items[Items_l] = candidate;
                Items_l++;
            }
        }
        Gen++;

        // Count the support of every surviving candidate.
        for (int i = 0; i < Items_l; i++)
            Cl_Items[i].confident = ContainCi(Cl_Items[i]);

        // Clear the previous level before the new one is moved in.
        for (int m = 0; m < Items_k; m++) {
            for (int j = 0; j < kMaxItems; j++)
                Ck_Items[m].Items[j] = "";
            Ck_Items[m].confident = 0;
        }

        // Keep only candidates that reach the minimum support.
        int survivors = 0;
        for (int i = 0; i < Items_l; i++) {
            if (Cl_Items[i].confident >= min_sup) {
                Ck_Items[survivors] = Cl_Items[i];
                survivors++;
            }
        }
        Items_k = survivors;
        Items_l = 0;
    }
}

// Returns true when OldCi is present among the current-level itemsets,
// false when no equal itemset exists there.
bool Apri::Apriori_check(Ci OldCi) {
    for (int i = 0; i < Items_k; i++) {
        if (Ci_Equal(OldCi, Ck_Items[i]))
            return true;
    }
    return false;
}

// Returns true when a and b hold the same number of items and every item of
// a also occurs in b (order is ignored). Itemsets built by this class never
// repeat an item, so this is set equality. The comparison uses the actual
// item counts, not the current level.
bool Apri::Ci_Equal(Ci a, Ci b) {
    const int countA = countItems(a.Items);
    const int countB = countItems(b.Items);
    if (countA != countB)
        return false;

    for (int i = 0; i < countA; i++) {
        bool found = false;
        for (int j = 0; j < countB; j++) {
            if (a.Items[i] == b.Items[j]) {
                found = true;
                break;
            }
        }
        if (!found)
            return false;
    }
    return true;
}

// Returns the number of stored transactions that contain every item of
// NewCi. An empty itemset yields 0.
int Apri::ContainCi(Ci NewCi) {
    const int wanted = countItems(NewCi.Items);
    if (wanted == 0)
        return 0;

    int support = 0;
    for (int i = 0; i < kTransactionCount; i++) {
        const int available = countItems(First_set[i].Iteams);
        if (wanted > available)
            continue;  // transaction is too short to contain the itemset

        bool containsAll = true;
        for (int j = 0; j < wanted && containsAll; j++) {
            bool found = false;
            for (int m = 0; m < available; m++) {
                if (NewCi.Items[j] == First_set[i].Iteams[m]) {
                    found = true;
                    break;
                }
            }
            containsAll = found;
        }
        if (containsAll)
            support++;
    }
    return support;
}
宿舍