- 基本Apriori算法:
主要思想是:连接步+Apriori性质
- 统计单项在的支持度,再利用连接步生成2项;
- 对2项采取Apriori性质剪枝,得到可能的2项;
- 再将2项的再原始中统计得出其支持度,并减去达不到支持度的项。
- 按照上面步骤重复,直到不能产生新的更多项。
因为从网上找的程序都不太好调试,特贴出自己编写的程序,当然这个针对的是固定的一个对象,通过修改能达到同样的目的,欢迎指出错误!
主程序:
#include <iostream>
#include <string>
#include <math.h>
#include "Apri.h"
int main(){
Apri aa(2);
First_Iteam First_set[9];
First_set[0].name = "T100";
First_set[0].Iteams[0] = "I1";
First_set[0].Iteams[1] = "I2";
First_set[0].Iteams[2] = "I5";
First_set[1].name = "T200";
First_set[1].Iteams[0] = "I2";
First_set[1].Iteams[1] = "I4";
First_set[2].name = "T300";
First_set[2].Iteams[0] = "I2";
First_set[2].Iteams[1] = "I3";
First_set[3].name = "T400";
First_set[3].Iteams[0] = "I1";
First_set[3].Iteams[1] = "I2";
First_set[3].Iteams[2] = "I4";
First_set[4].name = "T500";
First_set[4].Iteams[0] = "I1";
First_set[4].Iteams[1] = "I3";
First_set[5].name = "T600";
First_set[5].Iteams[0] = "I2";
First_set[5].Iteams[1] = "I3";
First_set[6].name = "T700";
First_set[6].Iteams[0] = "I1";
First_set[6].Iteams[1] = "I3";
First_set[7].name = "T800";
First_set[7].Iteams[0] = "I1";
First_set[7].Iteams[1] = "I2";
First_set[7].Iteams[2] = "I3";
First_set[7].Iteams[3] = "I5";
First_set[8].name = "T900";
First_set[8].Iteams[0] = "I1";
First_set[8].Iteams[1] = "I2";
First_set[8].Iteams[2] = "I3";
aa.setFirstSet(First_set);
aa.Find_frequent_1_itemsets();
aa.print();
aa.apriori_gen();
cin.get();
return 0;
}
Apriori类
.h文件:
#include <iostream>
#include <string>
using namespace std;
struct First_Iteam{
string name;
string Iteams[5];
};
struct Ci{
string Items[5];
int confident;
};
class Apri{
private:
First_Iteam First_set[9];//存放事物,相当于从数据库中读取的信息,固定为9
Ci Ck_Items[100];//计算的K-1的统计信息
Ci Cl_Items[100];//新计算的第k个统计信息
int Items_k;//k-1的统计信息的数目
int Items_l;//k个统计信息的数目
int Gen;//现在的代数,开始值为1
int min_sup;//最小支持度
public:
Apri(int);
void print();
void setFirstSet(First_Iteam*);
void Find_frequent_1_itemsets();//查找第一代的数据及其信息
void Find_ck(string item);//查找是否在K-1中存在此对象
void apriori_gen();//连接步,将K-1中的数据连接成K的数据
bool Apriori_check(Ci);//在K-1中检查一个k-1的字符串是否达到了min_sup,或者是否存在这样的K-1
bool Ci_Equal(Ci,Ci);//判断两个串是否相等
int ContainCi(Ci);//判断在原集合中对Ci的支持度
};
.cpp文件:
#include "Apri.h"
Apri::Apri(int a){
min_sup = a;
}
void Apri::print(){
cout << "原始项目集:" << endl;
for(int i = 0; i < 9; i++)
{
cout << First_set[i].name << ": ";
for( int j = 0; j < 5; j++)
if(First_set[i].Iteams[j] != "")
cout << First_set[i].Iteams[j] << " ";
cout << endl;
}
}
void Apri::setFirstSet(First_Iteam a[]){
for(int i = 0; i < 9; i++)
First_set[i] = a[i];
}
void Apri::Find_frequent_1_itemsets(){
Gen = 1;
Items_k = 0;
for(int i = 0; i < 9; i++)
{
int j = 0;
while(First_set[i].Iteams[j] != ""){
Find_ck(First_set[i].Iteams[j]);
j++;
}
}
}
//查找是否在第一次中存在这样的项,并统计信息,
//在统计信息时要做到判断准确,不能失误。
void Apri::Find_ck(string item){
int i;
bool isbreak = false;
for(i = 0; i < Items_k; i++)
if(item == Ck_Items[i].Items[0])
{
isbreak =!isbreak;
break;
}
if(isbreak)
Ck_Items[i].confident++;
else
{
Ck_Items[i].Items[0] = item;
Ck_Items[i].confident = 1;
Items_k++;
}
}
void Apri::apriori_gen(){
while(Items_k != 0)
{
cout << "-----------------" << "L" << Gen << "------------------" << endl;
for(int i = 0; i < Items_k; i++)
{
for(int j = 0; j < 5; j++)
if(Ck_Items[i].Items[j] != "")
cout << Ck_Items[i].Items[j] <<" ";
cout << Ck_Items[i].confident<< endl;
}
Items_l = 0;
for(int i = 0; i < Items_k-1; i++)
{
for(int j = i+1; j < Items_k; j++)
{
bool isLink = true;
//判断是否能够连接,前面k-2个都相同,但是K-1是不同的
for(int m = 0; m < Gen-1; m++)
{
if(Ck_Items[i].Items[m] != Ck_Items[j].Items[m])
{
isLink = false;
break;
}
}
if(isLink)
{
if(Ck_Items[i].Items[Gen-1] != Ck_Items[j].Items[Gen-1])
//连接条件成立,进行连接;
{
Ci newItem;//生成新连接后得到的串
for(int n = 0; n < Gen; n++)
newItem.Items[n] = Ck_Items[i].Items[n];
newItem.Items[Gen] = Ck_Items[j].Items[Gen-1];
//对新生成的串进行数据Apriori剪枝,判断是否合法
Ci AllItem;//生成k的所有的k-1子串,并进行检查
bool ThroughApriori = true;//是否通过Apriori检查
for(int g = 0; g <= Gen; g++)
{
int start = 0;
for(int h=0; h <= Gen; h++)
{
if(h != g)
{
AllItem.Items[start] = newItem.Items[h];
start++;
}
}
//对K-1的子串检查是否达到min_sup
if(!Apriori_check(AllItem))
{
ThroughApriori = false;
break;
}
}
if(ThroughApriori)//通过了Apriori性质检查,将此条加入到K中
{
Cl_Items[Items_l] = newItem;
Cl_Items[Items_l].confident = 0;
Items_l++;
}
}
}
}
}
Gen++;
//接下来处理L的数据的支持度,数据存放在Cl_Items中
for(int i = 0; i < Items_l; i++)
{
Cl_Items[i].confident = ContainCi(Cl_Items[i]);
}
//剪去小于最小支持度的并将CL_Items中的数据转移到Ck_Items中
for(int m = 0; m < Items_k; m++)
{
for(int j = 0; j < 5; j++)
Ck_Items[m].Items[j] = "";
Ck_Items[m].confident = 0;
}
int new_Items_l = 0;
for(int i = 0; i < Items_l; i++)
{
if(Cl_Items[i].confident >= min_sup)
{
for(int j = 0; j < 5; j++)
Ck_Items[new_Items_l].Items[j] = Cl_Items[i].Items[j];
Ck_Items[new_Items_l].confident = Cl_Items[i].confident;
new_Items_l++;
}
}
Items_k = new_Items_l;
Items_l = 0;
}
}
//当检查时不存在这样的K-1或者存在相等的K-1但是支持度小于min_sup则返回falsse
bool Apri::Apriori_check(Ci OldCi){
bool pass = false;
for(int i = 0; i < Items_k; i++)
{
/*if(Ci_Equal(OldCi,Ck_Items[i] && Ck_Items[i].confident < min_sup)
{
pass = false;
break;
}
*/
if(Ci_Equal(OldCi,Ck_Items[i]))
{
pass = true;
break;
}
}
return pass;
}
//判断该两个Ci是否相等
bool Apri::Ci_Equal(Ci a,Ci b){
bool isequal = false;
int num_a = 0,num_b = 0;
while(a.Items[num_a] != "")
num_a++;
while(b.Items[num_b] != "")
num_b++;
if(num_a != num_b)
return false;
for(int i = 0;i < Gen; i++)
{
isequal = false;
for(int j = 0; j < Gen; j++)
if(a.Items[i] == b.Items[j])
{
isequal = true;
break;
}
if(!isequal)
break;
}
return isequal;
}
int Apri::ContainCi(Ci NewCi){
int num_new = 0, num_item, num_contain = 0;
while(NewCi.Items[num_new] != "")
num_new++;
bool isequal = false;
for(int i = 0; i < 9; i++)
{
num_item = 0;
while(First_set[i].Iteams[num_item] != "")
num_item++;
if(num_new > num_item)
continue;
else
{
for(int j = 0; j < num_new; j++)
{
isequal = false;
for(int m = 0; m < num_item; m++)
{
if(NewCi.Items[j] == First_set[i].Iteams[m])
{
isequal = true;
break;
}
}
if(!isequal)
break;
}
if(isequal)
num_contain++;
}
}
return num_contain;
}
宿舍