/*
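Apriori association-rule mining, in brief: suppose there are 20 samples (transactions), each containing some of the attributes I1, I2, I3, I4, I5 -- for example sample A is {I1,I2,I3} and sample B is {I1,I2,I4}.
With a minimum support of 2, an itemset is frequent when at least 2 samples contain every attribute in it. {I1,I2} qualifies because both A and B contain it (other samples may contain it too), so I1 and I2 can be regarded as strongly associated.
The search proceeds level by level until the largest frequent itemsets have been found.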
*/
#include<cstdio>
#include<iostream>
#include<string>
#include<vector>
#include<cstring>
#include<map>
#include<algorithm>
using namespace std;
/**
 * Sentinel "level" value used inside this file: find_freitem sets its level
 * counter to it when a pass produces no new itemsets, and showAprioriItem
 * prints the "final frequent itemsets" heading instead of a level number when
 * it receives it.
 * NOTE(review): this reuses the name of the standard UINT_MAX macro from
 * <climits>. If that header is ever included (directly or transitively) this
 * line becomes a macro redefinition, and within this file UINT_MAX is 100, not
 * the largest unsigned int.
 */
#define UINT_MAX 100
/**
 * Interactive Apriori frequent-itemset miner.
 *
 * Transactions are read from standard input by getItem(); find_freitem() then
 * grows frequent itemsets one level at a time through apri_gen().
 */
class Apriori{
public:
    /**
     * @param is number of transactions getItem() will read
     * @param mv minimum support an itemset needs to be kept
     */
    Apriori(size_t is=0,unsigned int mv=0)
        :item_size(is),
         min_value(mv)
    {}

    ~Apriori(){}

    /** Reads the transactions from standard input. */
    void getItem();

    /** Computes the frequent itemsets of the stored transactions. */
    map<vector<string>,unsigned int> find_freitem();

    /** Joins the (K-1)-level frequent itemsets in K_item into the K-level ones. */
    map<vector<string>,unsigned int> apri_gen(unsigned int K,map<vector<string>,unsigned int> K_item);

    /** Prints the itemsets in showmap under a heading for level K. */
    void showAprioriItem(unsigned int K,map<vector<string>,unsigned int> showmap);

private:
    /** Every input transaction, keyed by its 1-based number. */
    map<int,vector<string> > item;
    /** Frequent itemsets found so far, each mapped to its support. */
    map<vector<string>,unsigned int> K_item;
    /** Number of transactions. */
    size_t item_size;
    /** Minimum support. */
    unsigned int min_value;
};
void Apriori::getItem(){
int ci=item_size;
for(int i=0;i<ci;i++){
string str;
vector<string> temp;
cout<<"请输入第"<<i+1<<"个事务的项集(123 end):";
while(cin>>str&&str!="123"){
temp.push_back(str);
}
sort(temp.begin(),temp.end());
pair< map<int,vector<string> >::iterator,bool> ret=item.insert(make_pair(i+1,temp));
if(!ret.second){ /**插入失败**/
--i;
cout<<"你输入的元素已存在!请重新输入!"<<endl;
}
}
cout<<"--------------运行结果如下:--------------"<<endl;
}
/**
 * Runs the level-wise search and returns the last non-empty level of frequent
 * itemsets (also left in the K_item member).
 *
 * apri_gen is called with level 1, 2, 3, ... and its result replaces K_item
 * each time. The search stops when
 *   - a call returns the itemsets unchanged (no larger itemset is frequent);
 *     the result is then printed under the "final" heading, or
 *   - at most one itemset is left, so nothing remains to join; the result is
 *     printed under the number of the level that produced it.
 * No separate pruning pass is done here: apri_gen only returns itemsets whose
 * support it has counted against the transactions.
 *
 * @return the frequent itemsets with their supports, or an empty map (after
 *         printing a message) when there is no non-empty transaction.
 */
map<vector<string>,unsigned int> Apriori::find_freitem(){
    /* Nothing can be mined unless at least one transaction has an item. */
    bool hasItems=false;
    for(map<int,vector<string> >::const_iterator t=item.begin();t!=item.end();++t){
        if(!t->second.empty()){
            hasItems=true;
            break;
        }
    }
    if(!hasItems){
        cout<<"事务集为空!程序无法进行..."<<endl;
        return map<vector<string>,unsigned int>();
    }

    unsigned int level=1;             /* level requested from the next apri_gen call */
    unsigned int shownLevel=UINT_MAX; /* heading passed to showAprioriItem */
    while(true){
        const map<vector<string>,unsigned int> previous=K_item;
        K_item=apri_gen(level++,K_item);
        if(previous==K_item){
            /* apri_gen hands its input back when no candidate is frequent;
               UINT_MAX selects the "final" heading in showAprioriItem. */
            shownLevel=UINT_MAX;
            break;
        }
        if(K_item.size()<=1){
            /* level was already advanced past the one just computed. */
            shownLevel=level-1;
            break;
        }
    }
    cout<<endl;
    showAprioriItem(shownLevel,K_item);
    return K_item;
}
map<vector<string>,unsigned int> Apriori::apri_gen(unsigned int K,map<vector<string>,unsigned int> K_item){
if(1==K){ /**候选集C1**/
size_t c1=item_size;
map<int,vector<string> >::iterator mapit=item.begin();
vector<string> vec;
map<string,unsigned int> c1_itemtemp;
while(mapit!=item.end()){
vector<string> temp=mapit->second; /**事务中的项目**/
vector<string>::iterator vecit=temp.begin();
while(vecit!=temp.end()){
pair<map<string,unsigned int>::iterator,bool> ret=c1_itemtemp.insert(make_pair(*vecit++,1));
if(!ret.second){
++ret.first->second; /**该项目出现的个数加1**/
}
}
++mapit;
}
map<string,unsigned int>::iterator item_it=c1_itemtemp.begin();
map<vector<string>,unsigned int> c1_item;
while(item_it!=c1_itemtemp.end()){
vector<string> temp;
if(item_it->second>=min_value){ /**大于最小支持度的才加入到1-频繁项目集**/
temp.push_back(item_it->first);
c1_item.insert(make_pair(temp,item_it->second));
}
++item_it;
}
return c1_item;
}
else{
cout<<endl;
showAprioriItem(K-1,K_item); /**显示(k-1)-频繁项目集**/
map<vector<string>,unsigned int>::iterator ck_item_it1=K_item.begin(),ck_item_it2;
map<vector<string>,unsigned int> ck_item;
while(ck_item_it1!=K_item.end()){
ck_item_it2=ck_item_it1;
++ck_item_it2;
map<vector<string>,unsigned int>::iterator mit=ck_item_it2;
while(mit!=K_item.end()){
vector<string> vec,vec1,vec2;
vec1=ck_item_it1->first;
vec2=mit->first;
vector<string>::iterator vit1,vit2;
vit1=vec1.begin();
vit2=vec2.begin();
while(vit1<vec1.end()&&vit2<vec2.end()){
string str1=*vit1;
string str2=*vit2;
++vit1;
++vit2;
if(K==2||str1==str2){
if(vit1!=vec1.end()&&vit2!=vec2.end()){
vec.push_back(str1);
}
}
else break;
}
if(vit1==vec1.end()&&vit2==vec2.end()){ /**前K-1项相同**/
--vit1;
--vit2;
string str1=*vit1;
string str2=*vit2;
if(str1>str2){ /**再插入**/
vec.push_back(str2);
vec.push_back(str1);
}
else{
vec.push_back(str1);
vec.push_back(str2);
}
map<int,vector<string> >::iterator base_item=item.begin();
unsigned int Acount=0;
while(base_item!=item.end()){
unsigned int count=0,mincount=UINT_MAX;
vector<string> vv=base_item->second;
vector<string>::iterator vecit,bvit;
for(vecit=vec.begin();vecit<vec.end();vecit++){
string t=*vecit;
count=0;
for(bvit=vv.begin();bvit<vv.end();bvit++){
if(t==*bvit) count++;
}
mincount=(count<mincount?count:mincount);
}
if(mincount>=1&&mincount!=UINT_MAX) /**该项集是该事务的子集**/
Acount+=mincount;
++base_item;
}
if(Acount>=min_value&&Acount!=0){ /**大于等于最小支持度**/
sort(vec.begin(),vec.end());
pair<map<vector<string>,unsigned int>::iterator,bool> ret=ck_item.insert(make_pair(vec,Acount));
if(!ret.second){
ret.first->second+=Acount;
}
}
}
++mit;
}
++ck_item_it1;
}
if(ck_item.empty()) return K_item;
else return ck_item;
}
}
void Apriori::showAprioriItem(unsigned int K,map<vector<string>,unsigned int> showmap){
map<vector<string>,unsigned int>::iterator showit=showmap.begin();
if(K!=UINT_MAX) cout<<endl<<"第 "<<K<<" 级频繁项集为:"<<endl;
else cout<<"最终的频繁项集为:"<<endl;
cout<<"项 集"<<" "<<"频率"<<endl;
while(showit!=showmap.end()){
vector<string> vec=showit->first;
vector<string>::iterator vecit=vec.begin();
cout<<"{";
while(vecit!=vec.end()){
cout<<*vecit<<" ";
++vecit;
}
cout<<"}"<<" ";
cout<<showit->second<<endl;
++showit;
}
}
/**
 * Parses a non-negative decimal integer from a C string.
 *
 * @param str NUL-terminated text to parse; may be null.
 * @return the parsed value, or 0 when str is null, empty, contains any
 *         character other than '0'-'9', or names a value that does not fit in
 *         unsigned int. 0 doubles as the error value, so the text "0" cannot
 *         be told apart from a failure.
 */
unsigned int parseNumber(const char *str){
    if(!str) return 0;
    /* Largest unsigned int, spelled without the UINT_MAX macro name. */
    const unsigned int maxValue=static_cast<unsigned int>(-1);
    unsigned int num=0;
    for(const char *p=str;*p!='\0';++p){
        if(*p<'0'||*p>'9') return 0;
        const unsigned int digit=static_cast<unsigned int>(*p-'0');
        /* num*10+digit would exceed maxValue: report failure instead of
           letting the arithmetic wrap around. */
        if(num>(maxValue-digit)/10) return 0;
        num=num*10+digit;
    }
    return num;
}
/**
 * Program entry point. The interactive driver inside the block comment below
 * is disabled, so the program currently does nothing and returns 0.
 */
int main(){
/*
Disabled driver, kept for reference. It asks for the number of transactions
and the minimum support (re-prompting while parseNumber returns 0), reads the
transactions with getItem, runs find_freitem and prints the result under the
"final" heading.
NOTE(review): `char *str=new char; cin>>str;` allocates a single char and then
reads a whole token into it, which writes past the allocation, and the buffer
is never freed. Read into a std::string and pass c_str() to parseNumber before
re-enabling this code.

unsigned int itemsize=0;
unsigned int min;
do{
cout<<"请输入事务数:";
char *str=new char;
cin>>str;
itemsize=parseNumber(str); //number of transactions
if(itemsize==0){
cout<<"请输入大于0正整数!"<<endl;
}
}while(itemsize==0);
do{
cout<<"请输入最小阈值:";
char *str=new char;
cin>>str;
min=parseNumber(str); //minimum support
if(min==0){
cout<<"请输入大于0正整数!"<<endl;
}
}while(min==0);
Apriori a(itemsize,min);
a.getItem();
map<vector<string>,unsigned int> AprioriMap=a.find_freitem(); //find the frequent itemsets
a.showAprioriItem(UINT_MAX,AprioriMap);
*/
return 0;
}