FP-growth挖掘算法
步骤一
扫描数据库,扫描数据库一次,得到频繁1-项集,把项按支持度递减排序,再一次扫描数据库,建立FP-tree
步骤二
对每个项,生成它的 条件模式库
步骤三
用条件模式库构造对应的条件FP-tree,递归构造条件 FP-trees 同时增长其包含的频繁集,如果条件FP-tree直包含一个路径,则直接生成所包含的频繁集
C 源码
1 #include<bits/stdc .h> 2 #include<string> 3 #include<algorithm> 4 #include<vector> 5 #include<map> 6 #include<sstream> 7 #define suport 0.001//最小支持度 8 using namespace std; 9 struct FPtreeNode{//孩子兄弟链表法存储FP-tree 10 int data; 11 int count; 12 FPtreeNode *father; 13 FPtreeNode *children; 14 FPtreeNode *brother; 15 FPtreeNode *next; 16 FPtreeNode(){ 17 data=0; 18 count=0; 19 father=NULL; 20 children=NULL; 21 brother=NULL; 22 next=NULL; 23 } 24 }; 25 struct resultNode{//结果 26 string data; 27 int count; 28 resultNode(){ 29 count=0; 30 } 31 }; 32 struct listNode{//头表 33 int data; 34 int count; 35 FPtreeNode *next; 36 }; 37 struct fptreeNode{//递归过程中的子FP-tree 38 int data; 39 int count; 40 fptreeNode(){ 41 data=0; 42 count=0; 43 } 44 }; 45 int line=0; 46 vector<string> Getfile(){ 47 vector<string> file; 48 ifstream ifile("D://retail.txt"); 49 if(!ifile){ 50 cout<<"open file error"<<endl; 51 } 52 else{ 53 string temp; 54 while(getline(ifile,temp)){ 55 line ; 56 file.insert(file.end(),temp); 57 } 58 } 59 ifile.close(); 60 return file; 61 } 62 bool cmp( listNode &a,listNode &b){ 63 return a.count>b.count; 64 } 65 66 vector<listNode> Getheadlist(vector<string> &file){ 67 vector<listNode>L1; 68 map<string,int>r1; 69 string temp; 70 string lk; 71 for(int f=0;f<file.size();f ){//第一次扫描数据库 72 temp=file[f]; 73 int i; 74 for( i=0;i<temp.size();i ){ 75 while(temp[i]!=' '&&temp[i]!='\n'&&i!=temp.size()){ 76 lk =temp[i]; 77 i ; 78 } 79 if(r1.find(lk)!=r1.end()) 80 r1[lk] ; 81 else 82 r1[lk]=1; 83 lk.clear();// 84 } 85 } 86 temp.clear(); 87 map<string,int>::iterator it; 88 for(it=r1.begin();it!=r1.end();it ){//待删除 89 90 if(it->second>=ceil(suport*line)){ 91 string s=it->first; 92 int x=atoi(s.c_str());//转换成整数 93 listNode xx; 94 xx.data=x; 95 xx.count=it->second; 96 xx.next=NULL; 97 L1.insert(L1.end(),xx); 98 } 99 }100 sort(L1.begin(),L1.end(),cmp);101 return L1;102 }103 bool Isin(string temp,string str){104 int j;105 string s;106 for( j=0;j<temp.size();j ){//遍历temp107 while(temp[j]!=' '&&j<temp.size()&&temp[j]!='\n'){108 s.insert(s.end(),temp[j]);109 j ;110 }111 if(s==str)112 break;113 s.clear();114 }115 if(j>=temp.size())116 return 0;117 else118 return 1; 119 120 }121 vector<vector<int> > Get_FPfile(vector<string> &file,vector<listNode> &L1){//第二次扫描数据库 删除非频繁一项 122 string temp;123 vector<vector<int> > rfile;124 vector<int >ri;125 for(int f=0;f<file.size();f ){126 temp=file[f];127 for(int k=0;k<L1.size();k ){128 string s;129 int n=L1[k].data;130 131 stringstream ss;132 ss<<n;133 ss>>s;134 if(Isin(temp,s)){135 ri.push_back(n);136 }137 }138 if(ri.size()>0){139 rfile.push_back(ri);140 ri.clear();141 }142 temp.clear();143 }144 file.clear();145 return rfile;146 }147 int c=0; 148 void Linknext(FPtreeNode *newNode,vector<listNode> &L1){149 for(int m=0;m<L1.size();m ){150 if(L1[m].data==newNode->data){151 if(L1[m].next==NULL){152 L1[m].next=newNode;153 }154 else{155 FPtreeNode *t1=L1[m].next;156 FPtreeNode *t2=L1[m].next;157 while(t1){158 t2=t1;159 t1=t1->next;160 }161 t2->next=newNode; 162 } 163 break;164 }165 } 166 167 }168 FPtreeNode* Buildtree( vector<vector<int> > &rfile,vector<listNode> &L1){169 FPtreeNode *head=new FPtreeNode;170 FPtreeNode *p=head;171 FPtreeNode *q=head;172 int flag=0;173 for(int i=0;i<rfile.size();i ){174 p=head;175 int j=0;176 while(j<rfile[i].size()){177 flag=0; 178 if(i==0){//第一条 179 FPtreeNode *newNode=new FPtreeNode;180 c ;181 newNode->count=1;182 newNode->data=rfile[i][j];183 newNode->father=p;184 p->children=newNode;185 p=p->children; 186 j ; 187 for(int m=0;m<L1.size();m ){188 if(L1[m].data==newNode->data){ 189 L1[m].next=newNode;190 break;191 }192 } 193 } 194 else{195 p=p->children;196 while(p&&j<rfile[i].size()){ 197 if(p->data==rfile[i][j]){198 p->count ;199 q=p;//q->chilren=p;200 p=p->children;201 j ;202 flag=1;203 }204 else{//205 q=p;//q->brother=p;206 p=p->brother;207 flag=2; 208 }209 210 }211 if(flag==1){ 212 while(j<rfile[i].size()){213 FPtreeNode *newNode=new FPtreeNode;214 c ;215 newNode->count=1;216 newNode->father=q;217 q->children=newNode;218 newNode->data=rfile[i][j];219 q=q->children;220 j ;221 //Linknext();222 Linknext(newNode,L1); 223 }224 225 226 }227 else if(flag==2){228 FPtreeNode *newNode=new FPtreeNode;c ;229 newNode->count=1;230 newNode->data=rfile[i][j];231 q->brother=newNode;232 newNode->father=q->father;233 q=q->brother;234 j ;235 Linknext(newNode,L1);236 while(j<rfile[i].size()){237 FPtreeNode *newNode=new FPtreeNode;238 c ;239 newNode->count=1;240 newNode->father=q;241 q->children=newNode;242 newNode->data=rfile[i][j];243 q=q->children;244 j ;245 //Linknext();246 Linknext(newNode,L1);247 }248 249 250 }251 }252 }253 }254 return head;255 }256 vector<string> GetFrequentItems(listNode &lk,FPtreeNode* &head){//生成条件模式基 rfile257 FPtreeNode *p=lk.next;258 vector<string> rfile;259 while(p){260 FPtreeNode* q=p;261 vector<string> temp;262 while(q->father!=head){ 263 q=q->father;264 stringstream ss;265 string x;266 int n;267 n=q->data;268 269 ss<<n;270 ss>>x;271 272 temp.push_back(x " ");273 }274 reverse(temp.begin(),temp.end());275 string s;276 277 for(int j=0;j<temp.size();j ){278 s =temp[j];279 }280 for(int i=0;i<p->count;i ){281 if(s.size()>0)282 rfile.push_back(s); 283 }284 s.clear();285 p=p->next;286 }287 return rfile;288 }289 vector<resultNode> result;290 void Getresult(vector<listNode> &headlist,FPtreeNode* &head,string &base,vector<resultNode> &result){291 if(headlist.empty()){292 return;293 }294 for(auto p = headlist.rbegin(); p != headlist.rend(); p ){295 resultNode temp;296 int n;297 n=p->data;//int to string298 stringstream ss;299 string x;300 ss<<n;301 ss>>x;302 string xx=base " " x;303 temp.data=xx;304 temp.count=p->count;305 result.push_back(temp);306 /*****递归******/307 //产生条件模式基308 vector<string> file1=GetFrequentItems(*p,head);309 310 311 vector<listNode>headlist1= Getheadlist(file1);//getL1312 313 vector<vector<int> > rfile1=Get_FPfile(file1,headlist1);314 315 //建树316 FPtreeNode* Tree=Buildtree(rfile1,headlist1);317 318 string s=base " " x;319 //产生结果320 Getresult(headlist1,Tree,s,result);321 }322 323 }324 void Print(){325 for (auto p =result.cbegin(); p != result.cend(); p )326 {327 cout << p->data << " "<<"("<<p->count<<")"<< endl;328 }329 }330 int main(){331 vector<string> file=Getfile();332 vector<listNode> headlist=Getheadlist(file);333 334 vector<vector<int> >rfile=Get_FPfile(file,headlist);335 336 FPtreeNode* head=Buildtree(rfile,headlist);337 string base=" ";338 339 Getresult(headlist,head,base,result);340 Print();341 cout<<result.size(); 342 return 0;343 }View Code
来源:http://www.icode9.com/content-1-131601.html
联系客服