上千万或上亿数据（有重复），统计其中出现次数最多的N个数据。C++实现

上千万或上亿的数据，现在的机器内存应该能存下，所以考虑采用map/hash_map/二叉搜索树/红黑树等来统计每个数据出现的次数。

然后就是取出前N个出现次数最多的数据了，可以用第2题提到的堆机制完成（维护一个大小为N的最小堆，遍历统计结果时，若当前次数大于堆顶则替换堆顶并调整堆）。

方法一:使用hash_map和multimap统计

#include <iostream>
#include <map>
#include <string>
#include <unordered_map>
using namespace std;

// Method 1 demo: count how often each string occurs with a hash map, then
// re-index the results by count in a multimap and print the most frequent ones.
int main(void)
{
	// Stand-in for the large data set to be analysed.
	const char* a[5] = {"ab", "b", "ccc", "ab", "ccc"};

	// Frequency table. std::unordered_map is the standard replacement for the
	// non-standard hash_map. Keys are std::string so that hashing and equality
	// look at the characters; a char* key is hashed by address, which only
	// merges equal strings when they happen to share storage.
	unordered_map<string, int> hp;
	for (int i = 0; i < 5; i++)
	{
		// operator[] value-initialises a missing count to 0 before the increment,
		// so no separate find() is needed.
		++hp[a[i]];
	}

	// Re-index by frequency: multimap keeps keys ascending and allows equal counts.
	multimap<int, string> m;
	for (unordered_map<string, int>::const_iterator it = hp.begin(); it != hp.end(); ++it)
		m.insert(pair<int, string>(it->second, it->first));

	// Print the two most frequent strings, walking down from the highest count.
	// The rend() check stops early when there are fewer than two distinct
	// strings instead of stepping before the first element.
	multimap<int, string>::const_reverse_iterator t = m.rbegin();
	for (int i = 1; i <= 2 && t != m.rend(); i++, ++t)
	{
		cout << t->second << endl;
	}
	return 0;
}

方法二：使用STL的map(map内部自建一棵红黑树(一种自平衡二叉查找树)，这棵树具有对数据自动排序的功能，所以在map内部所有的数据都是按键有序的。注意这里是按键（字符串）排序，而不是按出现次数排序，下面的代码只完成了次数统计，取前N个仍需借助方法一中的multimap或堆)

// Method 2 demo: tally whitespace-separated words read from standard input
// in a std::map, then print every "word:count" pair.
int main(int, char **)
{
    typedef map<string, int> WordCounts;
    WordCounts counts;

    // operator[] creates a missing entry with count 0, so each word read
    // simply bumps its own counter. Reading stops at end of input or on error.
    for (string word; cin >> word; ) {
        counts[word] += 1;
    }

    // std::map iterates in ascending key order, so the output is sorted by
    // word, not by frequency.
    WordCounts::const_iterator entry = counts.begin();
    while (entry != counts.end()) {
        cout << entry->first << ":" << entry->second << endl;
        ++entry;
    }
}

本站仅提供存储服务，所有内容均由用户发布，如发现有害或侵权内容，请点击举报。