具体可见:http://en.wikipedia.org/wiki/K-means_clustering
图1
1: public class KMeansMapper extends Mapper<WritableComparable<?>, VectorWritable, Text, ClusterObservations> {
2:
3: private KMeansClusterer clusterer;
4:
5: private final Collection<Cluster> clusters = new ArrayList<Cluster>();
6:
7: @Override
8: protected void map(WritableComparable<?> key, VectorWritable point, Context context)
9: throws IOException, InterruptedException {
10: this.clusterer.emitPointToNearestCluster(point.get(), this.clusters, context);
11: }
12:
13: @Override
14: protected void setup(Context context) throws IOException, InterruptedException {
15: super.setup(context);
16: Configuration conf = context.getConfiguration();
17: try {
18: ClassLoader ccl = Thread.currentThread().getContextClassLoader();
19: DistanceMeasure measure = ccl.loadClass(conf.get(KMeansConfigKeys.DISTANCE_MEASURE_KEY))
20: .asSubclass(DistanceMeasure.class).newInstance();
21: measure.configure(conf);
22:
23: this.clusterer = new KMeansClusterer(measure);
24:
25: String clusterPath = conf.get(KMeansConfigKeys.CLUSTER_PATH_KEY);
26: if (clusterPath != null && clusterPath.length() > 0) {
27: KMeansUtil.configureWithClusterInfo(conf, new Path(clusterPath), clusters);
28: if (clusters.isEmpty()) {
29: throw new IllegalStateException("No clusters found. Check your -c path.");
30: }
31: }
32: } catch (ClassNotFoundException e) {
33: throw new IllegalStateException(e);
34: } catch (IllegalAccessException e) {
35: throw new IllegalStateException(e);
36: } catch (InstantiationException e) {
37: throw new IllegalStateException(e);
38: }
39: }
40: }
1: public class KMeansCombiner extends Reducer<Text, ClusterObservations, Text, ClusterObservations> {
2:
3: @Override
4: protected void reduce(Text key, Iterable<ClusterObservations> values, Context context)
5: throws IOException, InterruptedException {
6: Cluster cluster = new Cluster();
7: for (ClusterObservations value : values) {
8: cluster.observe(value);
9: }
10: context.write(key, cluster.getObservations());
11: }
12:
13: }
1: public class KMeansReducer extends Reducer<Text, ClusterObservations, Text, Cluster> {
2:
3: private Map<String, Cluster> clusterMap;
4: private double convergenceDelta;
5: private KMeansClusterer clusterer;
6:
7: @Override
8: protected void reduce(Text key, Iterable<ClusterObservations> values, Context context)
9: throws IOException, InterruptedException {
10: Cluster cluster = clusterMap.get(key.toString());
11: for (ClusterObservations delta : values) {
12: cluster.observe(delta);
13: }
14: // force convergence calculation
15: boolean converged = clusterer.computeConvergence(cluster, convergenceDelta);
16: if (converged) {
17: context.getCounter("Clustering", "Converged Clusters").increment(1);
18: }
19: cluster.computeParameters();
20: context.write(new Text(cluster.getIdentifier()), cluster);
21: }
22:
23: @Override
24: protected void setup(Context context) throws IOException, InterruptedException {
25: super.setup(context);
26: Configuration conf = context.getConfiguration();
27: try {
28: ClassLoader ccl = Thread.currentThread().getContextClassLoader();
29: DistanceMeasure measure = ccl.loadClass(conf.get(KMeansConfigKeys.DISTANCE_MEASURE_KEY))
30: .asSubclass(DistanceMeasure.class).newInstance();
31: measure.configure(conf);
32:
33: this.convergenceDelta = Double.parseDouble(conf.get(KMeansConfigKeys.CLUSTER_CONVERGENCE_KEY));
34: this.clusterer = new KMeansClusterer(measure);
35: this.clusterMap = new HashMap<String, Cluster>();
36:
37: String path = conf.get(KMeansConfigKeys.CLUSTER_PATH_KEY);
38: if (path.length() > 0) {
39: Collection<Cluster> clusters = new ArrayList<Cluster>();
40: KMeansUtil.configureWithClusterInfo(conf, new Path(path), clusters);
41: setClusterMap(clusters);
42: if (clusterMap.isEmpty()) {
43: throw new IllegalStateException("Cluster is empty!");
44: }
45: }
46: } catch (ClassNotFoundException e) {
47: throw new IllegalStateException(e);
48: } catch (IllegalAccessException e) {
49: throw new IllegalStateException(e);
50: } catch (InstantiationException e) {
51: throw new IllegalStateException(e);
52: }
53: }
54: }
1: public class KMeansClusterMapper
2: extends Mapper<WritableComparable<?>,VectorWritable,IntWritable,WeightedVectorWritable> {
3:
4: private final Collection<Cluster> clusters = new ArrayList<Cluster>();
5: private KMeansClusterer clusterer;
6:
7: @Override
8: protected void map(WritableComparable<?> key, VectorWritable point, Context context)
9: throws IOException, InterruptedException {
10: clusterer.outputPointWithClusterInfo(point.get(), clusters, context);
11: }
12:
13: @Override
14: protected void setup(Context context) throws IOException, InterruptedException {
15: super.setup(context);
16: Configuration conf = context.getConfiguration();
17: try {
18: ClassLoader ccl = Thread.currentThread().getContextClassLoader();
19: DistanceMeasure measure = ccl.loadClass(conf.get(KMeansConfigKeys.DISTANCE_MEASURE_KEY))
20: .asSubclass(DistanceMeasure.class).newInstance();
21: measure.configure(conf);
22:
23: String clusterPath = conf.get(KMeansConfigKeys.CLUSTER_PATH_KEY);
24: if (clusterPath != null && clusterPath.length() > 0) {
25: KMeansUtil.configureWithClusterInfo(conf, new Path(clusterPath), clusters);
26: if (clusters.isEmpty()) {
27: throw new IllegalStateException("No clusters found. Check your -c path.");
28: }
29: }
30: this.clusterer = new KMeansClusterer(measure);
31: } catch (ClassNotFoundException e) {
32: throw new IllegalStateException(e);
33: } catch (IllegalAccessException e) {
34: throw new IllegalStateException(e);
35: } catch (InstantiationException e) {
36: throw new IllegalStateException(e);
37: }
38: }
39: }
图6
1: VL-1{n=68 c=[5.974, 2.771, 4.501, 1.472] r=[0.471, 0.291, 0.523, 0.303]}
2: Weight: Point:
3: 1.0: [7.000, 3.200, 4.700, 1.400]
4: 1.0: [6.400, 3.200, 4.500, 1.500]
5: 1.0: [6.900, 3.100, 4.900, 1.500]
6: 1.0: [5.500, 2.300, 4.000, 1.300]
7: 1.0: [6.500, 2.800, 4.600, 1.500]
8: 1.0: [5.700, 2.800, 4.500, 1.300]
9: 1.0: [6.300, 3.300, 4.700, 1.600]
10: 1.0: [4.900, 2.400, 3.300, 1.000]
11: 1.0: [6.600, 2.900, 4.600, 1.300]
12: 1.0: [5.200, 2.700, 3.900, 1.400]
13: 1.0: [5.000, 2.000, 3.500, 1.000]
14: 1.0: [5.900, 3.000, 4.200, 1.500]
15: 1.0: [6.000, 2.200, 4.000, 1.000]
16: 1.0: [6.100, 2.900, 4.700, 1.400]
17: 1.0: [5.600, 2.900, 3.600, 1.300]
18: 1.0: [6.700, 3.100, 4.400, 1.400]
19: 1.0: [5.600, 3.000, 4.500, 1.500]
20: 1.0: [5.800, 2.700, 4.100, 1.000]
21: 1.0: [6.200, 2.200, 4.500, 1.500]
22: 1.0: [5.600, 2.500, 3.900, 1.100]
23: 1.0: [5.900, 3.200, 4.800, 1.800]
24: 1.0: [6.100, 2.800, 4.000, 1.300]
25: 1.0: [6.300, 2.500, 4.900, 1.500]
26: 1.0: [6.100, 2.800, 4.700, 1.200]
27: 1.0: [6.400, 2.900, 4.300, 1.300]
28: 1.0: [6.600, 3.000, 4.400, 1.400]
29: 1.0: [6.800, 2.800, 4.800, 1.400]
30: 1.0: [6.700, 3.000, 5.000, 1.700]
31: 1.0: [6.000, 2.900, 4.500, 1.500]
32: 1.0: [5.700, 2.600, 3.500, 1.000]
33: 1.0: [5.500, 2.400, 3.800, 1.100]
34: 1.0: [5.500, 2.400, 3.700, 1.000]
35: 1.0: [5.800, 2.700, 3.900, 1.200]
36: 1.0: [6.000, 2.700, 5.100, 1.600]
37: 1.0: [5.400, 3.000, 4.500, 1.500]
38: 1.0: [6.000, 3.400, 4.500, 1.600]
39: 1.0: [6.700, 3.100, 4.700, 1.500]
40: 1.0: [6.300, 2.300, 4.400, 1.300]
41: 1.0: [5.600, 3.000, 4.100, 1.300]
42: 1.0: [5.500, 2.500, 4.000, 1.300]
43: 1.0: [5.500, 2.600, 4.400, 1.200]
44: 1.0: [6.100, 3.000, 4.600, 1.400]
45: 1.0: [5.800, 2.600, 4.000, 1.200]
46: 1.0: [5.000, 2.300, 3.300, 1.000]
47: 1.0: [5.600, 2.700, 4.200, 1.300]
48: 1.0: [5.700, 3.000, 4.200, 1.200]
49: 1.0: [5.700, 2.900, 4.200, 1.300]
50: 1.0: [6.200, 2.900, 4.300, 1.300]
51: 1.0: [5.100, 2.500, 3.000, 1.100]
52: 1.0: [5.700, 2.800, 4.100, 1.300]
53: 1.0: [5.800, 2.700, 5.100, 1.900]
54: 1.0: [4.900, 2.500, 4.500, 1.700]
55: 1.0: [5.700, 2.500, 5.000, 2.000]
56: 1.0: [5.800, 2.800, 5.100, 2.400]
57: 1.0: [6.000, 2.200, 5.000, 1.500]
58: 1.0: [5.600, 2.800, 4.900, 2.000]
59: 1.0: [6.300, 2.700, 4.900, 1.800]
60: 1.0: [6.200, 2.800, 4.800, 1.800]
61: 1.0: [6.100, 3.000, 4.900, 1.800]
62: 1.0: [6.300, 2.800, 5.100, 1.500]
63: 1.0: [6.100, 2.600, 5.600, 1.400]
64: 1.0: [6.000, 3.000, 4.800, 1.800]
65: 1.0: [5.800, 2.700, 5.100, 1.900]
66: 1.0: [6.300, 2.500, 5.000, 1.900]
67: 1.0: [5.900, 3.000, 5.100, 1.800]
68: VL-0{n=51 c=[5.008, 3.400, 1.494, 0.261] r=[0.346, 0.395, 0.273, 0.159]}
69: Weight: Point:
70: 1.0: [5.100, 3.500, 1.400, 0.200]
71: 1.0: [4.900, 3.000, 1.400, 0.200]
72: 1.0: [4.700, 3.200, 1.300, 0.200]
73: 1.0: [4.600, 3.100, 1.500, 0.200]
74: 1.0: [5.000, 3.600, 1.400, 0.200]
75: 1.0: [5.400, 3.900, 1.700, 0.400]
76: 1.0: [4.600, 3.400, 1.400, 0.300]
77: 1.0: [5.000, 3.400, 1.500, 0.200]
78: 1.0: [4.400, 2.900, 1.400, 0.200]
79: 1.0: [4.900, 3.100, 1.500, 0.100]
80: 1.0: [5.400, 3.700, 1.500, 0.200]
81: 1.0: [4.800, 3.400, 1.600, 0.200]
82: 1.0: [4.800, 3.000, 1.400, 0.100]
83: 1.0: [4.300, 3.000, 1.100, 0.100]
84: 1.0: [5.800, 4.000, 1.200, 0.200]
85: 1.0: [5.700, 4.400, 1.500, 0.400]
86: 1.0: [5.400, 3.900, 1.300, 0.400]
87: 1.0: [5.100, 3.500, 1.400, 0.300]
88: 1.0: [5.700, 3.800, 1.700, 0.300]
89: 1.0: [5.100, 3.800, 1.500, 0.300]
90: 1.0: [5.400, 3.400, 1.700, 0.200]
91: 1.0: [5.100, 3.700, 1.500, 0.400]
92: 1.0: [4.600, 3.600, 1.000, 0.200]
93: 1.0: [5.100, 3.300, 1.700, 0.500]
94: 1.0: [4.800, 3.400, 1.900, 0.200]
95: 1.0: [5.000, 3.000, 1.600, 0.200]
96: 1.0: [5.000, 3.400, 1.600, 0.400]
97: 1.0: [5.200, 3.500, 1.500, 0.200]
98: 1.0: [5.200, 3.400, 1.400, 0.200]
99: 1.0: [4.700, 3.200, 1.600, 0.200]
100: 1.0: [4.800, 3.100, 1.600, 0.200]
101: 1.0: [5.400, 3.400, 1.500, 0.400]
102: 1.0: [5.200, 4.100, 1.500, 0.100]
103: 1.0: [5.500, 4.200, 1.400, 0.200]
104: 1.0: [4.900, 3.100, 1.500, 0.100]
105: 1.0: [5.000, 3.200, 1.200, 0.200]
106: 1.0: [5.500, 3.500, 1.300, 0.200]
107: 1.0: [4.900, 3.100, 1.500, 0.100]
108: 1.0: [4.400, 3.000, 1.300, 0.200]
109: 1.0: [5.100, 3.400, 1.500, 0.200]
110: 1.0: [5.000, 3.500, 1.300, 0.300]
111: 1.0: [4.500, 2.300, 1.300, 0.300]
112: 1.0: [4.400, 3.200, 1.300, 0.200]
113: 1.0: [5.000, 3.500, 1.600, 0.600]
114: 1.0: [5.100, 3.800, 1.900, 0.400]
115: 1.0: [4.800, 3.000, 1.400, 0.300]
116: 1.0: [5.100, 3.800, 1.600, 0.200]
117: 1.0: [4.600, 3.200, 1.400, 0.200]
118: 1.0: [5.300, 3.700, 1.500, 0.200]
119: 1.0: [5.000, 3.300, 1.400, 0.200]
120: VL-2{n=31 c=[6.932, 3.106, 5.855, 2.142] r=[0.491, 0.293, 0.449, 0.235]}
121: Weight: Point:
122: 1.0: [6.300, 3.300, 6.000, 2.500]
123: 1.0: [7.100, 3.000, 5.900, 2.100]
124: 1.0: [6.300, 2.900, 5.600, 1.800]
125: 1.0: [6.500, 3.000, 5.800, 2.200]
126: 1.0: [7.600, 3.000, 6.600, 2.100]
127: 1.0: [7.300, 2.900, 6.300, 1.800]
128: 1.0: [6.700, 2.500, 5.800, 1.800]
129: 1.0: [7.200, 3.600, 6.100, 2.500]
130: 1.0: [6.500, 3.200, 5.100, 2.000]
131: 1.0: [6.400, 2.700, 5.300, 1.900]
132: 1.0: [6.800, 3.000, 5.500, 2.100]
133: 1.0: [6.400, 3.200, 5.300, 2.300]
134: 1.0: [6.500, 3.000, 5.500, 1.800]
135: 1.0: [7.700, 3.800, 6.700, 2.200]
136: 1.0: [7.700, 2.600, 6.900, 2.300]
137: 1.0: [6.900, 3.200, 5.700, 2.300]
138: 1.0: [7.700, 2.800, 6.700, 2.000]
139: 1.0: [6.700, 3.300, 5.700, 2.100]
140: 1.0: [7.200, 3.200, 6.000, 1.800]
141: 1.0: [6.400, 2.800, 5.600, 2.100]
142: 1.0: [7.200, 3.000, 5.800, 1.600]
143: 1.0: [7.400, 2.800, 6.100, 1.900]
144: 1.0: [7.900, 3.800, 6.400, 2.000]
145: 1.0: [6.400, 2.800, 5.600, 2.200]
146: 1.0: [7.700, 3.000, 6.100, 2.300]
147: 1.0: [6.300, 3.400, 5.600, 2.400]
148: 1.0: [6.400, 3.100, 5.500, 1.800]
149: 1.0: [6.900, 3.100, 5.400, 2.100]
150: 1.0: [6.700, 3.100, 5.600, 2.400]
151: 1.0: [6.900, 3.100, 5.100, 2.300]
152: 1.0: [6.800, 3.200, 5.900, 2.300]
153: 1.0: [6.700, 3.300, 5.700, 2.500]
154: 1.0: [6.700, 3.000, 5.200, 2.300]
155: 1.0: [6.500, 3.000, 5.200, 2.000]
156: 1.0: [6.200, 3.400, 5.400, 2.300]
# Draws a 3D scatter plot of the first three feature columns (x, y, z),
# colouring each point by the label in its `group` column.
library(rgl)

# Inline table: four numeric features plus a group label per row.
# The "........." rows stand for data rows elided from this listing.
ttt = textConnection("
x y z t group
5.100 3.500 1.400 0.200 Iris-setosa
.........
6.300 3.300 6.000 2.500 Iris-virginica
.........
7.000 3.200 4.700 1.400 Iris-versicolor
.........
")

colors =c("red","green","blue")
pca <- read.table(ttt,header=TRUE)
# Since R 4.0.0 read.table() returns `group` as character, and as.integer() on
# those labels yields NA; convert to a factor explicitly so each group maps to
# an integer colour code on every R version.
p3d <- plot3d(pca$x, pca$y, pca$z, xlab="feature 1", ylab="feature 2",
              zlab="feature 3",
              col=as.integer(factor(pca$group)),
              box=FALSE, size=5)
联系客服