-
Notifications
You must be signed in to change notification settings - Fork 1
/
simple.R
47 lines (33 loc) · 1.16 KB
/
simple.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# Attach the built-in datasets package and peek at the first rows of iris.
library(datasets)
head(iris)

# Exploration showed that Petal.Length and Petal.Width are similar within a
# species but vary considerably between species, so plot those two
# measurements against each other, coloured by species.
library(ggplot2)
ggplot(
  data = iris,
  mapping = aes(x = Petal.Length, y = Petal.Width, color = Species)
) +
  geom_point()
# Output is in the /simple folder.
# Clustering ----
# Fix the RNG seed so the random starting centres (and therefore the fit) are
# reproducible, then run k-means on the two petal measurements (columns 3 and
# 4 of iris) asking for 3 clusters and keeping the best of 20 random starts.
set.seed(20)
irisCluster <- kmeans(
  x = iris[, c("Petal.Length", "Petal.Width")],
  centers = 3,
  nstart = 20
)
irisCluster
# K-means clustering with 3 clusters of sizes 46, 54, 50
# Cluster means:
# Petal.Length Petal.Width
# 1 5.626087 2.047826
# 2 4.292593 1.359259
# 3 1.462000 0.246000
# Within cluster sum of squares by cluster:
# [1] 15.16348 14.22741 2.02200
# (between_SS / total_SS = 94.3 %)
#
# Available components:
#
# [1] "cluster" "centers" "totss" "withinss"
# [5] "tot.withinss" "betweenss" "size" "iter"
# [9] "ifault"
# Compare the cluster assignments with the true species: rows are k-means
# clusters, columns are species. Cluster numbers are arbitrary labels, so look
# for each species falling mostly into a single row.
table(cluster = irisCluster$cluster, species = iris$Species)

# Let's plot the data to see the clusters. The assignments are converted to a
# factor so ggplot2 uses a discrete colour scale, and they are attached to the
# plotted data as a `cluster` column instead of being pulled from an external
# object inside aes(); this keeps the mapping tied to the data's rows and
# gives the legend a readable title.
irisCluster$cluster <- as.factor(irisCluster$cluster)
ggplot(
  transform(iris, cluster = irisCluster$cluster),
  aes(Petal.Length, Petal.Width, color = cluster)
) +
  geom_point()