library(FactoMineR)
data("decathlon")
summary(decathlon)
## 100m Long.jump Shot.put High.jump
## Min. :10.44 Min. :6.61 Min. :12.68 Min. :1.850
## 1st Qu.:10.85 1st Qu.:7.03 1st Qu.:13.88 1st Qu.:1.920
## Median :10.98 Median :7.30 Median :14.57 Median :1.950
## Mean :11.00 Mean :7.26 Mean :14.48 Mean :1.977
## 3rd Qu.:11.14 3rd Qu.:7.48 3rd Qu.:14.97 3rd Qu.:2.040
## Max. :11.64 Max. :7.96 Max. :16.36 Max. :2.150
## 400m 110m.hurdle Discus Pole.vault
## Min. :46.81 Min. :13.97 Min. :37.92 Min. :4.200
## 1st Qu.:48.93 1st Qu.:14.21 1st Qu.:41.90 1st Qu.:4.500
## Median :49.40 Median :14.48 Median :44.41 Median :4.800
## Mean :49.62 Mean :14.61 Mean :44.33 Mean :4.762
## 3rd Qu.:50.30 3rd Qu.:14.98 3rd Qu.:46.07 3rd Qu.:4.920
## Max. :53.20 Max. :15.67 Max. :51.65 Max. :5.400
## Javeline 1500m Rank Points
## Min. :50.31 Min. :262.1 Min. : 1.00 Min. :7313
## 1st Qu.:55.27 1st Qu.:271.0 1st Qu.: 6.00 1st Qu.:7802
## Median :58.36 Median :278.1 Median :11.00 Median :8021
## Mean :58.32 Mean :279.0 Mean :12.12 Mean :8005
## 3rd Qu.:60.89 3rd Qu.:285.1 3rd Qu.:18.00 3rd Qu.:8122
## Max. :70.52 Max. :317.0 Max. :28.00 Max. :8893
## Competition
## Decastar:13
## OlympicG:28
##
##
##
##
head(decathlon)
## 100m Long.jump Shot.put High.jump 400m 110m.hurdle Discus
## SEBRLE 11.04 7.58 14.83 2.07 49.81 14.69 43.75
## CLAY 10.76 7.40 14.26 1.86 49.37 14.05 50.72
## KARPOV 11.02 7.30 14.77 2.04 48.37 14.09 48.95
## BERNARD 11.02 7.23 14.25 1.92 48.93 14.99 40.87
## YURKOV 11.34 7.09 15.19 2.10 50.42 15.31 46.26
## WARNERS 11.11 7.60 14.31 1.98 48.68 14.23 41.10
## Pole.vault Javeline 1500m Rank Points Competition
## SEBRLE 5.02 63.19 291.7 1 8217 Decastar
## CLAY 4.92 60.15 301.5 2 8122 Decastar
## KARPOV 4.92 50.31 300.2 3 8099 Decastar
## BERNARD 5.32 62.77 280.1 4 8067 Decastar
## YURKOV 4.72 63.44 276.4 5 8036 Decastar
## WARNERS 4.92 51.77 278.1 6 8030 Decastar
decathlonR <- decathlon
decathlonR[1:10] <- scale(decathlon[1:10], center = TRUE)
library("spe")
data("swissroll")
library("rgl")
swissroll <- swissroll[order(swissroll$z),]
plot3d(swissroll, col = plotrix::color.scale(1:nrow(swissroll),c(0,1,1),c(1,1,0),0))
You must enable Javascript to view this page properly.
library("tidyverse")
Clusters <- read_delim("donclassif.txt.gz", ";")
PlotClusters <- function(CoordPoints, Col = NULL) {
if (is.null(Col)) {
Col = sample(1:nrow(CoordPoints))
}
ggplot(data.frame(X = CoordPoints[, 1, drop = TRUE],
Y = CoordPoints[, 2, drop = TRUE],
Col = Col),
aes(x = X, y = Y)) +
geom_point(aes(colour = Col), size = 2) + guides(color = FALSE) +
coord_fixed()
}
PlotClusters(Clusters)
PlotDecathlon <- function(CoordPoints, Col = NULL) {
if (is.null(Col)) {
Col = decathlon$Points
}
ggplot(data = data.frame( X = CoordPoints[, 1, drop = TRUE],
Y = CoordPoints[, 2, drop = TRUE],
Col = Col),
aes(x = X, y = Y)) +
geom_point(aes(colour = Col), size = 5) + geom_text(label = row.names(decathlon), vjust = -1.25) +
scale_x_continuous(expand = c(.15,0)) + scale_y_continuous(expand = c(.1,0)) +
guides(color = FALSE) +
coord_fixed()
}
PlotSwissRoll <- function(CoordPoints, Col = NULL) {
if (is.null(Col)) {
Col = 1:nrow(swissroll)
}
ggplot(data = data.frame( X = CoordPoints[, 1, drop = TRUE],
Y = CoordPoints[, 2, drop = TRUE],
Col = Col),
aes(x = X, y = Y)) +
geom_point(aes(colour = Col), size = 5) + guides(color = FALSE) +
coord_fixed()
}
DecathlonPCA <- PCA(decathlon[1:10])
PlotDecathlon(DecathlonPCA$ind$coord)
DecathlonRPCA <- PCA(decathlonR[1:10])
PlotDecathlon(DecathlonRPCA$ind$coord)
SwissRollPCA <- PCA(swissroll)
PlotSwissRoll(SwissRollPCA$ind$coord)
ClustersPCA <- PCA(Clusters)
PlotClusters(ClustersPCA$ind$coord)
data(poison)
summary(poison)
## Age Time Sick Sex Nausea
## Min. : 4.00 Min. : 0.00 Sick_n:17 F:28 Nausea_n:43
## 1st Qu.: 6.00 1st Qu.: 0.00 Sick_y:38 M:27 Nausea_y:12
## Median : 8.00 Median :12.00
## Mean :16.93 Mean :10.16
## 3rd Qu.:10.00 3rd Qu.:16.50
## Max. :88.00 Max. :22.00
## Vomiting Abdominals Fever Diarrhae Potato
## Vomit_n:33 Abdo_n:18 Fever_n:20 Diarrhea_n:20 Potato_n: 3
## Vomit_y:22 Abdo_y:37 Fever_y:35 Diarrhea_y:35 Potato_y:52
##
##
##
##
## Fish Mayo Courgette Cheese Icecream
## Fish_n: 1 Mayo_n:10 Courg_n: 5 Cheese_n: 7 Icecream_n: 4
## Fish_y:54 Mayo_y:45 Courg_y:50 Cheese_y:48 Icecream_y:51
##
##
##
##
head(poison)
## Age Time Sick Sex Nausea Vomiting Abdominals Fever Diarrhae
## 1 9 22 Sick_y F Nausea_y Vomit_n Abdo_y Fever_y Diarrhea_y
## 2 5 0 Sick_n F Nausea_n Vomit_n Abdo_n Fever_n Diarrhea_n
## 3 6 16 Sick_y F Nausea_n Vomit_y Abdo_y Fever_y Diarrhea_y
## 4 9 0 Sick_n F Nausea_n Vomit_n Abdo_n Fever_n Diarrhea_n
## 5 7 14 Sick_y M Nausea_n Vomit_y Abdo_y Fever_y Diarrhea_y
## 6 72 9 Sick_y M Nausea_n Vomit_n Abdo_y Fever_y Diarrhea_y
## Potato Fish Mayo Courgette Cheese Icecream
## 1 Potato_y Fish_y Mayo_y Courg_y Cheese_y Icecream_y
## 2 Potato_y Fish_y Mayo_y Courg_y Cheese_n Icecream_y
## 3 Potato_y Fish_y Mayo_y Courg_y Cheese_y Icecream_y
## 4 Potato_y Fish_y Mayo_n Courg_y Cheese_y Icecream_y
## 5 Potato_y Fish_y Mayo_y Courg_y Cheese_y Icecream_y
## 6 Potato_y Fish_n Mayo_y Courg_y Cheese_y Icecream_y
PoisonMFA <- MFA(poison, group=c(2,2,5,6), type=c("s","n","n","n"),
name.group=c("desc","desc2","symptom","eat"))
library("fastICA")
DecathlonICA <- fastICA(decathlon[1:10], n.comp = 2)
PlotDecathlon(DecathlonICA$S)
DecathlonRICA <- fastICA(decathlonR[1:10], n.comp = 2)
PlotDecathlon(DecathlonRICA$S)
SwissRollICA <- fastICA(swissroll, n.comp = 2)
PlotSwissRoll(SwissRollICA$S)
ClustersICA <- fastICA(Clusters, n.comp = 2)
PlotClusters(ClustersICA$S)
library("NMF")
DecathlonNMF <- nmf(decathlon[1:10], rank = 2)
PlotDecathlon(basis(DecathlonNMF))
library("kernlab")
DecathlonKPCA <- kpca(decathlon[1:10], features = 2)
PlotDecathlon(as.matrix(scale(decathlon[1:10], scale = FALSE)) %*% pcv(DecathlonKPCA))
DecathlonRKPCA <- kpca(decathlonR[1:10], features = 2)
PlotDecathlon(as.matrix(scale(decathlonR[1:10], scale = FALSE)) %*% pcv(DecathlonRKPCA))
SwissRollKPCA <- kpca(swissroll, features = 2)
PlotSwissRoll(as.matrix(scale(swissroll, scale = FALSE)) %*% pcv(SwissRollKPCA))
#library(keras)
# pca_encoder <- keras_model_sequential() %>%
# layer_dense(units = 2, input_shape = c(10))
# pca_decoder <- keras_model_sequential() %>% layer_dense(units = 10, input_shape = c(2))
# pca_autoencoder <- keras_model_sequential() %>% pca_encoder() %>% pca_decoder()
#
# pca_autoencoder %>% compile(loss = "mse", optimizer = "sgd",
# metric= "mse")
#
# decathlonR100 <- decathlonR %>% slice(rep(seq_len(nrow(decathlonR)),100))
#
# pca_hist <- pca_autoencoder %>% fit(x = as.matrix(decathlonR100[,1:10]),
# y= as.matrix(decathlonR100[,1:10]),
# epochs = 1000,
# batch_size = 32,
# verbose = 0)
# decathlon_pca_coords <- predict(pca_encoder, x = as.matrix(decathlonR[,1:10]))
# PlotDecathlon(decathlon_pca_coords)
# encoder <- keras_model_sequential() %>%
# layer_dense(units = 4, activation = "relu", input_shape = c(10)) %>%
# layer_dense(units = 2)
# decoder <- keras_model_sequential() %>%
# layer_dense(units = 4, activation = "relu", input_shape = c(2)) %>%
# layer_dense(units = 10)
# autoencoder <- keras_model_sequential() %>% encoder %>% decoder()
#
# autoencoder %>% compile(loss = "mse", optimizer = "sgd",
# metric= "mse")
#
# ae_hist <- autoencoder %>% fit(x = as.matrix(decathlonR100[,1:10]),
# y= as.matrix(decathlonR100[,1:10]),
# epochs = 5000,
# batch_size = 32,
# verbose = 0)
# decathlon_ae_coords <- predict(encoder, x = as.matrix(decathlonR[,1:10]))
# PlotDecathlon(decathlon_ae_coords)
DistDecathlon <- dist(decathlon[1:10])
DecathlonMDS <- cmdscale(DistDecathlon, k = 2)
PlotDecathlon(DecathlonMDS)
DistDecathlonR <- dist(decathlonR[1:10])
DecathlonRMDS <- cmdscale(DistDecathlonR, k = 2)
PlotDecathlon(DecathlonRMDS)
DistSwissRoll <- dist(swissroll)
SwissRollMDS <- cmdscale(DistSwissRoll, k = 2)
PlotSwissRoll(SwissRollMDS)
DistClusters <- dist(Clusters)
DistUClusters <- dist(unique(Clusters))
ClustersMDS <- cmdscale(DistClusters, k = 2)
PlotClusters(ClustersMDS)
library("vegan")
DecathlonISOMAP <- isomap(DistDecathlon, ndim = 2, k = 5)
PlotDecathlon(DecathlonISOMAP$points)
DecathlonRISOMAP <- isomap(DistDecathlonR, ndim = 2, k = 5)
PlotDecathlon(DecathlonRISOMAP$points)
SwissRollISOMAP <- isomap(DistSwissRoll, ndim = 2, k = 10)
PlotSwissRoll(SwissRollISOMAP$points)
library("chemometrics")
DecathlonRandom <- RPvectors(2, 10)
PlotDecathlon(as.matrix(decathlon[1:10]) %*% DecathlonRandom)
PlotDecathlon(as.matrix(decathlonR[1:10]) %*% DecathlonRandom)
SwissRollRandom <- RPvectors(2, 3)
PlotSwissRoll(as.matrix(swissroll) %*% SwissRollRandom)
ClustersRandom <- RPvectors(2, 2)
PlotClusters(as.matrix(Clusters) %*% ClustersRandom)
library("lle")
DecathlonLLE <- lle(decathlon[1:10], m = 2, k = 10)
## finding neighbours
## calculating weights
## computing coordinates
PlotDecathlon(DecathlonLLE$Y)
DecathlonRLLE <- lle(decathlonR[1:10], m = 2, k = 10)
## finding neighbours
## calculating weights
## computing coordinates
PlotDecathlon(DecathlonRLLE$Y)
SwissRollLLE <- lle(swissroll, m = 2, k = 10)
## finding neighbours
## calculating weights
## computing coordinates
PlotSwissRoll(SwissRollLLE$Y)
ClustersLLE <- lle(Clusters, m = 2, k = 10)
## finding neighbours
## calculating weights
## computing coordinates
PlotClusters(ClustersLLE$Y)
library("Rtsne")
DecathlonTSNE <- Rtsne(as.matrix(decathlon[1:10]), perplexity = 10, pca = FALSE)
PlotDecathlon(DecathlonTSNE$Y)
DecathlonRTSNE <- Rtsne(as.matrix(decathlonR[1:10]), perplexity = 10, pca = FALSE)
PlotDecathlon(DecathlonRTSNE$Y)
SwissRollTSNE <- Rtsne(as.matrix(swissroll), perplexity = 30, pca = FALSE)
PlotSwissRoll(SwissRollTSNE$Y)
ClustersTSNE <- Rtsne(as.matrix(unique(Clusters)), perplexity = 30, pca = FALSE)
PlotSwissRoll(ClustersTSNE$Y, Col = 1:nrow(unique(Clusters)))
library(uwot)
DecathlonUMAP <- umap(as.matrix(decathlon[1:10]))
PlotDecathlon(DecathlonUMAP)
DecathlonRUMAP <- umap(as.matrix(decathlonR[1:10]))
PlotDecathlon(DecathlonRUMAP)
SwissRollUMAP <- umap(as.matrix(swissroll))
PlotSwissRoll(SwissRollUMAP)
ClustersUMAP <- umap(as.matrix(unique(Clusters)))
PlotSwissRoll(ClustersUMAP, Col = 1:nrow(unique(Clusters)))
library("igraph")
DecathlonLap = embed_laplacian_matrix(graph.adjacency(as.matrix(exp(-DistDecathlon/3)), weighted = TRUE),2)
PlotDecathlon(DecathlonLap$X)
DecathlonRLap = embed_laplacian_matrix(graph.adjacency(as.matrix(exp(-DistDecathlonR/3)), weighted = TRUE),2)
PlotDecathlon(DecathlonRLap$X)
SwissRollLap = embed_laplacian_matrix(graph.adjacency(as.matrix(exp(-DistSwissRoll/3)), weighted = TRUE),2)
PlotSwissRoll(SwissRollLap$X)
K <- 4
K2 <- 10
DecathlonRKmeans <- kmeans(decathlonR[1:10], K)
PlotDecathlon(DecathlonRPCA$ind$coord, Col = factor(DecathlonRKmeans$cluster))
DecathlonRPCAKmeans <- kmeans(DecathlonRPCA$ind$coord[,1:2], K)
PlotDecathlon(DecathlonRPCA$ind$coord, Col = factor(DecathlonRPCAKmeans$cluster))
PlotDecathlon(DecathlonTSNE$Y, Col = factor(DecathlonRKmeans$cluster))
DecathlonTSNEKmeans <- kmeans(DecathlonTSNE$Y, K)
PlotDecathlon(DecathlonTSNE$Y, Col = factor(DecathlonTSNEKmeans$cluster))
PlotDecathlon(DecathlonRPCA$ind$coord, Col = factor(DecathlonTSNEKmeans$cluster))
ClustersKmeans <- kmeans(Clusters, K2)
PlotClusters(Clusters, Col = factor(ClustersKmeans$cluster))
ClustersTSNEKmeans <- kmeans(ClustersTSNE$Y, K2)
PlotClusters(unique(Clusters), Col = factor(ClustersTSNEKmeans$cluster))
DecathlonRPam <- pam(decathlonR[1:10], K)
PlotDecathlon(DecathlonRPCA$ind$coord, Col = factor(DecathlonRPam$cluster))
DecathlonRPCAPam <- pam(DecathlonRPCA$ind$coord[,1:2], K)
PlotDecathlon(DecathlonRPCA$ind$coord, Col = factor(DecathlonRPCAPam$cluster))
DecathlonTSNEPam <- pam(DecathlonTSNE$Y, K)
PlotDecathlon(DecathlonTSNE$Y, Col = factor(DecathlonTSNEPam$cluster))
ClustersPam <- kmeans(Clusters, K2)
PlotClusters(Clusters, Col = factor(ClustersPam$cluster))
ClustersTSNEPam <- kmeans(ClustersTSNE$Y, K2)
PlotClusters(unique(Clusters), Col = factor(ClustersTSNEPam$cluster))
library("Rmixmod")
DecathlonGMM <- mixmodCluster(decathlon[1:10], K)
PlotDecathlon(DecathlonPCA$ind$coord, Col = factor(DecathlonGMM@bestResult@partition))
DecathlonRGMM <- mixmodCluster(decathlonR[1:10], K)
PlotDecathlon(DecathlonRPCA$ind$coord, Col = factor(DecathlonRGMM@bestResult@partition))
ClustersGMM <- mixmodCluster(Clusters, K2)
PlotClusters(Clusters, Col = factor(ClustersGMM@bestResult@partition))
library(topicmodels)
data("AssociatedPress")
k = 2
ap_lda <- LDA(AssociatedPress, k , control = list(seed = 1234))
ap_lda
## A LDA_VEM topic model with 2 topics.
terms(ap_lda,3)
## Topic 1 Topic 2
## [1,] "percent" "i"
## [2,] "million" "president"
## [3,] "new" "government"
library("fpc")
DecathlonDBScan <- dbscan(decathlon[1:10], 10)
PlotDecathlon(DecathlonPCA$ind$coord, Col = factor(DecathlonDBScan$cluster))
DecathlonRDBScan <- dbscan(decathlonR[1:10], 3)
PlotDecathlon(DecathlonRPCA$ind$coord, Col = factor(DecathlonRDBScan$cluster))
ClustersDBScan <- dbscan(Clusters, .2)
PlotClusters(Clusters, Col = factor(ClustersDBScan$cluster))
ClustersTSNEDBScan <- dbscan(ClustersTSNE$Y, 2)
PlotClusters(unique(Clusters), Col = factor(ClustersTSNEDBScan$cluster))
library("pdfCluster")
ClustersPdf <- pdfCluster(Clusters)
PlotClusters(Clusters, Col = factor(ClustersPdf@clusters))
library("LPCM")
ClustersMS <- ms(Clusters, h = .04)
PlotClusters(Clusters, Col = factor(ClustersMS$cluster.label))
library("ggdendro")
library("fastcluster")
ClustersMLD <- hclust(DistClusters, method = "single")
ggdendrogram(ClustersMLD)
ClustersML <- cutree(ClustersMLD, k = K2)
PlotClusters(Clusters, Col = factor(ClustersML))
ClustersALD <- hclust(DistClusters, method = "average")
ggdendrogram(ClustersALD)
ClustersAL <- cutree(ClustersALD, k = K2)
PlotClusters(Clusters, Col = factor(ClustersAL))
ClustersMMLD <- hclust(DistClusters, method = "complete")
ggdendrogram(ClustersMMLD)
ClustersMML <- cutree(ClustersMMLD, k = K2)
PlotClusters(Clusters, Col = factor(ClustersMML))
ClustersWardD <- hclust(DistClusters, method = "ward.D2")
ggdendrogram(ClustersWardD)
ClustersWard <- cutree(ClustersWardD, k = K2)
PlotClusters(Clusters, Col = factor(ClustersWard))
ClustersDianaD <- as.hclust(diana(Clusters))
ggdendrogram(ClustersDianaD)
ClustersDiana <- cutree(ClustersDianaD, k = K2)
PlotClusters(Clusters, Col = factor(ClustersDiana))
library("apcluster")
ClustersAF <- apcluster(negDistMat(r=2), Clusters)
PlotClusters(Clusters, labels(ClustersAF))
ClustersClara <- clara(Clusters, K2)
PlotClusters(Clusters, Col = factor(ClustersClara$clustering))
ClustersSNEClara <- clara(ClustersTSNE$Y, K2)
PlotClusters(unique(Clusters), Col = factor(ClustersSNEClara$clustering))