mmcc2mmcp {truecluster} | R Documentation |
These functions evaluate the MMCC matrix created by resample aggregation.
mmcc2cluster(c)
mmcc2mmcp(c, rowsums = NULL, equal.rowsums = FALSE)
mmcp2pk(p)   # marginal cluster probabilities
mmcp2RMC(p)  # relative model complexity
mmcp2h(p)    # cellwise conditional entropy = uncertainty
mmcp2H(p)    # conditional entropy = uncertainty
mmcp2d(p)    # cellwise non-penalized information
mmcp2D(p)    # non-penalized information
mmcp2i(p)    # cellwise penalized information
mmcp2I(p)    # penalized information
mmcp2cic(p)  # cellwise CIC
mmcp2CIC(p)  # CIC
mmcp2ALL(p)  # c(N, H, D, RMC, I, CIC)
mmcp2gsd(p)  # rowwise Generalized Silhouette Value
mmcp2GSD(p, FUN=summary)  # clusterwise statistics of Generalized Silhouette Value
c |
MMCC matrix of cluster votes |
p |
MMCP matrix of cluster probabilities |
rowsums |
optional vector or scalar giving the row sums of the MMCC matrix; supplying it speeds up calculations |
equal.rowsums |
if TRUE, sum(mmcc[1,]) (the sum of the first row) is used as the row sum for all rows; ignored if rowsums is given |
FUN |
a function summarizing a vector of GSD values (called once per cluster) |
mmcc2cluster
assigns each case (row) to the most likely cluster (column) breaking ties at random.
Function mmcc2cluster returns a vector of cluster memberships. Function mmcc2mmcp converts an MMCC (counted cluster votes) matrix into an MMCP matrix (estimated cluster probabilities). Function mmcp2RMC returns a scalar giving the relative model complexity. Function mmcp2gsd returns a vector of Generalized Silhouette Diagnostics. Function mmcp2GSD returns a matrix[statistics,clusters] summarizing Generalized Silhouette Diagnostics. Function mmcp2xx returns a matrix derived from MMCP. Function mmcp2XX returns mean(rowSums(mmcp2xx())) and might be a bit faster.
Jens Oehlschlägel
xx
# Example script for the MMCC/MMCP evaluation functions.
# One statement per line so the script parses as R. The vote matrix is
# held in `mmcc` rather than `c` to avoid shadowing base::c().

cat("\nfake a cluster voting matrix\n")
# 16 cases with 256 votes each: 4 cases lean towards column b, 12 towards a
mmcc <- c(rbinom(4, 256, 0.25), rbinom(12, 256, 0.75))
mmcc <- cbind(a=mmcc, b=256-mmcc)
mmcc
mmcc2cluster(mmcc)
table(mmcc2cluster(mmcc))

cat("\nconvert to probabilities matrix\n")
p <- mmcc2mmcp(mmcc)
p

cat("\nlook at Relative Model Complexity\n")
mmcp2RMC(p)

cat("\ncellwise Cluster Information Criterion\n")
mmcp2cic(p)

cat("\ncluster CIC\n")
colSums(mmcp2cic(p))

cat("\ncase CIC (aggregated by cluster)\n")
aggregate(rowSums(mmcp2cic(p)), by=list(mmcc2cluster(p)), FUN=mean)

cat("\ncalculating model CIC\n")
# the two calls below compute the same quantity in two ways
mean(rowSums(mmcp2cic(p)))
mmcp2CIC(p)

cat("\ncalculating all model statistics\n")
mmcp2ALL(p)

cat("\nGeneralized Silhouette Diagnostic\n")
mmcp2GSD(p)
boxplot(split(mmcp2gsd(p), mmcc2cluster(p)), main="GSD distribution by cluster")
boxplot(split(rowSums(mmcp2h(p)), mmcc2cluster(p)), main="Uncertainty distribution by cluster")

cat("\nshow that ties are broken at random\n")
# all votes equal, so every row is a 4-way tie
mmcc <- matrix(1, 16, 4)
table(mmcc2cluster(mmcc))
table(mmcc2cluster(mmcc))

cat("\ncompare some models differing in true and model complexity\n")
# Each cbind(mmcc, mmcc) doubles the number of model clusters (columns)
# while the underlying vote pattern stays the same.
collect <- NULL

# vote pattern: all cases identical
mmcc <- matrix(1, 16, 1)
collect <- rbind(collect, "true=1 model=1"=mmcp2ALL(mmcc2mmcp(mmcc)))
mmcc <- cbind(mmcc, mmcc)
collect <- rbind(collect, "true=1 model=2"=mmcp2ALL(mmcc2mmcp(mmcc)))
mmcc <- cbind(mmcc, mmcc)
collect <- rbind(collect, "true=1 model=4"=mmcp2ALL(mmcc2mmcp(mmcc)))
mmcc <- cbind(mmcc, mmcc)
collect <- rbind(collect, "true=1 model=8"=mmcp2ALL(mmcc2mmcp(mmcc)))
mmcc <- cbind(mmcc, mmcc)
collect <- rbind(collect, "true=1 model=16"=mmcp2ALL(mmcc2mmcp(mmcc)))

# vote pattern: diag(2) repeated over the 16 cases
mmcc <- t(matrix(rep(diag(2), 8), 2))
collect <- rbind(collect, "true=2 model=2"=mmcp2ALL(mmcc2mmcp(mmcc)))
mmcc <- cbind(mmcc, mmcc)
collect <- rbind(collect, "true=2 model=4"=mmcp2ALL(mmcc2mmcp(mmcc)))
mmcc <- cbind(mmcc, mmcc)
collect <- rbind(collect, "true=2 model=8"=mmcp2ALL(mmcc2mmcp(mmcc)))
mmcc <- cbind(mmcc, mmcc)
collect <- rbind(collect, "true=2 model=16"=mmcp2ALL(mmcc2mmcp(mmcc)))

# vote pattern: diag(4) repeated over the 16 cases
mmcc <- t(matrix(rep(diag(4), 4), 4))
collect <- rbind(collect, "true=4 model=4"=mmcp2ALL(mmcc2mmcp(mmcc)))
mmcc <- cbind(mmcc, mmcc)
collect <- rbind(collect, "true=4 model=8"=mmcp2ALL(mmcc2mmcp(mmcc)))
mmcc <- cbind(mmcc, mmcc)
collect <- rbind(collect, "true=4 model=16"=mmcp2ALL(mmcc2mmcp(mmcc)))

# vote pattern: diag(8) repeated over the 16 cases
mmcc <- t(matrix(rep(diag(8), 2), 8))
collect <- rbind(collect, "true=8 model=8"=mmcp2ALL(mmcc2mmcp(mmcc)))
mmcc <- cbind(mmcc, mmcc)
collect <- rbind(collect, "true=8 model=16"=mmcp2ALL(mmcc2mmcp(mmcc)))

# vote pattern: every case votes for its own column
mmcc <- diag(16)
collect <- rbind(collect, "true=16 model=16"=mmcp2ALL(mmcc2mmcp(mmcc)))

# relabel columns 5 and 6 to show how they are derived, then sort by column 6
colnames(collect)[c(5,6)] <- c("D*(1-RMC)=I","I-H=CIC")
print(collect[order(collect[,6]),])