From 0aa1bb86ca6a3669032fa8a42af4d1ce5fbbc22a Mon Sep 17 00:00:00 2001 From: Lukas Fuchs <86153843+f-lukas@users.noreply.github.com> Date: Fri, 16 Aug 2024 09:09:21 +0200 Subject: [PATCH] fix cluster ids (#239) * fix cluster ids * extract logic into function * fix name of df --------- Co-authored-by: Antonia Runge --- DESCRIPTION | 2 +- NEWS.md | 5 +++++ R/01-estimateMap.R | 5 +++++ R/01-predictNearestCluster.R | 15 +++++++++++++++ man/makeClusterIdsContinuous.Rd | 16 ++++++++++++++++ 5 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 man/makeClusterIdsContinuous.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 6f1aca6b..bd56ac4f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: DSSM Title: Pandora & IsoMemo spatiotemporal modeling -Version: 24.08.0 +Version: 24.08.1 Authors@R: c( person("Marcus", "Gross", email = "marcus.gross@inwt-statistics.de", role = c("cre", "aut")), person("Antonia", "Runge", email = "antonia.runge@inwt-statistics.de", role = c("aut")) diff --git a/NEWS.md b/NEWS.md index 2deff5ff..3aef2c61 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# DSSM 24.08.1 + +## Bug Fixes +- fixes cluster ids being non continuous in some cases (#238) + # DSSM 24.08.0 ## New Features diff --git a/R/01-estimateMap.R b/R/01-estimateMap.R index f67509e7..57c29499 100644 --- a/R/01-estimateMap.R +++ b/R/01-estimateMap.R @@ -2117,6 +2117,8 @@ estimateMapKernel <- function(data, cluster_centers$cluster <- 1:nrow(cluster_centers) data2 <- merge(data2, cluster_centers, sort = FALSE) colnames(data2)[colnames(data2)=="cluster"] <- "spatial_cluster" + + data2$spatial_cluster <- data2$spatial_cluster %>% makeClusterIdsContinuous() } if(!is.null(Weighting) & !(Weighting == "")){ model <- try(lapply(1:nSim, function(x){ @@ -2526,6 +2528,9 @@ estimateMap3DKernel <- function(data, clust$cluster <- 1:nrow(clust) data <- merge(data, clust, sort = FALSE) colnames(data)[colnames(data)=="cluster"] <- "temporal_group" + + data$temporal_group <- data$temporal_group %>% makeClusterIdsContinuous() + data$spatial_cluster <- data$spatial_cluster %>% makeClusterIdsContinuous() } if ( class(model)[1] == "try-error") {return("Error in Model Fitting.")} sc <- NULL diff --git a/R/01-predictNearestCluster.R b/R/01-predictNearestCluster.R index 3642a4b2..0d0159ef 100644 --- a/R/01-predictNearestCluster.R +++ b/R/01-predictNearestCluster.R @@ -5,3 +5,18 @@ predictNearestCluster <- function(object, newdata){ dist_mat <- dist_mat[-seq(n_centers), seq(n_centers)] max.col(-dist_mat) } + +#' Make cluster ids continuous +#' +#' When Mclust is used with e.g. 10 clusters, it can still happen that some clusters are empty +#' In this case we would see a jump in cluster ids e.g. 1,2,5,... +#' To prevent this, we change the cluster ids in the last step. +#' +#' @param column_with_ids A vector with cluster ids +makeClusterIdsContinuous <- function(column_with_ids) { + if (length(column_with_ids) == 0) { + return(column_with_ids) + } + + match(column_with_ids, sort(unique(column_with_ids))) +} diff --git a/man/makeClusterIdsContinuous.Rd b/man/makeClusterIdsContinuous.Rd new file mode 100644 index 00000000..fc8b5ab1 --- /dev/null +++ b/man/makeClusterIdsContinuous.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/01-predictNearestCluster.R +\name{makeClusterIdsContinuous} +\alias{makeClusterIdsContinuous} +\title{Make cluster ids continuous} +\usage{ +makeClusterIdsContinuous(column_with_ids) +} +\arguments{ +\item{column_with_ids}{A vector with cluster ids} +} +\description{ +When Mclust is used with e.g. 10 clusters, it can still happen that some clusters are empty +In this case we would see a jump in cluster ids e.g. 1,2,5,... +To prevent this, we change the cluster ids in the last step. +}