#' Plot real against imputed values and report their correlation
#'
#' Draws a scatterplot with `real` on the x axis and `imputed` on the y axis,
#' then prints a line of the form "Correlation: <value>".
#'
#' @param real Vector of reference values.
#' @param imputed Vector of imputed values, paired element-wise with `real`.
#' @return The printed correlation string, invisibly (the result of the final
#'   `print()` call).
plot_scatterplot <- function(real, imputed) {
  points <- data.frame(real = real, imputed = imputed)
  scatter <- ggplot(points, aes(x = real, y = imputed)) + geom_point()
  print(scatter)

  # The correlation is computed on the raw input vectors.
  label <- paste("Correlation:", cor(real, imputed))
  print(label)
}
#' Replace a random share of one feature's values with NA
#'
#' @param dataset Two-dimensional object (e.g. a data frame) to modify.
#' @param feature Column name or index whose values are blanked out.
#' @param noNA Fraction of rows to set to `NA`; the number of rows drawn is
#'   `noNA * nrow(dataset)`.
#' @return A copy of `dataset` with the selected rows of `feature` set to `NA`.
generate_NAs <- function(dataset, feature, noNA) {
  n_rows <- nrow(dataset)
  # Draw distinct row positions so no row is picked twice.
  na_rows <- sample(seq_len(n_rows), size = noNA * n_rows, replace = FALSE)
  dataset[na_rows, feature] <- NA
  dataset
}
#' Impute missing values of one feature with the feature's mean
#'
#' @param dataset Two-dimensional object (e.g. a data frame) to impute.
#' @param feature Column name or index of the feature to impute.
#' @return A copy of `dataset` in which every `NA` of `feature` is replaced by
#'   the mean of the non-missing values. If every value is missing the mean is
#'   `NaN`, so the entries become `NaN`.
mean_imputation_solution <- function(dataset, feature) {
  values <- dataset[, feature]
  missing <- is.na(values)
  # `na.rm = TRUE` is required: otherwise the mean of a vector containing NA
  # is itself NA and the imputation would be a no-op.
  dataset[missing, feature] <- mean(values, na.rm = TRUE)
  dataset
}
#' Impute missing values of one feature by sampling observed values
#'
#' Each `NA` in `feature` is replaced by a value drawn at random from the
#' non-missing values of the same feature. Values are drawn without
#' replacement when there are at least as many observed values as missing
#' ones, and with replacement otherwise.
#'
#' @param dataset Two-dimensional object (e.g. a data frame) to impute.
#' @param feature Column name or index of the feature to impute.
#' @return A copy of `dataset` with the `NA`s of `feature` filled in. The
#'   input is returned unchanged when `feature` has no missing values.
random_imputation_solution <- function(dataset, feature) {
  values <- dataset[, feature]
  missing <- is.na(values)
  # Count the missing entries; `length()` would count every row instead.
  num_na <- sum(missing)
  if (num_na == 0L) {
    return(dataset)
  }

  observed <- values[!missing]
  if (length(observed) == 0L) {
    stop(
      "cannot impute '", feature, "': no observed values to sample from",
      call. = FALSE
    )
  }

  # Sample positions rather than values: `sample(x, n)` treats a single
  # number x >= 1 as the range 1:x, which would invent values.
  picks <- sample.int(
    length(observed),
    num_na,
    replace = num_na > length(observed)
  )
  dataset[missing, feature] <- observed[picks]
  dataset
}
#' Impute missing values with k-nearest-neighbour imputation
#'
#' Drops the first two columns of `dataset`, transposes the remaining columns,
#' passes them to `impute.knn()`, and transposes the `data` element of the
#' result back into a data frame.
#'
#' NOTE(review): `impute.knn` is not defined in this file; presumably it is
#' `impute::impute.knn` (Bioconductor), which is why the data is transposed
#' before the call. Confirm against the package loaded by the caller.
#'
#' @param dataset Two-dimensional object whose first two columns are excluded
#'   from imputation (presumably identifier/label columns; verify with caller).
#' @param k Number of neighbours, forwarded to `impute.knn()` as `k`.
#' @return A data frame holding the imputed columns only; the two dropped
#'   columns are not part of the result.
knn_imputation_solution <- function(dataset, k) {
  # `drop = FALSE` keeps a single remaining column as a named column.
  features <- dataset[, -c(1, 2), drop = FALSE]
  imputed <- impute.knn(t(features), k = k)
  as.data.frame(t(imputed$data))
}