Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupport News AboutSign UpSign In
| Download
Views: 75
1
# Function to generate scatterplot and calculate correlation
2
#' Plot real against imputed values and report their correlation
#'
#' Draws a scatterplot of `imputed` versus `real` and prints the correlation
#' between the two vectors as computed by `cor()`.
#'
#' @param real Vector of true values (x axis).
#' @param imputed Vector of imputed values (y axis); paired with `real`.
#' @return Called for its side effects (a plot and a printed line).
plot_scatterplot <- function(real, imputed) {
  # Pair the two vectors up so ggplot can map them to the axes.
  points_df <- data.frame(real = real, imputed = imputed)

  # Build the plot first, then print it explicitly so it renders even when
  # this function is called from inside another function or a loop.
  scatter <- ggplot(points_df, aes(x = real, y = imputed)) + geom_point()
  print(scatter)

  # Report the correlation between real and imputed values.
  correlation <- cor(real, imputed)
  print(paste("Correlation:", correlation))
}
10
11
# function to generate NAs
12
#' Replace a random fraction of one column with NA
#'
#' @param dataset Data frame (or matrix) to modify; a modified copy is
#'   returned, the caller's object is not changed.
#' @param feature Column name or index whose values are blanked out.
#' @param noNA Fraction of rows, between 0 and 1, to set to `NA`.
#' @return `dataset` with `floor(noNA * nrow(dataset))` randomly chosen rows
#'   of `feature` set to `NA`.
generate_NAs <- function(dataset, feature, noNA) {
  stopifnot(
    is.numeric(noNA), length(noNA) == 1L, !is.na(noNA),
    noNA >= 0, noNA <= 1
  )

  n_rows <- nrow(dataset)

  # The product is a double and sample() truncates a fractional size, so
  # representation error can silently drop a row (0.29 * 100 is
  # 28.999999999999996, which truncates to 28). A small tolerance before
  # flooring keeps the "round down" semantics while fixing those cases.
  n_missing <- floor(noNA * n_rows + sqrt(.Machine$double.eps))

  # sample.int() draws row indices without replacement and is well defined
  # for zero-row input, unlike 1:nrow(dataset).
  idx <- sample.int(n_rows, n_missing)

  dataset[idx, feature] <- NA
  dataset
}
19
20
# impute missing values using mean imputation
21
#' Impute missing values in one column with the column mean
#'
#' @param dataset Data frame (or matrix) containing the column to impute.
#' @param feature Column name or index to impute.
#' @return `dataset` with every `NA` in `feature` replaced by the mean of the
#'   observed (non-missing) values of that column. If no value is observed
#'   the mean is `NaN`, so the missing entries become `NaN`.
mean_imputation_solution <- function(dataset, feature) {
  values <- dataset[, feature]

  # The argument must be spelled `na.rm`; a misspelled name is silently
  # absorbed by mean()'s `...`, leaving the mean as NA whenever any value is
  # missing and therefore imputing nothing.
  mean_value <- mean(values, na.rm = TRUE)

  dataset[is.na(values), feature] <- mean_value
  dataset
}
28
29
# impute missing values using random imputation
30
#' Impute missing values in one column by sampling observed values
#'
#' Each `NA` in `feature` is replaced by a value drawn at random, without
#' replacement, from the non-missing values of the same column.
#'
#' @param dataset Data frame (or matrix) containing the column to impute.
#' @param feature Column name or index to impute.
#' @return `dataset` with the `NA`s in `feature` filled in. Returned unchanged
#'   when the column has no missing values. Errors when there are more
#'   missing values than observed ones, since sampling without replacement
#'   is then impossible.
random_imputation_solution <- function(dataset, feature) {
  values <- dataset[, feature]
  idx <- is.na(values)

  # Count the missing entries: sum() of the logical mask, not length(),
  # which would be the total number of rows.
  n_missing <- sum(idx)
  if (n_missing == 0L) {
    return(dataset)
  }

  observed <- values[!idx]
  if (n_missing > length(observed)) {
    stop(
      "cannot impute ", n_missing, " missing values from only ",
      length(observed), " observed values without replacement",
      call. = FALSE
    )
  }

  # Sample positions rather than values: sample(x, n) treats a single
  # number x as 1:x, which would invent values when only one is observed.
  samples <- observed[sample.int(length(observed), n_missing)]

  dataset[idx, feature] <- samples
  dataset
}
39
40
# impute missing values using knn over features
41
#' Impute missing values with k-nearest-neighbour imputation
#'
#' Drops the first two columns of `dataset`, transposes the remainder and
#' passes it to `impute.knn()`, then transposes the imputed matrix back and
#' returns it as a data frame.
#'
#' @param dataset Data frame (or matrix); columns 1 and 2 are excluded from
#'   imputation and are not part of the result.
#'   NOTE(review): presumably these are identifier/label columns -- confirm.
#' @param k Number of neighbours, forwarded to `impute.knn()`.
#' @return Data frame with the imputed values of columns 3 onward, in the
#'   original row/column orientation.
knn_imputation_solution <- function(dataset, k) {
  # Negative indices must be wrapped in c(); `-(1,2)` does not parse.
  # drop = FALSE keeps a two-dimensional object even if only one column is
  # left after the first two are removed.
  features <- dataset[, -c(1, 2), drop = FALSE]

  # Transpose so that each column of `dataset` becomes a row of the matrix
  # handed to impute.knn(), then undo the transpose on the result.
  imputed <- impute.knn(t(features), k = k)
  as.data.frame(t(imputed$data))
}
46