# Simulate exam results for one class. No RNG seed is set, so every run
# produces different data.
# NOTE: `options`, `names`, `points` and `data` mask base R functions of the
# same name; the names are kept because the rest of the script refers to them.
n <- 150 # Number of students

# Student labels: two-letter codes drawn from all 26 * 26 letter pairs.
# Sampling is with replacement, so two students may share a label.
x <- expand.grid(rep(list(LETTERS), 2))
options <- do.call(paste0, x)
names <- sample(options, size = n, replace = TRUE)

# Deliberately non-normal scores: uniform draws (in steps of 0.1) from five
# score bands, with a fixed number of students per band.
band_lower <- c(40, 50, 60, 70, 80)
band_upper <- c(50, 60, 70, 80, 100)
band_count <- c(15, 46, 25, 47, 17)
# data.frame() below needs exactly one score per student.
stopifnot(sum(band_count) == n)

# Bands are sampled in order, so points[1:15] come from the lowest band and
# the last 17 values from the highest one.
points <- unlist(Map(
  function(lower, upper, count) {
    sample(seq(lower, upper, 0.1), count, replace = TRUE)
  },
  band_lower, band_upper, band_count
))
data <- data.frame(names, points)
# Check the raw scores for normality three ways: histogram, Shapiro-Wilk test
# and a normal Q-Q plot. `border = FALSE` draws the bars without outlines.
hist(points, xlim = c(0, 100), border = FALSE, col = "tan") # Not normal
shapiro.test(points) # ... with high degree of certainty...
##
## Shapiro-Wilk normality test
##
## data: points
## W = 0.97574, p-value = 0.009315
qqnorm(points) # ...even visually.
# Grade boundaries: the empirical score quantiles taken at the N(0,1)
# cumulative probabilities for z = -2, -1, 0, 1, 2.
cutoffs <- quantile(x = points, probs = pnorm(-2:2))

# Bin every score between consecutive boundaries; the lowest bin is "F" and
# the highest is "A".
data[["final"]] <- cut(
  x = data[["points"]],
  breaks = c(-Inf, cutoffs, Inf),
  labels = rev(LETTERS[1:6])
)

# Share of students per grade: plotted first (now they look normal), then
# printed. This is how the grades are distributed.
freq_distr <- prop.table(table(data[["final"]]))
barplot(freq_distr)
freq_distr
##
## F E D C B A
## 0.02666667 0.13333333 0.34000000 0.34000000 0.13333333 0.02666667
# Probability mass of N(0,1) in each grade bin (F, E, D, C, B, A), i.e. between
# the consecutive breakpoints -Inf, -2, -1, 0, 1, 2, +Inf. Differencing the CDF
# over a single breakpoint vector keeps the bins tied to the same z-values used
# for the cutoffs. The outer parentheses print the result.
(Normal_distr <- diff(pnorm(c(-Inf, seq(-2, 2), Inf))))
## [1] 0.02275013 0.13590512 0.34134475 0.34134475 0.13590512 0.02275013
# This is how the normal would have them.
# Goodness-of-fit test of the grade distribution against the N(0,1) bin
# probabilities. chisq.test() must be given the observed COUNTS: passing
# proportions (which sum to 1) makes it behave as if n = 1, shrinking the
# statistic by a factor of n and pushing the p-value to 1.
# The test does not reject the normal bin proportions. That is expected, since
# the cutoffs were chosen as exactly those quantiles, and failing to reject is
# not a confirmation of normality.
# The warning appears because the expected count in the two tail bins
# (150 * 0.0228, about 3.4) is below 5.
chisq.test(table(data$final), p = Normal_distr)
## Warning in chisq.test(table(data$final), p = Normal_distr): Chi-squared
## approximation may be incorrect
##
## Chi-squared test for given probabilities
##
## data: table(data$final)
## X-squared = 0.21846, df = 5, p-value = 0.9989
Alternatively, the percentile can be given as the final grade, or used as an additional grade (percentile + ordinal score):
# Standardise the scores: subtract the mean and divide by the standard
# deviation. scale() returns a one-column matrix.
zscores <- scale(points)
mean(zscores); sd(zscores) # Mean ~0 and sd 1 - same location/scale as N(0,1)
## [1] 3.006059e-16
## [1] 1
# Standardising only shifts and rescales: the data distribution is normalized
# but not distributed normally.
hist(zscores, breaks = 18, border = FALSE, col = "tan")
# Percentile of each z-score under the standard normal CDF, on a 0-100 scale.
# NOTE(review): this is a model-based percentile, not the empirical rank of the
# student within the class - confirm that this is what is intended.
percentiles <- pnorm(zscores) * 100
plot(percentiles, pch = 20, col = 2)
At this point we can assign the percentiles as the final grade:
# Store the percentiles, rounded to whole numbers, as a new column and show
# the last six rows of the table.
data[["percentiles"]] <- round(percentiles, digits = 0)
tail(data, n = 6L)
## names points final percentiles
## 145 KI 84.8 B 93
## 146 KG 97.1 A 99
## 147 EI 87.0 B 95
## 148 TM 90.2 B 97
## 149 MH 80.2 B 86
## 150 WX 87.4 B 95
Or we can assign grades based on percentiles as we did with “points”:
# Grade boundaries on the percentile scale (this overwrites the earlier
# score-based `cutoffs`).
cutoffs <- quantile(percentiles, probs = pnorm(seq(-2, 2)))
# Cut the same unrounded percentiles the boundaries were computed from. Cutting
# the rounded `data$percentiles` instead could move a borderline student across
# a boundary. as.vector() drops the one-column matrix shape.
# The percentile is an increasing transform of the score, so these grades
# should coincide with `data$final`.
data$alternative <- cut(
  as.vector(percentiles),
  breaks = c(-Inf, cutoffs, Inf),
  labels = LETTERS[6:1]
) # And we assign final grades based on these cutoffs
tail(data)
## names points final percentiles alternative
## 145 KI 84.8 B 93 B
## 146 KG 97.1 A 99 A
## 147 EI 87.0 B 95 B
## 148 TM 90.2 B 97 B
## 149 MH 80.2 B 86 B
## 150 WX 87.4 B 95 B
# Bar chart of the share of students in each percentile-based grade.
barplot(
  height = prop.table(table(data[["alternative"]]))
)
NOTE: These are tentative notes on different topics for personal use - expect mistakes and misunderstandings.