## Poisson models for the number of goals scored per team and match in
## the 2018 FIFA World Cup: a constant-mean model is compared with a
## Poisson regression on the teams' ability difference.

## attach the package providing Poisson() and pdf(); data() below only
## loads the data set, and in a default session an unattached pdf()
## would resolve to the graphics device function grDevices::pdf()
library("distributions3")

## load data
data("FIFA2018", package = "distributions3")

## goal counts at which all probabilities below are evaluated
goal_counts <- 0:6

## observed relative frequencies of goals in all matches; the explicit
## factor levels give exactly one row per entry of goal_counts, so the
## observed and expected columns below always line up row by row
## (counts outside goal_counts are not tabulated)
obsrvd <- prop.table(table(factor(FIFA2018$goals, levels = goal_counts)))

## expected probabilities assuming a simple Poisson model,
## using the average number of goals across all teams/matches
## as the point estimate for the mean (lambda) of the distribution
p_const <- Poisson(lambda = mean(FIFA2018$goals))
p_const
expctd <- pdf(p_const, goal_counts)

## comparison: observed vs. expected frequencies
## frequencies for 3 and 4 goals are slightly overfitted
## while 5 and 6 goals are slightly underfitted
cbind("observed" = obsrvd, "expected" = expctd)

## instead of fitting the same average Poisson model to all
## teams/matches, take ability differences into account
m <- glm(goals ~ difference, data = FIFA2018, family = poisson)
summary(m)
## when the ratio of abilities increases by 1 percent, the
## expected number of goals increases by around 0.4 percent

## the fitted means yield a different predicted Poisson distribution
## for each team/match
p_reg <- Poisson(lambda = fitted(m))
head(p_reg)

## as an illustration, the following goal distributions
## were expected for the final (that France won 4-2 against Croatia)
p_final <- tail(p_reg, 2)
p_final
pdf(p_final, goal_counts)
## clearly France was expected to score more goals than Croatia
## but both teams scored more goals than expected, though not
## implausibly many

## assuming independence of the number of goals scored, obtain
## table of possible match results (after normal time), along with
## overall probabilities of win/draw/lose
## rows of res index the goals of the first team in p_final, columns
## those of the second, so the lower triangle holds the results in
## which the first team scores more; probabilities are only evaluated
## at goal_counts, so the three sums need not add up to exactly 1
res <- outer(pdf(p_final[1], goal_counts), pdf(p_final[2], goal_counts))
sum(res[lower.tri(res)]) ## France wins
sum(diag(res)) ## draw
sum(res[upper.tri(res)]) ## France loses

## update expected frequencies table based on regression model:
## evaluate every team/match distribution at goal_counts, then
## average the probabilities over all teams/matches
expctd <- pdf(p_reg, goal_counts)
head(expctd)
expctd <- colMeans(expctd)
cbind("observed" = obsrvd, "expected" = expctd)
## Run the code above in your browser using DataLab