Comparison of different models Bundesliga Dataset

Author

Oliver Dürr

The experiments take some time to run, therefore we used the R-Script to producte the results https://github.com/oduerr/da/blob/master/website/Euro24/eval_performance_runner.R.

Loading the data

Code

  df = read.csv('~/Documents/GitHub/da/website/Euro24/eval_performance_bundesliga_23.csv')
  df <- mutate(df, name = sub("^.*/", "", name))
  
  #Add additional run with home advantage
  df2 = read.csv('~/Documents/GitHub/da/website/Euro24/eval_performance_bundesliga_23_home_adv.csv')
  df2 <- mutate(df2, name = sub("^.*/", "", name))
  df <- rbind(df, df2)
  
  # Add additional run with 18 games ahead
  df3 = read.csv('~/Documents/GitHub/da/website/Euro24/eval_performance_bundesliga_23_18Ahead.csv')
  df = df3
  print("Using 24_18Ahead that means all data is averaged 18 games ahead.")

[1] "Using 24_18Ahead that means all data is averaged 18 games ahead."

Code

  #df = read.csv('~/Documents/GitHub/da/website/Euro24/eval_performance_bundesliga_23_bets05_18Ahead.csv')
  
  df_raw = read.csv('~/Documents/GitHub/da/website/Euro24/bundesliga2023.csv')
  head(df_raw) %>% kable()

Div	Date	Time	HomeTeam	AwayTeam	FTHG	FTAG	FTR	HTHG	HTAG	HTR	HS	AS	HST	AST	HF	AF	HC	AC	HY	AY	B365H	B365D	B365A	BWH	BWD	BWA	IWH	IWD	IWA	PSH	PSD	PSA	WHH	WHD	WHA	VCH	VCD	VCA	MaxH	MaxD	MaxA	AvgH	AvgD	AvgA	B365.2.5	B365.2.5.1	P.2.5	P.2.5.1	Max.2.5	Max.2.5.1	Avg.2.5	Avg.2.5.1	AHh	B365AHH	B365AHA	PAHH	PAHA	MaxAHH	MaxAHA	AvgAHH	AvgAHA	B365CH	B365CD	B365CA	BWCH	BWCD	BWCA	IWCH	IWCD	IWCA	PSCH	PSCD	PSCA	WHCH	WHCD	WHCA	VCCH	VCCD	VCCA	MaxCH	MaxCD	MaxCA	AvgCH	AvgCD	AvgCA	B365C.2.5	B365C.2.5.1	PC.2.5	PC.2.5.1	MaxC.2.5	MaxC.2.5.1	AvgC.2.5	AvgC.2.5.1	AHCh	B365CAHH	B365CAHA	PCAHH	PCAHA	MaxCAHH	MaxCAHA	AvgCAHH	AvgCAHA
D1	18/08/2023	19:30	Werder Bremen	Bayern Munich	0	4	A	0	1	A	6	25	1	10	16	11	0	6	2	1	7.50	6.50	1.30	8.00	6.0	1.31	7.00	5.25	1.40	8.59	6.36	1.33	8.00	5.50	1.22	9.00	6.0	1.30	9.50	6.50	1.40	8.50	6.09	1.32	1.33	3.4	1.35	3.41	1.37	3.55	1.33	3.34	1.75	1.87	2.06	1.84	2.07	NA	NA	1.84	2.01	8.50	6.0	1.30	8.50	6.0	1.3	7.00	5.25	1.40	8.80	6.30	1.31	8.00	5.50	1.22	9.50	6.00	1.29	9.50	6.50	1.40	8.69	6.05	1.31	1.36	3.20	1.34	3.38	1.40	3.58	1.34	3.29	1.75	1.85	2.08	1.88	2.03	1.92	2.09	1.85	2.00
D1	19/08/2023	14:30	Augsburg	M'gladbach	4	4	D	3	3	D	20	9	8	6	9	14	8	7	1	2	2.70	3.60	2.45	2.70	3.5	2.45	2.75	3.60	2.45	2.74	3.78	2.51	2.50	3.40	2.30	2.70	3.7	2.45	2.80	3.88	2.54	2.72	3.69	2.47	1.62	2.3	1.63	2.39	1.66	2.43	1.61	2.34	0.00	2.03	1.87	2.04	1.87	2.07	1.90	2.02	1.84	2.80	3.6	2.38	2.75	3.6	2.4	2.75	3.65	2.45	2.92	3.72	2.43	2.50	3.40	2.30	2.75	3.75	2.38	2.92	3.87	2.51	2.79	3.67	2.42	1.62	2.30	1.67	2.33	1.67	2.40	1.62	2.31	0.25	1.82	2.11	1.83	2.11	1.83	2.18	1.77	2.09
D1	19/08/2023	14:30	Hoffenheim	Freiburg	1	2	A	0	2	A	24	17	5	8	9	9	4	2	0	1	2.30	3.60	2.90	2.25	3.5	3.10	2.30	3.55	2.95	2.43	3.67	2.92	2.10	3.40	2.80	2.38	3.6	2.88	2.50	3.85	3.10	2.35	3.63	2.94	1.67	2.2	1.67	2.30	1.71	2.32	1.66	2.24	-0.25	2.06	1.84	2.11	1.82	2.11	1.88	2.03	1.82	2.15	3.6	3.10	2.20	3.6	3.1	2.25	3.60	3.00	2.20	3.74	3.32	2.10	3.40	2.80	2.15	3.75	3.13	2.30	3.87	3.32	2.20	3.68	3.15	1.67	2.20	1.69	2.28	1.70	2.36	1.65	2.26	-0.25	1.91	2.02	1.92	2.01	1.98	2.02	1.91	1.95
D1	19/08/2023	14:30	Leverkusen	RB Leipzig	3	2	H	2	1	H	11	13	7	6	13	10	4	5	1	2	2.45	3.60	2.75	2.45	3.6	2.70	2.50	3.55	2.70	2.50	3.61	2.85	2.30	3.25	2.60	2.45	3.6	2.75	2.56	3.75	2.91	2.47	3.62	2.78	1.67	2.2	1.70	2.24	1.77	2.28	1.68	2.20	0.00	1.83	2.07	1.83	2.09	1.89	2.11	1.82	2.05	2.38	3.6	2.80	2.37	3.5	2.8	2.50	3.60	2.70	2.48	3.63	2.98	2.30	3.25	2.60	2.40	3.60	2.80	2.50	3.78	2.99	2.44	3.60	2.81	1.67	2.20	1.74	2.20	1.75	2.32	1.68	2.19	-0.25	2.11	1.82	2.14	1.84	2.14	1.84	2.09	1.78
D1	19/08/2023	14:30	Stuttgart	Bochum	5	0	H	2	0	H	19	4	9	1	5	12	7	0	1	1	1.65	4.33	4.50	1.68	4.0	4.75	1.70	4.10	4.50	1.70	4.25	4.78	1.57	3.90	4.33	1.70	4.1	4.60	1.77	4.40	5.00	1.69	4.17	4.69	1.62	2.3	1.66	2.32	1.71	2.38	1.64	2.29	-0.75	1.88	2.02	1.89	2.01	1.95	2.04	1.86	1.98	1.73	4.2	4.20	1.77	3.9	4.2	1.77	4.10	4.20	1.78	4.16	4.52	1.67	3.60	4.00	1.73	4.10	4.33	1.85	4.20	4.52	1.76	4.02	4.32	1.62	2.30	1.67	2.33	1.70	2.35	1.64	2.26	-0.75	1.98	1.95	1.99	1.93	2.06	1.96	1.95	1.89
D1	19/08/2023	14:30	Wolfsburg	Heidenheim	2	0	H	2	0	H	18	13	8	1	10	9	1	7	3	0	1.60	4.20	5.25	1.61	4.2	5.25	1.63	4.20	5.25	1.62	4.49	5.24	1.50	4.00	4.75	1.60	4.4	5.00	1.66	4.60	5.50	1.62	4.31	5.19	1.62	2.3	1.63	2.39	1.66	2.44	1.61	2.34	-1.00	2.03	1.87	2.03	1.88	2.07	1.88	2.01	1.84	1.62	4.2	5.00	1.65	4.1	4.8	1.65	4.20	4.90	1.66	4.42	5.16	1.50	4.00	4.75	1.62	4.33	5.00	1.69	4.50	5.25	1.64	4.28	5.03	1.57	2.38	1.60	2.49	1.67	2.49	1.59	2.39	-1.00	2.13	1.81	2.10	1.82	2.14	1.85	2.07	1.80

Exploratory Analysis

Code

#str(df_raw)
#Full Time Home Goals, Full Time Away Goals
home_wins = df_raw$FTHG > df_raw$FTAG
away_wins = df_raw$FTHG < df_raw$FTAG
draws = df_raw$FTHG == df_raw$FTAG

# Betting on home wins would have some log(0) --> -Inf

# Betting according frequency
# This is a bit cheating, since we use the future data to calculate
ps_naive = c(sum(home_wins), sum(draws), sum(away_wins))/nrow(df_raw)
NLL_NAIVE = sum(
  -home_wins*log(ps_naive[1])
  -draws*log(ps_naive[2])
  -away_wins*log(ps_naive[3])
)/nrow(df_raw)
NLL_NAIVE

[1] 1.074078

Model Comparisons

We use the negative log likelihood (NLL) as a measure of the predictive performance of the models. The lower the NLL, the better the model. However, strictly speaking it is the negative log posterior predictive density (divided by \(n\)) evaluated at the \(n\) games after the training data.

\[ \text{NLL} = -\frac{1}{n}\sum_{i=1}^n \log p(y_i | x_i, \theta) \]

Code

   # Assuming df is your dataframe
  df %>% filter(type == 'NLL_PRED') %>% 
    ggplot(aes(x = ntrain, y = res, color = name)) + 
    geom_line() + 
    geom_point() + 
    theme_minimal() + 
    labs(
      title = 'Comparison of different models for the Bundesliga 2023 dataset', 
      x = 'Number of training data', 
      y = 'Negative Log Likelihood'
    ) + 
    #ylim(2.9, 3.5) +
    #xlim(0,100) +
    theme(legend.position = "top") +
    coord_cartesian(clip = "off") # Allow lines to go outside the plot area

Observations

Especially for small training data, the hierarchical model performs better than the non-hierarchical model.
The Correlated Dataset model performs slightly better than non-correlated one
There is partically no difference in predictive performance when comparing the model with and without Cholesky decomposition.
The negative binomial model performs comparable to Poisson model.

Comparison of predicted vs PSIS-LOO

Code

  df %>% filter(type %in% c('NLL_PRED', 'NLL_PSIS', 'NLL_PRED_STAN')) %>%
    ggplot(aes(x = ntrain, y = res, color = type)) + 
    geom_line(aes(linetype = type)) + 
    geom_point() + 
    theme_minimal() + 
    labs(
      title =  'Comparison of different models for the Bundesliga 2023 dataset',
      x = 'Number of training data', 
      y = 'Negative Log Likelihood'
    ) + 
    ylim(2.5, 4) +
    facet_wrap(~name) +
    theme(legend.position = "bottom") + 
    coord_cartesian(clip = "off") # Allow lines to go outside the plot area

Observations

NNL for win, draws and losses

Code

  df %>% 
    filter(type %in% c('NLL_RESULTS', 'NLL_BOOKIE')) %>%
    ggplot(aes(x = ntrain, y = res, color = type)) + 
    geom_line(aes(linetype = name)) + 
    geom_point() + 
    theme_minimal() + 
    labs(
      title = 'Comparison of different models for the Bundesliga 2023 dataset',
      x = 'Number of training data', 
      y = 'Negative Log Likelihood'
    ) + 
    ylim(0.75, 1.5) +
    geom_hline(yintercept = NLL_NAIVE, col = "green", alpha=0.5) +
    theme(legend.position = "bottom") + 
    coord_cartesian(clip = "off") # Allow lines to go outside the plot area

Code

#### Booki mean
  dfp = df %>% 
    filter(type %in% c('NLL_RESULTS', 'NLL_BOOKIE')) %>%
    group_by(name, type) %>% 
    summarise(nll = mean(res)) 
  
  dfp = rbind(dfp, data.frame(name = "NLL_NAIVE", type = "NLL_NAIVE", nll = NLL_NAIVE))
  # Remove all but one NLL_BOOKIE
  kableExtra::kable(dfp, digits = 3)

name	type	nll
da/stan/football/hier_model_nb	NLL_BOOKIE	0.953
da/stan/football/hier_model_nb	NLL_RESULTS	0.983
da/stan/football/non_hier_model	NLL_BOOKIE	0.953
da/stan/football/non_hier_model	NLL_RESULTS	1.011
da/website/Euro24/hier_model	NLL_BOOKIE	0.953
da/website/Euro24/hier_model	NLL_RESULTS	0.983
da/website/Euro24/hier_model_cor	NLL_BOOKIE	0.953
da/website/Euro24/hier_model_cor	NLL_RESULTS	0.992
da/website/Euro24/hier_model_cor_home	NLL_BOOKIE	0.953
da/website/Euro24/hier_model_cor_home	NLL_RESULTS	0.992
da/website/Euro24/hier_model_cor_nocholsky	NLL_BOOKIE	0.953
da/website/Euro24/hier_model_cor_nocholsky	NLL_RESULTS	0.992
NLL_NAIVE	NLL_NAIVE	1.074

Observations

The NLL for the bookie is always better than the NLL of the models, so we should not bet.

Ranked probability score

Code

  df %>% 
    filter(type %in% c('RPS', 'rps_booki')) %>%
    ggplot(aes(x = ntrain, y = res, color = type)) + 
    geom_line(aes(linetype = name)) + 
    geom_point() + 
    theme_minimal() + 
    labs(
      title = 'Comparison of different models for the Bundesliga 2023 dataset',
      x = 'Number of training data', 
      y = 'Ranked Probability Score'
    ) + 
    #ylim(0.75, 1.5) +
    theme(legend.position = "bottom") + 
    coord_cartesian(clip = "off") # Allow lines to go outside the plot area

Averaged for the complete season:

Code

  library(dplyr)
  dfres = df %>% 
    filter(type %in% c('RPS', 'rps_booki')) %>%
    group_by(name, type) %>% 
    summarise(rps = mean(res))

`summarise()` has grouped output by 'name'. You can override using the
`.groups` argument.

Code

  rps_booki = dfres %>% filter(type == 'rps_booki') 
  
  dfres2 = dfres %>% filter(type == 'RPS') %>% dplyr::select(name, rps) 
  dfres2 = rbind(dfres2, rps_booki[1,])

  # Sort by RPS
  dfres2 = dfres2[order(dfres2$rps),]
  kable(dfres2)

name	rps	type
da/stan/football/hier_model_nb	0.1865408	rps_booki
da/website/Euro24/hier_model_cor_home	0.1979763	NA
da/stan/football/hier_model_nb	0.1982434	NA
da/website/Euro24/hier_model	0.1984227	NA
da/website/Euro24/hier_model_cor	0.1993777	NA
da/website/Euro24/hier_model_cor_nocholsky	0.1994641	NA
da/stan/football/non_hier_model	0.2032188	NA

Betting Returns

Code

  df %>% 
    filter(type %in% c('BET_RETURN')) %>%
    ggplot(aes(x = ntrain, y = res, color = name)) + 
    geom_line(aes(linetype = name)) +  
    geom_point() + 
    theme_minimal() + 
    labs(
      title = 'Comparison of different models for the Bundesliga 2023 dataset',
      x = 'Number of training data', 
      y = 'Betting Returns'
    ) + 
    #ylim(0.75, 1.5) +
    theme(legend.position = "bottom") +  
    coord_cartesian(clip = "off") # Allow lines to go outside the plot area

Code

  df %>% 
    filter(type %in% c('BET_RETURN')) %>%
    group_by(name) %>% 
    summarise(mean(res))

# A tibble: 6 × 2
  name                                       `mean(res)`
  <chr>                                            <dbl>
1 da/stan/football/hier_model_nb                  0.0635
2 da/stan/football/non_hier_model                 0.0137
3 da/website/Euro24/hier_model                    0.0660
4 da/website/Euro24/hier_model_cor               -0.120 
5 da/website/Euro24/hier_model_cor_home          -0.0905
6 da/website/Euro24/hier_model_cor_nocholsky     -0.122

Observations

We see quite some fluctuation in the betting return. Since the NLL shows that the odds from the booki are always better then the NLLs of the models we should not bet.

Technical Details

Code

  df %>% 
    filter(type %in% c('MIN_SUM_PROB')) %>%
    ggplot(aes(x = ntrain, y = res, color = name)) + 
    geom_line(aes(linetype = name)) + 
    geom_point() + 
    theme_minimal() + 
    labs(
      title = 'Comparison of different models for the Bundesliga 2023 dataset',
      x = 'Number of training data', 
      y = 'Sum of Probabilities from 0 to 10 goals (should be 1)'
    ) + 
    ylim(0.75, 1.01) +
    theme(legend.position = "bottom") + 
    coord_cartesian(clip = "off") # Allow lines to go outside the plot area

Code

  df %>% 
    filter(type %in% c('num_divergent')) %>%
    ggplot(aes(x = ntrain, y = res, color = name)) + 
    geom_line(aes(linetype = name)) + 
    geom_point() + 
    theme_minimal() + 
    labs(
      title = 'Comparison of different models for the Bundesliga 2023 dataset',
      x = 'Number of training data', 
      y = 'Number of Divergent Transitions (sqrt scale)'
    ) + 
    theme(legend.position = "bottom") + 
    scale_y_sqrt() + 
    coord_cartesian(clip = "off") # Allow lines to go outside the plot area

Code

df %>% 
  filter(type %in% c('ebfmi')) %>%
  ggplot(aes(x = ntrain, y = res, color = name)) + 
  geom_line(aes(linetype = name)) + 
  geom_point() + 
  theme_minimal() + 
  labs(
    title = 'Comparison of different models for the Bundesliga 2023 dataset',
    x = 'Number of training data', 
    y = 'ebfmi'
  ) + 
  theme(legend.position = "bottom") + 
  geom_hline(yintercept = 0.3, linetype = "dashed", color = "red") +
  annotate("text", x = Inf, y = 0.33, label = "Acceptable", hjust = 1.1, color = "red") +
  annotate("text", x = Inf, y = 0.27, label = "Non-Acceptable", hjust = 1.1, color = "red") +
  coord_cartesian(clip = "off") # Allow lines to go outside the plot area