import warnings
import numpy as np
from scipy.optimize import minimize
from penaltyblog.models.base_model import BaseGoalsModel
from penaltyblog.models.custom_types import (
GoalInput,
ParamsOutput,
TeamInput,
WeightInput,
)
from penaltyblog.models.football_probability_grid import (
FootballProbabilityGrid,
)
from .gradients import negative_binomial_gradient
from .loss import compute_negative_binomial_loss
from .probabilities import compute_negative_binomial_probabilities
[docs]
class NegativeBinomialGoalModel(BaseGoalsModel):
"""
Negative Binomial model for predicting outcomes of football (soccer) matches
handling overdispersion in goal data.
Methods
-------
fit()
fits a Negative Binomial model to the data to calculate the team strengths.
Must be called before the model can be used to predict game outcomes
predict(home_team, away_team, max_goals=10)
predicts the probability of each scoreline for a given home and away team
get_params()
provides access to the model's fitted parameters
"""
[docs]
def __init__(
self,
goals_home: GoalInput,
goals_away: GoalInput,
teams_home: TeamInput,
teams_away: TeamInput,
weights: WeightInput = None,
):
"""
Initialises the NegativeBinomialGoalModel class.
Parameters
----------
goals_home : array_like
The number of goals scored by the home team
goals_away : array_like
The number of goals scored by the away team
teams_home : array_like
The names of the home teams
teams_away : array_like
The names of the away teams
weights : array_like, optional
The weights of the matches, by default None
"""
super().__init__(goals_home, goals_away, teams_home, teams_away, weights)
self._params = np.concatenate(
([1] * self.n_teams, [-1] * self.n_teams, [0.25], [0.1])
) # Home advantage and dispersion parameter
def __repr__(self):
lines = ["Module: Penaltyblog", "", "Model: Negative Binomial", ""]
if not self.fitted:
lines.append("Status: Model not fitted")
return "\n".join(lines)
lines.extend(
[
f"Number of parameters: {self.n_params}",
f"Log Likelihood: {round(self.loglikelihood, 3)}",
f"AIC: {round(self.aic, 3)}",
"",
"{0: <20} {1:<20} {2:<20}".format("Team", "Attack", "Defence"),
"-" * 60,
]
)
for idx, team in enumerate(self.teams):
lines.append(
"{0: <20} {1:<20} {2:<20}".format(
team,
round(self._params[idx], 3),
round(self._params[idx + self.n_teams], 3),
)
)
lines.extend(
[
"-" * 60,
f"Home Advantage: {round(self._params[-2], 3)}",
f"Dispersion: {round(self._params[-1], 3)}",
]
)
return "\n".join(lines)
def _gradient(self, params):
attack = np.asarray(params[: self.n_teams], dtype=np.double, order="C")
defence = np.asarray(
params[self.n_teams : 2 * self.n_teams], dtype=np.double, order="C"
)
hfa = params[-2] # Home field advantage
dispersion = params[-1] # Dispersion parameter
grad = negative_binomial_gradient(
attack,
defence,
hfa,
dispersion,
self.home_idx,
self.away_idx,
self.goals_home,
self.goals_away,
self.weights,
)
return np.clip(grad, 1e-5, 100)
def _loss_function(self, params: np.ndarray) -> float:
"""
Calculates the negative log-likelihood of the Negative Binomial model.
Parameters
----------
params : array_like
The parameters of the model
data : dict
The data used to fit the model
n_teams : int
The number of teams in the league
Returns
-------
float
The negative log-likelihood of the Negative Binomial model
"""
attack = np.asarray(params[: self.n_teams], dtype=np.double, order="C")
defence = np.asarray(
params[self.n_teams : 2 * self.n_teams], dtype=np.double, order="C"
)
hfa = params[-2]
dispersion = params[-1]
return compute_negative_binomial_loss(
self.goals_home,
self.goals_away,
self.weights,
self.home_idx,
self.away_idx,
attack,
defence,
hfa,
dispersion,
)
[docs]
def fit(self):
"""
Fits the Negative Binomial model to the data.
"""
options = {"maxiter": 1000, "disp": False}
bounds = [(-2, 2)] * self.n_teams * 2 + [(-4, 4), (1e-5, 1000)]
constraints = [
{"type": "eq", "fun": lambda x: sum(x[: self.n_teams]) - self.n_teams}
]
with warnings.catch_warnings():
warnings.filterwarnings("ignore")
self._res = minimize(
self._loss_function,
self._params,
bounds=bounds,
constraints=constraints,
options=options,
# jac=self._gradient,
)
if not self._res.success:
raise ValueError(f"Optimization failed with message: {self._res.message}")
self._params = self._res.x
self.n_params = len(self._params)
self.loglikelihood = -self._res.fun
self.aic = -2 * self.loglikelihood + 2 * self.n_params
self.fitted = True
[docs]
def predict(
self, home_team: str, away_team: str, max_goals=10
) -> FootballProbabilityGrid:
"""
Predicts the probability of each scoreline for a given home and away team.
Parameters
----------
home_team : str
The name of the home team
away_team : str
The name of the away team
max_goals : int, optional
The maximum number of goals to consider, by default 10
Returns
-------
FootballProbabilityGrid
A FootballProbabilityGrid object containing the probability of each scoreline
"""
if not self.fitted:
raise ValueError("Model has not been fitted yet.")
home_idx = self.team_to_idx[home_team]
away_idx = self.team_to_idx[away_team]
home_attack = self._params[home_idx]
away_attack = self._params[away_idx]
home_defense = self._params[home_idx + self.n_teams]
away_defense = self._params[away_idx + self.n_teams]
home_advantage = self._params[-2]
dispersion = self._params[-1]
# Preallocate the score matrix as a flattened array.
score_matrix = np.empty(max_goals * max_goals, dtype=np.float64)
# Allocate one-element arrays for lambda values.
lambda_home = np.empty(1, dtype=np.float64)
lambda_away = np.empty(1, dtype=np.float64)
compute_negative_binomial_probabilities(
float(home_attack),
float(away_attack),
float(home_defense),
float(away_defense),
float(home_advantage),
float(dispersion),
int(max_goals),
score_matrix,
lambda_home,
lambda_away,
)
score_matrix.shape = (max_goals, max_goals)
return FootballProbabilityGrid(score_matrix, lambda_home, lambda_away)
[docs]
def get_params(self) -> ParamsOutput:
"""
Returns the parameters of the Negative Binomial model.
"""
if not self.fitted:
raise ValueError(
"Model's parameters have not been fit yet, please call the `fit()` function first"
)
params = dict(
zip(
["attack_" + team for team in self.teams]
+ ["defence_" + team for team in self.teams]
+ ["home_advantage", "dispersion"],
self._params,
)
)
return params