Goal Expectancy#
Estimates each team's goal expectancy (the number of goals the bookmaker expects that team to score) from the bookmaker's home-win, draw and away-win probabilities.
[1]:
import penaltyblog as pb
import pandas as pd
Get data from football-data.co.uk#
[2]:
# Fetch the 2019-2020 English Premier League fixtures from football-data.co.uk.
fb = pb.scrapers.FootballData("ENG Premier League", "2019-2020")
df = fb.get_fixtures()

# Keep only the team names and the home / draw / away odds columns.
# The explicit .copy() makes `df` an independent frame, so adding columns to
# it later does not raise pandas' SettingWithCopyWarning.
cols = ["team_home", "team_away", "psh", "psd", "psa"]
df = df[cols].copy()
df.head()
[2]:
team_home | team_away | psh | psd | psa | |
---|---|---|---|---|---|
id | |||||
1565308800---liverpool---norwich | Liverpool | Norwich | 1.15 | 9.59 | 18.05 |
1565395200---bournemouth---sheffield_united | Bournemouth | Sheffield United | 2.04 | 3.57 | 3.90 |
1565395200---burnley---southampton | Burnley | Southampton | 2.71 | 3.31 | 2.81 |
1565395200---crystal_palace---everton | Crystal Palace | Everton | 3.21 | 3.37 | 2.39 |
1565395200---tottenham---aston_villa | Tottenham | Aston Villa | 1.30 | 5.84 | 10.96 |
Remove the overround from the odds#
[3]:
def remove_overround(row):
    """
    Turn one fixture's home / draw / away odds into implied probabilities.

    Reads the ``psh``, ``psd`` and ``psa`` entries of ``row`` (in that order),
    passes them as a list to ``pb.implied.differential_margin_weighting`` and
    returns the ``"implied_probabilities"`` entry of the result wrapped in a
    ``pd.Series``.
    """
    odds_columns = ("psh", "psd", "psa")
    result = pb.implied.differential_margin_weighting(
        [row[column] for column in odds_columns]
    )
    return pd.Series(result["implied_probabilities"])
# Strip the bookmaker's margin fixture by fixture, then store the resulting
# probabilities as the new home / draw / away columns.
margin_free = df.apply(remove_overround, axis=1)
df[["home", "draw", "away"]] = margin_free
df.head()
[3]:
team_home | team_away | psh | psd | psa | home | draw | away | |
---|---|---|---|---|---|---|---|---|
id | ||||||||
1565308800---liverpool---norwich | Liverpool | Norwich | 1.15 | 9.59 | 18.05 | 0.859818 | 0.094528 | 0.045654 |
1565395200---bournemouth---sheffield_united | Bournemouth | Sheffield United | 2.04 | 3.57 | 3.90 | 0.481290 | 0.271206 | 0.247504 |
1565395200---burnley---southampton | Burnley | Southampton | 2.71 | 3.31 | 2.81 | 0.360007 | 0.293118 | 0.346875 |
1565395200---crystal_palace---everton | Crystal Palace | Everton | 3.21 | 3.37 | 2.39 | 0.302636 | 0.287845 | 0.409519 |
1565395200---tottenham---aston_villa | Tottenham | Aston Villa | 1.30 | 5.84 | 10.96 | 0.758663 | 0.160665 | 0.080673 |
Get the goal expectancy#
[4]:
# Compute the goal expectancy for the first five fixtures only (df.head()),
# collecting one plain dict per fixture and building the DataFrame once at
# the end. `output` is bound only to the final DataFrame, never to the
# intermediate list.
records = []
for _, fixture in df.head().iterrows():
    res = pb.models.goal_expectancy(
        fixture["home"], fixture["draw"], fixture["away"]
    )
    records.append(
        {
            "team_home": fixture["team_home"],
            "team_away": fixture["team_away"],
            "home_expectancy": res["home_exp"],
            "away_expectancy": res["away_exp"],
            # Values reported by goal_expectancy alongside the estimates,
            # passed through unchanged.
            "success": res["success"],
            "error": res["error"],
        }
    )

output = pd.DataFrame(records)
output
[4]:
team_home | team_away | home_expectancy | away_expectancy | success | error | |
---|---|---|---|---|---|---|
0 | Liverpool | Norwich | 3.219933 | 0.667064 | True | 5.381587e-09 |
1 | Bournemouth | Sheffield United | 1.403588 | 0.923020 | True | 5.932409e-11 |
2 | Burnley | Southampton | 1.099876 | 1.073906 | True | 1.595460e-11 |
3 | Crystal Palace | Everton | 0.999968 | 1.213245 | True | 6.410602e-11 |
4 | Tottenham | Aston Villa | 2.312856 | 0.607438 | True | 1.123882e-10 |
[ ]: