{
"cells": [
{
"cell_type": "markdown",
"id": "81215b75-f5f8-4c17-9cd4-08b1b0ed4234",
"metadata": {},
"source": [
"# Goal Expectancy\n",
"\n",
"Estimates the goal expectancy (the number of goals the bookmaker expects each team to score) based on their home, draw, away probabilities"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "1f931497-c1f9-4cb4-969a-058676e42a24",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import penaltyblog as pb\n",
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"id": "4a1b5c76-8f47-4f59-8351-d5add2f69309",
"metadata": {},
"source": [
"## Get data from football-data.co.uk"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "949b129d-e4e5-4975-8318-dd601d918e90",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" team_home | \n",
" team_away | \n",
" psh | \n",
" psd | \n",
" psa | \n",
"
\n",
" \n",
" id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 1565308800---liverpool---norwich | \n",
" Liverpool | \n",
" Norwich | \n",
" 1.15 | \n",
" 9.59 | \n",
" 18.05 | \n",
"
\n",
" \n",
" 1565395200---bournemouth---sheffield_united | \n",
" Bournemouth | \n",
" Sheffield United | \n",
" 2.04 | \n",
" 3.57 | \n",
" 3.90 | \n",
"
\n",
" \n",
" 1565395200---burnley---southampton | \n",
" Burnley | \n",
" Southampton | \n",
" 2.71 | \n",
" 3.31 | \n",
" 2.81 | \n",
"
\n",
" \n",
" 1565395200---crystal_palace---everton | \n",
" Crystal Palace | \n",
" Everton | \n",
" 3.21 | \n",
" 3.37 | \n",
" 2.39 | \n",
"
\n",
" \n",
" 1565395200---tottenham---aston_villa | \n",
" Tottenham | \n",
" Aston Villa | \n",
" 1.30 | \n",
" 5.84 | \n",
" 10.96 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" team_home team_away \\\n",
"id \n",
"1565308800---liverpool---norwich Liverpool Norwich \n",
"1565395200---bournemouth---sheffield_united Bournemouth Sheffield United \n",
"1565395200---burnley---southampton Burnley Southampton \n",
"1565395200---crystal_palace---everton Crystal Palace Everton \n",
"1565395200---tottenham---aston_villa Tottenham Aston Villa \n",
"\n",
" psh psd psa \n",
"id \n",
"1565308800---liverpool---norwich 1.15 9.59 18.05 \n",
"1565395200---bournemouth---sheffield_united 2.04 3.57 3.90 \n",
"1565395200---burnley---southampton 2.71 3.31 2.81 \n",
"1565395200---crystal_palace---everton 3.21 3.37 2.39 \n",
"1565395200---tottenham---aston_villa 1.30 5.84 10.96 "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fb = pb.scrapers.FootballData(\"ENG Premier League\", \"2019-2020\")\n",
"df = fb.get_fixtures()\n",
"\n",
"cols = [\"team_home\", \"team_away\", \"psh\", \"psd\", \"psa\"]\n",
"df = df[cols]\n",
"\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"id": "faf4dd3f-34ca-4547-bcac-46d45ae2e1f2",
"metadata": {},
"source": [
"## Remove the overround from the odds"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "57093b8f-9725-4740-9a93-35da39171380",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" team_home | \n",
" team_away | \n",
" psh | \n",
" psd | \n",
" psa | \n",
" home | \n",
" draw | \n",
" away | \n",
"
\n",
" \n",
" id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 1565308800---liverpool---norwich | \n",
" Liverpool | \n",
" Norwich | \n",
" 1.15 | \n",
" 9.59 | \n",
" 18.05 | \n",
" 0.859818 | \n",
" 0.094528 | \n",
" 0.045654 | \n",
"
\n",
" \n",
" 1565395200---bournemouth---sheffield_united | \n",
" Bournemouth | \n",
" Sheffield United | \n",
" 2.04 | \n",
" 3.57 | \n",
" 3.90 | \n",
" 0.481290 | \n",
" 0.271206 | \n",
" 0.247504 | \n",
"
\n",
" \n",
" 1565395200---burnley---southampton | \n",
" Burnley | \n",
" Southampton | \n",
" 2.71 | \n",
" 3.31 | \n",
" 2.81 | \n",
" 0.360007 | \n",
" 0.293118 | \n",
" 0.346875 | \n",
"
\n",
" \n",
" 1565395200---crystal_palace---everton | \n",
" Crystal Palace | \n",
" Everton | \n",
" 3.21 | \n",
" 3.37 | \n",
" 2.39 | \n",
" 0.302636 | \n",
" 0.287845 | \n",
" 0.409519 | \n",
"
\n",
" \n",
" 1565395200---tottenham---aston_villa | \n",
" Tottenham | \n",
" Aston Villa | \n",
" 1.30 | \n",
" 5.84 | \n",
" 10.96 | \n",
" 0.758663 | \n",
" 0.160665 | \n",
" 0.080673 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" team_home team_away \\\n",
"id \n",
"1565308800---liverpool---norwich Liverpool Norwich \n",
"1565395200---bournemouth---sheffield_united Bournemouth Sheffield United \n",
"1565395200---burnley---southampton Burnley Southampton \n",
"1565395200---crystal_palace---everton Crystal Palace Everton \n",
"1565395200---tottenham---aston_villa Tottenham Aston Villa \n",
"\n",
" psh psd psa home \\\n",
"id \n",
"1565308800---liverpool---norwich 1.15 9.59 18.05 0.859818 \n",
"1565395200---bournemouth---sheffield_united 2.04 3.57 3.90 0.481290 \n",
"1565395200---burnley---southampton 2.71 3.31 2.81 0.360007 \n",
"1565395200---crystal_palace---everton 3.21 3.37 2.39 0.302636 \n",
"1565395200---tottenham---aston_villa 1.30 5.84 10.96 0.758663 \n",
"\n",
" draw away \n",
"id \n",
"1565308800---liverpool---norwich 0.094528 0.045654 \n",
"1565395200---bournemouth---sheffield_united 0.271206 0.247504 \n",
"1565395200---burnley---southampton 0.293118 0.346875 \n",
"1565395200---crystal_palace---everton 0.287845 0.409519 \n",
"1565395200---tottenham---aston_villa 0.160665 0.080673 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def remove_overround(row):\n",
" odds = [\n",
" row[\"psh\"],\n",
" row[\"psd\"],\n",
" row[\"psa\"],\n",
" ]\n",
"\n",
" odds = pb.implied.differential_margin_weighting(odds)\n",
"\n",
" return pd.Series(odds[\"implied_probabilities\"])\n",
"\n",
"\n",
"df[[\"home\", \"draw\", \"away\"]] = df.apply(remove_overround, axis=1)\n",
"\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"id": "9257f0fc-5f2b-402f-9209-d005d14880be",
"metadata": {},
"source": [
"## Get the goal expectancy"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "6b8b7d83-273e-4ab1-bac0-b7387556a03f",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" team_home | \n",
" team_away | \n",
" home_expectancy | \n",
" away_expectancy | \n",
" success | \n",
" error | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Liverpool | \n",
" Norwich | \n",
" 3.219933 | \n",
" 0.667064 | \n",
" True | \n",
" 5.381587e-09 | \n",
"
\n",
" \n",
" 1 | \n",
" Bournemouth | \n",
" Sheffield United | \n",
" 1.403588 | \n",
" 0.923020 | \n",
" True | \n",
" 5.932409e-11 | \n",
"
\n",
" \n",
" 2 | \n",
" Burnley | \n",
" Southampton | \n",
" 1.099876 | \n",
" 1.073906 | \n",
" True | \n",
" 1.595460e-11 | \n",
"
\n",
" \n",
" 3 | \n",
" Crystal Palace | \n",
" Everton | \n",
" 0.999968 | \n",
" 1.213245 | \n",
" True | \n",
" 6.410602e-11 | \n",
"
\n",
" \n",
" 4 | \n",
" Tottenham | \n",
" Aston Villa | \n",
" 2.312856 | \n",
" 0.607438 | \n",
" True | \n",
" 1.123882e-10 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" team_home team_away home_expectancy away_expectancy \\\n",
"0 Liverpool Norwich 3.219933 0.667064 \n",
"1 Bournemouth Sheffield United 1.403588 0.923020 \n",
"2 Burnley Southampton 1.099876 1.073906 \n",
"3 Crystal Palace Everton 0.999968 1.213245 \n",
"4 Tottenham Aston Villa 2.312856 0.607438 \n",
"\n",
" success error \n",
"0 True 5.381587e-09 \n",
"1 True 5.932409e-11 \n",
"2 True 1.595460e-11 \n",
"3 True 6.410602e-11 \n",
"4 True 1.123882e-10 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"output = list()\n",
"for idx, row in df.head().iterrows():\n",
" res = pb.models.goal_expectancy(row[\"home\"], row[\"draw\"], row[\"away\"])\n",
"\n",
" tmp = {\n",
" \"team_home\": row[\"team_home\"],\n",
" \"team_away\": row[\"team_away\"],\n",
" \"home_expectancy\": res[\"home_exp\"],\n",
" \"away_expectancy\": res[\"away_exp\"],\n",
" \"success\": res[\"success\"],\n",
" \"error\": res[\"error\"],\n",
" }\n",
"\n",
" output.append(tmp)\n",
"\n",
"output = pd.DataFrame(output)\n",
"output"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5997054-9134-43e3-9d08-797c183fe642",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}