{ "metadata": { "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.2" }, "orig_nbformat": 2, "kernelspec": { "name": "python392jvsc74a57bd0a27d3f2bf68df5402465348834a2195030d3fc5bfc8e594e2a17c8c7e2447c85", "display_name": "Python 3.9.2 64-bit ('ds-3.9': conda)" } }, "nbformat": 4, "nbformat_minor": 2, "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "type(stat): , len(stat): 2\n" ] } ], "source": [ "# Trial run against one known page: the 2021 PGA Tour driving-distance stats.\n", "tst_url = \"https://www.pgatour.com/stats/stat.101.y2021.eon.t033.html\"\n", "# pd.read_html fetches the page and hands back a list with one DataFrame per HTML table it finds.\n", "stat = pd.read_html(io=tst_url)\n", "# Report the container type and how many tables were parsed.\n", "print(f\"type(stat): {type(stat)}, len(stat): {len(stat)}\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "type(stat[0]): , type(stat[1]): \n\n 0 1 2\n0 NaN It appears your browser may be outdated. For t... NaN\n\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ " RANK THIS WEEK RANK LAST WEEK PLAYER NAME ROUNDS AVG. \\\n", "0 1 2 Bryson DeChambeau 4 327.6 \n", "1 2 NaN Dean Burmester 4 324.1 \n", "2 3 NaN Rory McIlroy 4 320.8 \n", "3 4 NaN Joaquin Niemann 4 320.1 \n", "4 5 NaN Garrick Higgo 4 318.6 \n", "\n", " TOTAL DISTANCE TOTAL DRIVES \n", "0 2621 8 \n", "1 2593 8 \n", "2 2566 8 \n", "3 2561 8 \n", "4 2549 8 " ], "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
RANK THIS WEEKRANK LAST WEEKPLAYER NAMEROUNDSAVG.TOTAL DISTANCETOTAL DRIVES
012Bryson DeChambeau4327.626218
12NaNDean Burmester4324.125938
23NaNRory McIlroy4320.825668
34NaNJoaquin Niemann4320.125618
45NaNGarrick Higgo4318.625498
\n
" }, "metadata": {}, "execution_count": 3 } ], "source": [ "# pd.read_html returned two tables: report the type of each before choosing one\n", "print(f\"type(stat[0]): {type(stat[0])}, type(stat[1]): {type(stat[1])}\\n\")\n", "# table 0 is only the site's outdated-browser notice; dump it, then leave a blank line\n", "print(stat[0], end=\"\\n\\n\")\n", "# table 1 is the stats table itself: show its first rows with the rich display\n", "stat[1].head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "output_type": "display_data", "data": { "text/plain": " PLAYER NAME AVG. TOTAL DISTANCE TOTAL DRIVES\n0 Bryson DeChambeau 327.6 2621 8\n1 Dean Burmester 324.1 2593 8\n2 Rory McIlroy 320.8 2566 8\n3 Joaquin Niemann 320.1 2561 8\n4 Garrick Higgo 318.6 2549 8\n.. ... ... ... ...\n76 Joel Dahmen 287.5 2300 8\n77 Henrik Stenson 286.4 2291 8\n78 Steve Stricker 285.1 2281 8\n79 Tom Hoge 283.4 2267 8\n80 Russell Henley 282.6 2261 8\n\n[81 rows x 4 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
PLAYER NAMEAVG.TOTAL DISTANCETOTAL DRIVES
0Bryson DeChambeau327.626218
1Dean Burmester324.125938
2Rory McIlroy320.825668
3Joaquin Niemann320.125618
4Garrick Higgo318.625498
...............
76Joel Dahmen287.523008
77Henrik Stenson286.422918
78Steve Stricker285.122818
79Tom Hoge283.422678
80Russell Henley282.622618
\n

81 rows × 4 columns

\n
" }, "metadata": {} } ], "source": [ "# keep only the player name and the three driving-distance columns;\n", "# the rank and rounds columns are not needed from here on\n", "d_cols = [\n", "    \"PLAYER NAME\",\n", "    \"AVG.\",\n", "    \"TOTAL DISTANCE\",\n", "    \"TOTAL DRIVES\",\n", "]\n", "# all rows of the stats table, restricted to the columns of interest\n", "t_stats = stat[1].loc[:, d_cols]\n", "display(t_stats)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "output_type": "display_data", "data": { "text/plain": " PLAYER NAME AVG. TOTAL DISTANCE TOTAL DRIVES\n0 Bryson DeChambeau 327.6 2621 8\n1 Dean Burmester 324.1 2593 8\n2 Rory McIlroy 320.8 2566 8\n3 Joaquin Niemann 320.1 2561 8\n4 Garrick Higgo 318.6 2549 8\n.. ... ... ... ...\n76 Joel Dahmen 287.5 2300 8\n77 Henrik Stenson 286.4 2291 8\n78 Steve Stricker 285.1 2281 8\n79 Tom Hoge 283.4 2267 8\n80 Russell Henley 282.6 2261 8\n\n[81 rows x 4 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
PLAYER NAMEAVG.TOTAL DISTANCETOTAL DRIVES
0Bryson DeChambeau327.626218
1Dean Burmester324.125938
2Rory McIlroy320.825668
3Joaquin Niemann320.125618
4Garrick Higgo318.625498
...............
76Joel Dahmen287.523008
77Henrik Stenson286.422918
78Steve Stricker285.122818
79Tom Hoge283.422678
80Russell Henley282.622618
\n

81 rows × 4 columns

\n
" }, "metadata": {} } ], "source": [ "# Save the trimmed table to a local CSV so the stats page does not have to be fetched again.\n", "# NOTE: the ./data directory must already exist; to_csv does not create missing directories.\n", "tst_csv = './data/pga_2021_drv.test.csv'\n", "# t_stats was built by selecting d_cols, so columns= is redundant here;\n", "# it is kept to make the on-disk column order explicit.\n", "t_stats.to_csv(tst_csv, columns=d_cols, index=False)\n", "# Read the file back and check that the round trip preserved the columns and the row count.\n", "ts_back = pd.read_csv(tst_csv)\n", "assert list(ts_back.columns) == d_cols, \"CSV round trip changed the columns\"\n", "assert len(ts_back) == len(t_stats), \"CSV round trip changed the row count\"\n", "display(ts_back)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ] }