Skip to content

Commit ebeb3df

Browse files
committed
PCC51 ananda23-cs
1 parent 02b7765 commit ebeb3df

File tree

1 file changed

+143
-0
lines changed

1 file changed

+143
-0
lines changed

51/ananda23-cs/nba.py

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
from collections import namedtuple
2+
import csv
3+
import os
4+
import sqlite3
5+
6+
import requests
7+
8+
# Remote CSV source, its optional local cache file, and the SQLite file.
DATA_URL = 'https://query.data.world/s/ezwk64ej624qyverrw6x7od7co7ftm'
DATA_CACHED = 'nba.data'
NBA_DB = 'nba.db'

# Start clean: remove a database file left over from an earlier run so the
# connection opened below does not see previously created tables.
if os.path.isfile(NBA_DB):
    os.remove(NBA_DB)

# One record per CSV row, as produced by load_data().
Player = namedtuple(
    'Player',
    ('name year first_year team college active '
     'games avg_min avg_points'),
)

# Module-wide connection and cursor used by every query function in this file.
conn = sqlite3.connect(NBA_DB)
cur = conn.cursor()
21+
22+
23+
def _get_csv_data():
    """Return the NBA CSV data as a single text string.

    Reads the local cache file ``DATA_CACHED`` when it exists; otherwise
    downloads ``DATA_URL``. Both paths decode the bytes as UTF-8.

    Raises:
        requests.HTTPError: if the download answers with a 4xx/5xx status.
    """
    if os.path.isfile(DATA_CACHED):
        # Explicit encoding so the cached copy is decoded the same way as the
        # downloaded one, independent of the platform default encoding.
        with open(DATA_CACHED, encoding='utf-8') as f:
            return f.read()

    with requests.Session() as session:
        response = session.get(DATA_URL)
        # Fail here instead of handing an HTTP error page to the CSV parser.
        response.raise_for_status()
        return response.content.decode('utf-8')
32+
33+
34+
def load_data():
    """Yield one Player namedtuple per row of the NBA CSV data.

    Field values are passed through exactly as csv.DictReader produced them;
    no type conversion happens here.
    """
    # (Player field, CSV column header) pairs, in Player field order.
    columns = (
        ('name', 'Player'),
        ('year', 'Draft_Yr'),
        ('first_year', 'first_year'),
        ('team', 'Team'),
        ('college', 'College'),
        ('active', 'Yrs'),
        ('games', 'Games'),
        ('avg_min', 'Minutes.per.Game'),
        ('avg_points', 'Points.per.Game'),
    )
    rows = csv.DictReader(_get_csv_data().splitlines(), delimiter=',')
    for row in rows:
        yield Player(**{field: row[header] for field, header in columns})
50+
51+
52+
# CODE HERE (tests under __main__):
53+
54+
def import_to_db(players=None):
    """Create the players table in sqlite3 and import the players data.

    Args:
        players: iterable of Player records to insert. When None, the
            records are read with load_data().

    The year, first_year, active and games fields are converted with int()
    and avg_min / avg_points with float() before insertion, so a value that
    does not parse raises ValueError. The inserted rows are committed before
    the function returns; if an insert fails, the rows inserted so far are
    rolled back.
    """
    if players is None:
        players = list(load_data())

    cur.execute(
        'CREATE TABLE players ('
        'name VARCHAR(255) NOT NULL, '
        'year INT NOT NULL, '
        'first_year INT NOT NULL, '
        'team VARCHAR(255) NOT NULL, '
        'college VARCHAR(255) NOT NULL, '
        'active INT NOT NULL, '
        'games INT NOT NULL, '
        'avg_min DOUBLE NOT NULL, '
        'avg_points DOUBLE NOT NULL)'
    )

    # Convert lazily, one row at a time, while executemany consumes them.
    rows = (
        (p.name, int(p.year), int(p.first_year), p.team, p.college,
         int(p.active), int(p.games), float(p.avg_min), float(p.avg_points))
        for p in players
    )

    # The connection context manager commits on success and rolls back on
    # error; without a commit the rows never reach the database file.
    with cur.connection:
        cur.executemany(
            'INSERT INTO players VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', rows)
69+
70+
71+
def player_with_max_points_per_game():
    """Return the name of a player whose avg_points equals the table maximum.

    The query does not CAST; it compares the stored avg_points values
    directly. If several rows share the maximum, the first row returned by
    the query is used.
    """
    query = '''SELECT name FROM players WHERE avg_points = (SELECT MAX(avg_points) FROM players)'''
    top_row = cur.execute(query).fetchone()
    return top_row[0]
77+
78+
79+
def number_of_players_from_duke():
    """Return how many rows have college equal to 'Duke University'."""
    query = "SELECT COUNT(*) FROM players WHERE college = 'Duke University'"
    count_row = cur.execute(query).fetchone()
    return count_row[0]
84+
85+
86+
def percentage_of_players_first_year():
    """Return AVG(first_year) * 100 over all players.

    The value is returned unrounded; callers round it as needed.
    NOTE(review): reading this as a percentage presumes first_year is a
    0/1 flag -- confirm against the source data.
    """
    query = '''SELECT AVG(first_year) * 100.00 FROM players'''
    result_row = cur.execute(query).fetchone()
    return result_row[0]
92+
93+
94+
def avg_years_active_players_stanford():
    """Return the average of the "active" column for players whose college
    is "Stanford University" (unrounded)."""
    query = "SELECT AVG(active) FROM players WHERE college='Stanford University'"
    result_row = cur.execute(query).fetchone()
    return result_row[0]
100+
101+
102+
def year_with_most_drafts():
    """Return the draft year that has the most player rows.

    Rows are grouped by year and ordered by group size, largest first; the
    year of the first group is returned.
    """
    query = '''SELECT year, COUNT(*) FROM players GROUP BY year ORDER BY COUNT(*) DESC'''
    groups = cur.execute(query).fetchall()
    busiest = groups[0]
    return busiest[0]
108+
109+
110+
def most_games_per_year_for_veterans():
    """Return the names of the top 6 players with more than 10 active years,
    ranked by games / active, highest first.

    NOTE(review): import_to_db stores games and active as integers, so
    games/active is presumably SQLite integer division and close ratios
    tie -- confirm that this ranking is the intended one.
    """
    query = '''SELECT name FROM players WHERE active > 10 ORDER BY games/active DESC LIMIT 6'''
    cur.execute(query)
    return [row[0] for row in cur.fetchall()]
116+
117+
118+
if __name__ == '__main__':
    # Load the CSV data into a fresh players table before running the checks.
    import_to_db()

    # A. check if the import went well
    def _verify_total_row_count_after_import():
        """Return the number of rows in the players table."""
        count_row = cur.execute('''SELECT COUNT(*) FROM players''').fetchall()[0]
        return count_row[0]

    assert _verify_total_row_count_after_import() == 3961

    # B. some simple asserts of the data analysis functions
    assert player_with_max_points_per_game() == 'Michael Jordan'
    assert number_of_players_from_duke() == 58
    assert round(avg_years_active_players_stanford(), 2) == 4.58
    assert round(percentage_of_players_first_year(), 2) == 1.51
    assert int(year_with_most_drafts()) == 1984

    # Names are compared in sorted order, so the query's own ordering of the
    # six players does not matter here.
    expected = [
        'A.C. Green', 'Alex English', 'Jack Sikma',
        'John Stockton', 'Mark Eaton', 'Terry Tyler',
    ]
    assert sorted(most_games_per_year_for_veterans()) == expected

0 commit comments

Comments
 (0)