import pandas as pd
import numpy as np
type_specified={"trip_id":"object","bikeid":"object","from_station_id":"object","to_station_id":"object",
"hour":"object","weekday":"object"}
ridership=pd.read_csv(r"E:\chicago_bikes_data\txt&csv\ridership_2016.txt",
nrows=10000,dtype=type_specified,usecols=range(1,15))
ridership.head()
|
trip_id |
starttime |
stoptime |
bikeid |
tripduration |
from_station_id |
from_station_name |
to_station_id |
to_station_name |
usertype |
gender |
birthyear |
hour |
weekday |
0 |
9080551 |
3/31/2016 23:53 |
4/1/2016 0:07 |
155 |
841 |
344 |
Ravenswood Ave & Lawrence Ave |
458 |
Broadway & Thorndale Ave |
Subscriber |
Male |
1986.0 |
23 |
4 |
1 |
9080550 |
3/31/2016 23:46 |
3/31/2016 23:57 |
4831 |
649 |
128 |
Damen Ave & Chicago Ave |
213 |
Leavitt St & North Ave |
Subscriber |
Male |
1980.0 |
23 |
4 |
2 |
9080549 |
3/31/2016 23:42 |
3/31/2016 23:46 |
4232 |
210 |
350 |
Ashland Ave & Chicago Ave |
210 |
Ashland Ave & Division St |
Subscriber |
Male |
1979.0 |
23 |
4 |
3 |
9080548 |
3/31/2016 23:37 |
3/31/2016 23:55 |
3464 |
1045 |
303 |
Broadway & Cornelia Ave |
458 |
Broadway & Thorndale Ave |
Subscriber |
Male |
1980.0 |
23 |
4 |
4 |
9080547 |
3/31/2016 23:33 |
3/31/2016 23:37 |
1750 |
202 |
334 |
Lake Shore Dr & Belmont Ave |
329 |
Lake Shore Dr & Diversey Pkwy |
Subscriber |
Male |
1969.0 |
23 |
4 |
ridership_user=ridership.groupby("usertype")
ridership_user_week=ridership.groupby(["usertype","weekday"])
ridership_user.size()
usertype
Customer 1023
Subscriber 8977
dtype: int64
ridership_user_week.size()
usertype weekday
Customer 3 286
4 737
Subscriber 3 2714
4 6263
dtype: int64
ridership_user_week.size().loc["Customer","3"]
286
ridership_user.get_group("Customer").head()
|
trip_id |
starttime |
stoptime |
bikeid |
tripduration |
from_station_id |
from_station_name |
to_station_id |
to_station_name |
usertype |
gender |
birthyear |
hour |
weekday |
23 |
9080526 |
3/31/2016 23:04 |
3/31/2016 23:32 |
1638 |
1648 |
505 |
Winchester Ave & Elston Ave |
505 |
Winchester Ave & Elston Ave |
Customer |
NaN |
NaN |
23 |
4 |
65 |
9080481 |
3/31/2016 22:24 |
3/31/2016 22:45 |
3438 |
1220 |
152 |
Lincoln Ave & Diversey Pkwy |
316 |
Damen Ave & Sunnyside Ave |
Customer |
NaN |
NaN |
22 |
4 |
77 |
9080463 |
3/31/2016 22:15 |
3/31/2016 22:17 |
3464 |
97 |
47 |
State St & Kinzie St |
47 |
State St & Kinzie St |
Customer |
NaN |
NaN |
22 |
4 |
113 |
9080426 |
3/31/2016 22:02 |
3/31/2016 22:29 |
2716 |
1582 |
340 |
Clark St & Wrightwood Ave |
161 |
Rush St & Superior St |
Customer |
NaN |
NaN |
22 |
4 |
115 |
9080424 |
3/31/2016 22:02 |
3/31/2016 22:29 |
1052 |
1611 |
340 |
Clark St & Wrightwood Ave |
161 |
Rush St & Superior St |
Customer |
NaN |
NaN |
22 |
4 |
ridership_user_week.get_group(("Customer","3")).head()
|
trip_id |
starttime |
stoptime |
bikeid |
tripduration |
from_station_id |
from_station_name |
to_station_id |
to_station_name |
usertype |
gender |
birthyear |
hour |
weekday |
7002 |
9071238 |
3/30/2016 23:57 |
3/31/2016 0:03 |
4018 |
317 |
305 |
Western Ave & Division St |
374 |
Western Ave & Walton St |
Customer |
NaN |
NaN |
23 |
3 |
7007 |
9071233 |
3/30/2016 23:51 |
3/30/2016 23:56 |
4018 |
316 |
374 |
Western Ave & Walton St |
305 |
Western Ave & Division St |
Customer |
NaN |
NaN |
23 |
3 |
7014 |
9071226 |
3/30/2016 23:28 |
3/30/2016 23:45 |
1142 |
993 |
71 |
Morgan St & Lake St |
15 |
Racine Ave & 18th St |
Customer |
NaN |
NaN |
23 |
3 |
7117 |
9071031 |
3/30/2016 20:13 |
3/30/2016 20:34 |
1919 |
1302 |
284 |
Michigan Ave & Jackson Blvd |
173 |
Mies van der Rohe Way & Chicago Ave |
Customer |
NaN |
NaN |
20 |
3 |
7118 |
9071030 |
3/30/2016 20:13 |
3/30/2016 20:34 |
2909 |
1312 |
284 |
Michigan Ave & Jackson Blvd |
173 |
Mies van der Rohe Way & Chicago Ave |
Customer |
NaN |
NaN |
20 |
3 |
max_time_from_id=ridership_user_week.apply(lambda record:record["from_station_id"][record["tripduration"].idxmax()])
max_time_to_id=ridership_user_week.apply(lambda record:record["to_station_id"][record["tripduration"].idxmax()])
max_time_time=ridership_user_week.apply(lambda record:record["tripduration"][record["tripduration"].idxmax()])
max_time_ridership=pd.DataFrame({"from_station_id":max_time_from_id,"to_station_id":max_time_to_id,
"tripduratio":max_time_time})
max_time_ridership
|
|
from_station_id |
to_station_id |
tripduratio |
usertype |
weekday |
|
|
|
Customer |
3 |
35 |
277 |
81702 |
4 |
340 |
229 |
68571 |
Subscriber |
3 |
287 |
254 |
71758 |
4 |
133 |
49 |
70977 |