"""Pandas primer: building, reindexing and indexing Series / DataFrame objects.

The walkthrough runs top to bottom as a script. The short names (``s1``,
``s2``, ``f1``, ...) are deliberately rebound from one section to the next,
so only the last binding of each survives at module level.
"""
import numpy as np
from pandas import Series, DataFrame

# ---------------------------------------------------------------------------
# Series: takes a list or a dict as one-dimensional data.
# Two attributes: values, index.
# ---------------------------------------------------------------------------

# (1) From a list; the default index is 0..n-1.
s1 = Series([4, 7, -5, 3])
print(s1.values)  # the data
print(s1.index)   # the index labels
s1.index = ['a', 'b', 'c', 'd']  # an index can be replaced in place
print(s1)

# (2) From a dict; the keys become the index.
s2 = Series({'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000})
print(s2)

# ---------------------------------------------------------------------------
# DataFrame: takes a matrix or a dict (whose items are lists) as
# two-dimensional data. Three attributes: values, index, columns.
# ---------------------------------------------------------------------------

# (1) From a dict of equal-length lists.
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9],
}
f1 = DataFrame(data)
print(f1.values)   # the data
print(f1.index)    # row labels
print(f1.columns)  # column labels

# (2) Choose the column order explicitly.
f2 = DataFrame(data, columns=['year', 'state', 'pop'])

# (3) 'debt' is not a key of `data`, so that column is created filled with
# missing values; the rows get custom labels.
f3 = DataFrame(
    data,
    columns=['year', 'state', 'pop', 'debt'],
    index=['one', 'two', 'three', 'four', 'five'],
)

# ===========================================================================
# Essential functionality
# ===========================================================================

# ---------------------------------------------------------------------------
# 1. Reindexing
# ---------------------------------------------------------------------------

# (1) Series
s1 = Series([4.5, 7.2, -5.3, 3.6], index=['d', 'b', 'a', 'c'])
s2 = s1.reindex(['a', 'b', 'c', 'd', 'e'])                # 'e' is new -> NaN
s3 = s1.reindex(['a', 'b', 'c', 'd', 'e'], fill_value=0)  # 'e' is new -> 0
s4 = Series(['blue', 'purple', 'yellow'], index=[0, 2, 4])
s5 = s4.reindex(range(6), method='ffill')  # forward-fill the gaps 1, 3, 5

# (2) DataFrame
f1 = DataFrame(
    np.arange(9).reshape((3, 3)),
    index=['a', 'c', 'd'],
    columns=['Ohio', 'Texas', 'California'],
)
f2 = f1.reindex(index=['a', 'b', 'c', 'd'])                # row labels
f3 = f1.reindex(columns=['Texas', 'Utah', 'California'])  # column labels

# ---------------------------------------------------------------------------
# 2. Indexing, selection and filtering
#    Accessors: .at, .iat, .loc, .iloc
#    (.ix was removed in pandas 1.0; use .loc for labels, .iloc for positions)
# ---------------------------------------------------------------------------

# 1) ndarray-like indexing

# (1) Series
s1 = Series(np.arange(4.), index=['a', 'b', 'c', 'd'])
s1_label = s1['b']
# Integer positions go through .iloc: plain s1[1] on a non-integer index
# relies on a deprecated positional fallback.
s1_position = s1.iloc[1]
s1_position_slice = s1.iloc[2:4]
s1_labels = s1[['b', 'a', 'd']]
s1_positions = s1.iloc[[1, 3]]
s1_masked = s1[s1 < 2]
s1_label_slice = s1.loc['b':'c']  # label slices include the end point
s1.loc['b':'c'] = 5

# (2) DataFrame
df = DataFrame(
    np.arange(16).reshape((4, 4)),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)
column_two = df['two']
columns_three_one = df[['three', 'one']]
first_two_rows = df[:2]
rows_three_gt_5 = df[df['three'] > 5]
greater_than_5 = df > 5
# A boolean Series selects rows, so every column of the matching rows is
# set to 0.
df.loc[df['three'] < 5] = 0

# 2) Label-based indexing

# (1) Series: no examples in the original notes.

# (2) DataFrame
colorado_three = df.loc['Colorado', 'three']
colorado_three_four = df.loc['Colorado', ['three', 'four']]
two_rows_two_columns = df.loc[['Colorado', 'Utah'], ['three', 'four']]
# Rows by label, columns by position: chain .loc and .iloc.
mixed_selection = df.loc[['Colorado', 'Utah']].iloc[:, [2, 0, 3]]
colorado_row = df.loc['Colorado']
third_row = df.iloc[2]
three_up_to_utah = df.loc[:'Utah', 'three']
filtered_first_three = df.loc[df['three'] > 5].iloc[:, :3]