Pandas数据排序
.sort_index()
在指定轴上根据索引(标签)进行排序,默认按行索引(axis=0)升序排列;排序后每行(或每列)的数据会跟随其索引一起移动。该方法返回排序后的新对象,原对象不变
import pandas as pd
import numpy as np
b = pd.DataFrame(np.arange(20).reshape(4,5),index=['c','a','d','b'])
b
|
0 |
1 |
2 |
3 |
4 |
c |
0 |
1 |
2 |
3 |
4 |
a |
5 |
6 |
7 |
8 |
9 |
d |
10 |
11 |
12 |
13 |
14 |
b |
15 |
16 |
17 |
18 |
19 |
b.sort_index()
|
0 |
1 |
2 |
3 |
4 |
a |
5 |
6 |
7 |
8 |
9 |
b |
15 |
16 |
17 |
18 |
19 |
c |
0 |
1 |
2 |
3 |
4 |
d |
10 |
11 |
12 |
13 |
14 |
b.sort_index(ascending=False)
|
0 |
1 |
2 |
3 |
4 |
d |
10 |
11 |
12 |
13 |
14 |
c |
0 |
1 |
2 |
3 |
4 |
b |
15 |
16 |
17 |
18 |
19 |
a |
5 |
6 |
7 |
8 |
9 |
b.sort_index(axis=0, ascending=False)
|
0 |
1 |
2 |
3 |
4 |
d |
10 |
11 |
12 |
13 |
14 |
c |
0 |
1 |
2 |
3 |
4 |
b |
15 |
16 |
17 |
18 |
19 |
a |
5 |
6 |
7 |
8 |
9 |
b.sort_index(axis=1, ascending=False)
|
4 |
3 |
2 |
1 |
0 |
c |
4 |
3 |
2 |
1 |
0 |
a |
9 |
8 |
7 |
6 |
5 |
d |
14 |
13 |
12 |
11 |
10 |
b |
19 |
18 |
17 |
16 |
15 |
.sort_values()
在指定轴上根据数值进行排序,默认升序。DataFrame 需要通过 by 指定排序依据:axis=0 时 by 为列标签(按该列的值对行排序),axis=1 时 by 为行索引标签(按该行的值对列排序)
- Series.sort_values(axis=0,ascending=True)
- DataFrame.sort_values(by,axis=0,ascending=True)
dates = pd.date_range('20130101', periods=10)
dates
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
'2013-01-05', '2013-01-06', '2013-01-07', '2013-01-08',
'2013-01-09', '2013-01-10'],
dtype='datetime64[ns]', freq='D')
df = pd.DataFrame(np.random.randn(10,4),index=dates,columns=['A','B','C','D'])
df.head()
|
A |
B |
C |
D |
2013-01-01 |
-0.300266 |
0.683232 |
0.777509 |
-0.274338 |
2013-01-02 |
2.298084 |
-0.855524 |
1.462064 |
-0.725142 |
2013-01-03 |
0.512711 |
0.824380 |
0.384902 |
-1.437241 |
2013-01-04 |
0.388478 |
-1.265414 |
-1.104333 |
-0.447689 |
2013-01-05 |
0.273518 |
-0.314857 |
-2.545510 |
-1.301629 |
c = df.sort_values('B')
c.head()
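(注:df 由 np.random.randn 随机生成,下面这段升序输出来自另一次运行,其数值与上文 df.head() 以及后面的降序结果并不对应;如需结果可复现,可在生成 df 之前调用 np.random.seed 固定随机种子。)
|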
A |
B |
C |
D |
2013-01-01 |
-0.976353 |
-2.176075 |
0.255585 |
0.645465 |
2013-01-03 |
-1.549727 |
-1.876790 |
0.966724 |
0.486101 |
2013-01-06 |
-0.000467 |
-1.430820 |
-1.803610 |
-0.587985 |
2013-01-10 |
-0.293663 |
-0.691951 |
0.262666 |
-1.298977 |
2013-01-04 |
-0.032301 |
-0.618582 |
1.204373 |
-0.302137 |
c = df.sort_values('B',ascending = False)
c.head()
|
A |
B |
C |
D |
2013-01-03 |
0.512711 |
0.824380 |
0.384902 |
-1.437241 |
2013-01-01 |
-0.300266 |
0.683232 |
0.777509 |
-0.274338 |
2013-01-08 |
0.010939 |
0.591777 |
0.143182 |
0.461798 |
2013-01-10 |
0.811169 |
0.100516 |
-1.385373 |
0.168329 |
2013-01-05 |
0.273518 |
-0.314857 |
-2.545510 |
-1.301629 |
c = df.sort_values('2013-01-01',axis=1,ascending=False)
c.head()
|
C |
B |
D |
A |
2013-01-01 |
0.777509 |
0.683232 |
-0.274338 |
-0.300266 |
2013-01-02 |
1.462064 |
-0.855524 |
-0.725142 |
2.298084 |
2013-01-03 |
0.384902 |
0.824380 |
-1.437241 |
0.512711 |
2013-01-04 |
-1.104333 |
-1.265414 |
-0.447689 |
0.388478 |
2013-01-05 |
-2.545510 |
-0.314857 |
-1.301629 |
0.273518 |
NaN空值统一放在排序末尾:无论升序还是降序,含NaN的行默认都排在最后(由参数 na_position='last' 控制,设为 'first' 可改为排在最前)
a = pd.DataFrame(np.arange(12).reshape(3,4),index=['a','b','c'])
a
|
0 |
1 |
2 |
3 |
a |
0 |
1 |
2 |
3 |
b |
4 |
5 |
6 |
7 |
c |
8 |
9 |
10 |
11 |
b = pd.DataFrame(np.arange(20).reshape(4,5),index=['c','a','d','b'])
b
|
0 |
1 |
2 |
3 |
4 |
c |
0 |
1 |
2 |
3 |
4 |
a |
5 |
6 |
7 |
8 |
9 |
d |
10 |
11 |
12 |
13 |
14 |
b |
15 |
16 |
17 |
18 |
19 |
c = a + b
c
|
0 |
1 |
2 |
3 |
4 |
a |
5.0 |
7.0 |
9.0 |
11.0 |
NaN |
b |
19.0 |
21.0 |
23.0 |
25.0 |
NaN |
c |
8.0 |
10.0 |
12.0 |
14.0 |
NaN |
d |
NaN |
NaN |
NaN |
NaN |
NaN |
c.sort_values(2,ascending = False)
|
0 |
1 |
2 |
3 |
4 |
b |
19.0 |
21.0 |
23.0 |
25.0 |
NaN |
c |
8.0 |
10.0 |
12.0 |
14.0 |
NaN |
a |
5.0 |
7.0 |
9.0 |
11.0 |
NaN |
d |
NaN |
NaN |
NaN |
NaN |
NaN |
c.sort_values(2,ascending = True)
|
0 |
1 |
2 |
3 |
4 |
a |
5.0 |
7.0 |
9.0 |
11.0 |
NaN |
c |
8.0 |
10.0 |
12.0 |
14.0 |
NaN |
b |
19.0 |
21.0 |
23.0 |
25.0 |
NaN |
d |
NaN |
NaN |
NaN |
NaN |
NaN |