学习python,在老雷的指点下去完成一个小需求
功能:检索/var/log/secure中的ip,超过阈值就写入/etc/hosts.deny文件中去
1、首先从/var/log/secure中提取ip地址
2、然后进行排序,挑选出访问量大于500的ip
3、把收集到的ip写入到hosts.deny文件中,写入之前先判断是否已存在
格式要求如下:
1
2
3
4
5
6
7
|
########2014-06-25#######
60.xxx.xxx.xxx
58.xxx.xxx.xxx
########2014-06-26#######
60.xxx.xxx.xxx
58.xxx.xxx.xxx
|
解决过程
1、首先从/var/log/secure中提取ip地址
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
# coding: utf-8
# Author: zhuima
# Date: 2014-06-24
# Function: fetch ip address
# Usage: python sys.argv[0] logpath
#
import
re
import
sys
pattern = re.compile(r
'(\d+\.\d+\.\d+\.\d+)'
)
def ip_fetch(logfile):
    """Print the first match of the module-level ``pattern`` on each line.

    ``logfile`` is opened for reading and scanned line by line; lines on
    which ``pattern`` finds nothing are skipped.
    """
    with open(logfile) as log:
        for entry in log:
            found = pattern.search(entry)
            if found is None:
                continue
            # A single-argument print() emits the same text on Python 2 and 3.
            print(found.group())
# Entry point: exactly one command-line argument (the log path) is expected;
# anything else prints the usage text followed by a line holding one space.
if len(sys.argv) != 2:
    print("Usage: python %s /path/logfile" % sys.argv[0])
    print(" ")
else:
    ip_fetch(sys.argv[1])
|
效果如下:
1
2
3
4
5
6
7
8
9
|
[root@nginx1 python]
# python tiquip.py secure
114.114.114.114
[root@nginx1 python]
# python tiquip.py
Usage: python tiquip.py
/path/logfile
[root@nginx1 python]
#
|
2、进行排序,挑选出访问量大于500的ip
需要解决问题:去重排序
sorted()排序
set()去重
裸用dict来做的话:
counter = dict()
for ip in m:
counter[ip] = counter.get(ip, 0) + 1
http://www.newsmth.net/nForum/#!article/Python/113879
http://hi.baidu.com/pythond/item/3607e6564aacae928d12ed41
去重(成哥提供的网站)
from collections import Counter
Counter([1,1,1,2,3,4,1,2,3,4])
Counter({1: 4, 2: 2, 3: 2, 4: 2})
2.1、去重操作
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
|
[root@nginx1 python]
# cat fetch.py
# coding: utf-8
# Author: zhuima
# Date: 2014-06-24
# Function: fetch ip address
# Usage: python sys.argv[0] logpath
#
from collections
import
Counter
import
re
import
sys
pattern = re.compile(r
'(\d+\.\d+\.\d+\.\d+)'
)
listip = []
def ip_fetch(logfile):
    """Collect the first ``pattern`` match of each line into ``listip``.

    Matches are appended to the module-level ``listip`` in file order;
    lines without a match contribute nothing.  Nothing is returned.
    """
    with open(logfile) as log:
        hits = (pattern.search(entry) for entry in log)
        # The generator is consumed here, while the file is still open.
        listip.extend(hit.group() for hit in hits if hit)
def sort():
    """Print each distinct address in ``listip`` with its occurrence count.

    Output is one ``count<space><tab>address`` line per address, in the
    order the Counter yields its keys (no sorting at this stage).
    """
    tally = Counter(listip)
    for address in tally:
        # One pre-formatted string reproduces the Python 2 print-statement
        # output for `count, "\t", ip` and works on Python 3 as well.
        print("%s \t%s" % (tally[address], address))
# Entry point: with exactly one argument, collect the addresses and print the
# tally; otherwise print the usage text followed by a line holding one space.
if len(sys.argv) != 2:
    print("Usage: python %s /path/logfile" % sys.argv[0])
    print(" ")
else:
    ip_fetch(sys.argv[1])
    sort()
|
效果如下:
1
2
3
4
5
6
7
8
|
[root@nginx1 python]
# python fetch.py secure
1 114.xxx.xxx.xxx
1 114.xxx.xxx.xxx
1 106.xxx.xxx.xxx
4 124.xxx.xxx.xxx
1 61.xxx.xxx.xxx
[root@nginx1 python]
#
|
2.2、接下来进行排序
http://www.cnblogs.com/liyixin/archive/2012/07/23/2605013.html
脚本样式:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
|
# coding: utf-8
# Author: zhuima
# Date: 2014-06-24
# Function: fetch ip address
# Usage: python sys.argv[0] logpath
#
from collections
import
Counter
import
re
import
sys
pattern = re.compile(r
'(\d+\.\d+\.\d+\.\d+)'
)
listip = []
def ip_fetch(logfile):
    """Append the first ``pattern`` match of every line to ``listip``.

    The module-level ``listip`` is extended in file order; lines on which
    ``pattern`` finds nothing are ignored.  Nothing is returned.
    """
    with open(logfile) as log:
        hits = (pattern.search(entry) for entry in log)
        # The generator is consumed here, while the file is still open.
        listip.extend(hit.group() for hit in hits if hit)
def sort():
    """Print ``(address, count)`` tuples from ``listip``, most frequent first."""
    # most_common() without an argument returns every (element, count) pair
    # ordered by descending count.
    for pair in Counter(listip).most_common():
        print(pair)
# Entry point: with exactly one argument, collect the addresses and print the
# ranking; otherwise print the usage text followed by a line holding one space.
if len(sys.argv) != 2:
    print("Usage: python %s /path/logfile" % sys.argv[0])
    print(" ")
else:
    ip_fetch(sys.argv[1])
    sort()
|
效果如下:
1
2
3
4
5
6
7
|
[root@nginx1 python]
# python fetch.py secure
(
'124.64.63.119'
, 4)
(
'114.245.169.74'
, 1)
(
'114.252.165.177'
, 1)
(
'106.37.169.186'
, 1)
(
'61.49.238.82'
, 1)
[root@nginx1 python]
#
|
再次更改,去除数值外面的圆括号
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
|
[root@nginx1 python]
# vim fetch.py
# coding: utf-8
# Author: zhuima
# Date: 2014-06-24
# Function: fetch ip address
# Usage: python sys.argv[0] logpath
#
from collections
import
Counter
import
re
import
sys
pattern = re.compile(r
'(\d+\.\d+\.\d+\.\d+)'
)
listip = []
def ip_fetch(logfile):
    """Append the first ``pattern`` match of every line to ``listip``.

    The module-level ``listip`` is extended in file order; lines on which
    ``pattern`` finds nothing are ignored.  Nothing is returned.
    """
    with open(logfile) as log:
        hits = (pattern.search(entry) for entry in log)
        # The generator is consumed here, while the file is still open.
        listip.extend(hit.group() for hit in hits if hit)
def sort():
    """Print ``count<space><tab>address`` lines, most frequent address first."""
    # most_common() without an argument returns every (element, count) pair
    # ordered by descending count.
    for address, hits in Counter(listip).most_common():
        # One pre-formatted string reproduces the Python 2 print-statement
        # output for `count, "\t", ip` and works on Python 3 as well.
        print("%s \t%s" % (hits, address))
# Entry point: with exactly one argument, collect the addresses and print the
# ranking; otherwise print the usage text followed by a line holding one space.
if len(sys.argv) != 2:
    print("Usage: python %s /path/logfile" % sys.argv[0])
    print(" ")
else:
    ip_fetch(sys.argv[1])
    sort()
|
效果如下:
1
2
3
4
5
6
7
8
|
[root@nginx1 python]
# python fetch.py secure
4 124.xxx.xxx.xxx
1 114.xxx.xxx.xxx
1 114.xxx.xxx.xxx
1 106.xxx.xxx.xxx
1 61.xxx.xxx.xxx
[root@nginx1 python]
#
|
3、接下来解决如何写入到hosts.deny文件中去
思路:
先判断要写入的ip是否在hosts.deny文件中,如果在,就跳过,如果不在就写入
脚本测试文件,写不进文件中去
1
2
3
4
5
6
7
8
9
10
|
[root@nginx1 python]
# vim write.py
# coding utf-8
#
# First attempt at appending a single address to hosts.deny.
# The write sits inside the loop over the file's lines: it runs once for
# every line read that lacks the address, and never when no line is read.
address = '192.168.23.22'
with open("/etc/hosts.deny", "a+") as deny:
    for entry in deny:
        if address in entry:
            continue
        deny.write(address)
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
|
# coding utf-8
#
import
datetime
# NOTE(review): this draft is incomplete as shown -- the `dd = datetime.`
# assignment and the `f.write(datetime` call are both cut off, so it cannot
# run as written.  The later listings build the date string with
# datetime.date.today().strftime("%Y-%m-%d").
# The membership test below checks `x not in f` (the file object itself),
# whereas the other drafts test against `line`.
dd
= datetime.
x =
'192.168.23.22'
f =
open
(
"/etc/hosts.deny"
,
"a+"
)
for
line
in
f:
if
x not
in
f:
f.write(
"#########"
)
f.write(datetime
f.write(
"#########"
)
f.write(x)
f.write(
"\n"
)
f.close()
|
http://blog.sina.com.cn/s/blog_6c3748830100ypt9.html
单个ip验证的时候
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
|
[root@
rsync
python]
# cat write.py
# coding utf-8
#
import
datetime
# Draft that walks hosts.deny line by line against a list of addresses.
# Each line read is compared with the current address x[i] only; the loop
# stops at the first line that already contains it.
day = datetime.date.today()
today = day.strftime("%Y-%m-%d")
x = ['192.168.23.22', '192.168.23.21']
i = 0
f = open("/etc/hosts.deny", "a+")
for line in f:
    if x[i] in line:
        print("%s is exists!" % x[i])
        break
    print(x[i])
    # Date header line followed by the address on its own line.
    f.writelines(["#########", today, "#########", "\n", x[i], "\n"])
    i += 1
f.close()
|
当然我们实际环境中不可能只有一个ip,肯定会有很多,所以这里就需要使用列表了
多个参数传入的时候该如何是好
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
|
[root@
rsync
python]
# cat write.py
# coding utf-8
#
import
datetime
# Draft for several addresses; kept behaviourally as-is because it is the
# listing that produces the IndexError shown right after it.
day = datetime.date.today()
today = day.strftime("%Y-%m-%d")
x = ['192.168.23.22', '192.168.23.21']
i = 0
f = open("/etc/hosts.deny", "a+")
for line in f:
    # x[i] is evaluated before the `i <= len(x)` test, and that test still
    # admits i == len(x), so the index can run past the end of x.
    if x[i] not in line and i <= len(x):
        # Date header line followed by the address on its own line.
        f.writelines(["#########", today, "#########", "\n", x[i], "\n"])
        i += 1
    else:
        print("%s is exists!" % x[i])
        break
|
上面代码报错:
1
2
3
4
5
6
7
|
[root@
rsync
python]
#
[root@
rsync
python]
# python write.py /etc/hosts.deny
Traceback (most recent call last):
File
"write.py"
, line 20,
in
<module>
if
x[i] not
in
line and i <= len(x):
IndexError: list index out of range
[root@
rsync
python]
#
|
踩坑之旅:
一直陷在循环里面跳不出来了,所以就一直绕啊绕~
问题:
1、嵌套循环无法解决上层循环的次数比内层循环的次数多的问题,从而导致插入数据过多
2、判断数据在列表中是否存在的时候老是想着要对比循环
3、基础薄弱
老雷给的建议:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
|
# coding utf-8
#
import
datetime
day = datetime.date.today()
today = day.strftime("%Y-%m-%d")
ip_list = ['192.168.23.22', '192.168.23.21']

# Append every address of ip_list that is not yet a line of hosts.deny.
# The with-block closes the handle even if a write fails (the original
# called close() on an undefined name `f` instead of `data`).
with open("/etc/hosts.deny", "a+") as data:
    # Rewind first: an "a+" stream may start positioned at end-of-file, in
    # which case reading without seeking would return nothing.
    data.seek(0)
    # readlines() keeps the trailing "\n", so a bare address would never
    # compare equal to an existing line; strip before comparing.
    content_list = [line.strip() for line in data.readlines()]
    for i in ip_list:
        if i not in content_list:
            data.write(i + '\n')
|
最终完结的脚本:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
|
[root@blog python]
# cat fetch.py
# coding: utf-8
# Author: zhuima
# Date: 2014-06-24
# Function: fetch ip address
# Usage: python sys.argv[0] logpath
#
from collections
import
Counter
import
re
import
sys
import
datetime
pattern = re.compile(r
'(\d+\.\d+\.\d+\.\d+)'
)
source_listip = []
today = datetime.
date
.today().strftime(
"%Y-%m-%d"
)
def ip_fetch(logfile):
    """Append the first ``pattern`` match of every line to ``source_listip``.

    The module-level ``source_listip`` is extended in file order; lines on
    which ``pattern`` finds nothing are ignored.  Nothing is returned.
    """
    with open(logfile) as log:
        hits = (pattern.search(entry) for entry in log)
        # The generator is consumed here, while the file is still open.
        source_listip.extend(hit.group() for hit in hits if hit)
def sort_write(deny_file="/etc/hosts.deny", threshold=500, ips=None, day=None):
    """Append frequently seen addresses to a hosts.deny-style file.

    Counts how often each address occurs, prints the ranking as
    ``count<space><tab>address`` lines (most frequent first) and appends
    every address seen at least ``threshold`` times that is not already a
    line of ``deny_file``.  A ``########<day>#######`` header line is
    appended first unless some existing line already contains ``day``.

    Args:
        deny_file: File to update; mode "a+" creates it when missing.
        threshold: Minimum number of occurrences for an address to be written.
        ips: Iterable of address strings to count.  Defaults to the
            module-level ``source_listip`` filled by ``ip_fetch``.
        day: Date string used in the header.  Defaults to the module-level
            ``today``.

    Returns:
        The addresses appended by this call, most frequent first.
    """
    if ips is None:
        ips = source_listip
    if day is None:
        day = today

    # The with-block closes the file even if reading or writing fails.
    with open(deny_file, "a+") as data_file:
        # Rewind first: an "a+" stream may start positioned at end-of-file,
        # in which case reading without seeking would return nothing.
        data_file.seek(0)
        content = data_file.read()
        lines = content.splitlines()
        # Compare against newline-free lines; the raw readlines() entries end
        # in "\n" and would never equal a bare address.
        known = set(line.strip() for line in lines)

        pending = []
        # Look at every existing line for today's date, not only the first,
        # so the header is also written when the file is still empty.
        if not any(day in line for line in lines):
            pending.append("########%s#######\n" % day)

        added = []
        for ip, count in Counter(ips).most_common():
            # One pre-formatted string reproduces the Python 2 print-statement
            # output for `count, "\t", ip` and works on Python 3 as well.
            print("%s \t%s" % (count, ip))
            if count >= threshold and ip not in known:
                added.append(ip)
                pending.append(ip + "\n")

        # Do not glue new text onto an unterminated last line.
        if pending and content and not content.endswith("\n"):
            pending.insert(0, "\n")
        data_file.writelines(pending)
    return added
# Entry point: with exactly one argument, collect the addresses and update
# hosts.deny; otherwise print the usage text.
if len(sys.argv) != 2:
    print("Usage: python %s /path/logfile" % sys.argv[0])
else:
    ip_fetch(sys.argv[1])
    sort_write()
|
效果如下图所示:
Note:
由于脚本用datetime取的是运行脚本时的当前系统日期,而不是日志里记录的时间,所以这里显示的两个时间是一致的
总结:
感谢斌哥的思路指导,感谢成哥的Counter的思路提供,感谢老雷的一路指导~
1、基础知识掌握太弱
2、写的过程中老是急于求成,没有静下心来思考
3、多看官方文档,建议把标准库查看一遍
4、该脚本很烂,后期继续以函数的形式来写