# 生成激活码 (Generate activation codes)
#!/usr/bin/env python
#encoding:utf-8
#Author:sean
import
string
import
random
# Alphabet the activation codes are drawn from: ASCII letters plus digits.
# string.ascii_letters is used instead of string.letters because the latter
# is locale-dependent on Python 2 and does not exist on Python 3.
field = string.ascii_letters + string.digits
# Activation codes should not be predictable, so draw from the operating
# system's entropy source rather than the default Mersenne Twister generator.
_secure_rng = random.SystemRandom()


def getRandom():
    """Return one random four-character segment of an activation code.

    The four characters are sampled without replacement from the module-level
    `field` string, so no position of `field` is used twice in a segment.

    :returns: a string of length 4.
    :raises ValueError: if `field` holds fewer than four characters.
    """
    return ''.join(_secure_rng.sample(field, 4))
# Assemble one activation code out of `group` random segments.
def concatenate(group):
    """Return `group` segments produced by getRandom(), joined with '-'.

    :param group: number of segments in the activation code.
    :returns: the segments joined by hyphens, e.g. four segments give a
        string of the form 'xxxx-xxxx-xxxx-xxxx'.
    """
    segments = []
    for _ in range(group):
        segments.append(getRandom())
    return '-'.join(segments)
# Produce a batch of activation codes.
def generate(n):
    """Return a list of `n` activation codes, each built from four segments.

    :param n: how many activation codes to create.
    :returns: list of strings, one per code, each from concatenate(4).
    """
    codes = []
    for _ in range(n):
        codes.append(concatenate(4))
    return codes
if __name__ == '__main__':
    # Demo run: print ten activation codes. print() with a single argument
    # gives the same output on Python 2 and 3, whereas the bare print
    # statement is a syntax error on Python 3.
    print(generate(10))
# 统计单词 (Count word frequencies)
#!/usr/bin/env python
#encoding:utf-8
import
re
from
collections
import
Counter
# Path of the plain-text file that is analysed when this script is run
# directly.
FileSource = './media/abc.txt'
def getMostCommonWord(articlefilesource):
    """Count how often each word occurs in an English plain-text file.

    A token is either a run of ASCII letters, or a number with an optional
    leading ``$`` and an optional trailing ``%`` (for example ``$5`` or
    ``10%``). Matching is case-sensitive.

    :param articlefilesource: path of the text file to read.
    :returns: list of ``(token, count)`` tuples ordered from most to least
        frequent, as produced by ``Counter.most_common()``.
    :raises IOError: if the file cannot be opened (``OSError`` on Python 3).
    """
    # The numeric alternative previously ended in `$`, which anchored it to
    # the end of the text, so only a number at the very end of the file was
    # ever counted. Without the anchor every number in the text is a token.
    pattern = r'[A-Za-z]+|\$?\d+%?'
    with open(articlefilesource) as f:
        tokens = re.findall(pattern, f.read())
    return Counter(tokens).most_common()
if __name__ == '__main__':
    # Demo run: print the (token, count) list for the default input file.
    # print() with a single argument gives the same output on Python 2 and 3,
    # whereas the bare print statement is a syntax error on Python 3.
    print(getMostCommonWord(FileSource))
# 提取网页正文 (Extract the main text of a web page)
#!/usr/bin/env python
#encoding:utf-8
from
goose
import
Goose
from
goose.text
import
StopWordsChinese
import
sys
# URL of the web page to analyse.
# NOTE(review): the original URL literal was lost; only an unterminated
# opening quote remained, which is a syntax error. Fill in the real address
# before running this script.
url = ''
def extract(url):
    """Extract the main body text of a web page.

    Builds a Goose extractor configured with the Chinese stop-word class,
    runs it on `url` and returns the resulting article's `cleaned_text`.

    :param url: address of the page to analyse.
    :returns: the `cleaned_text` attribute of the extracted article.
    """
    config = {'stopwords_class': StopWordsChinese}
    extractor = Goose(config)
    article = extractor.extract(url=url)
    return article.cleaned_text
if __name__ == '__main__':
    # Demo run: print the extracted body text of the configured page.
    # print() with a single argument gives the same output on Python 2 and 3,
    # whereas the bare print statement is a syntax error on Python 3.
    print(extract(url))
# 本文转自 忘情OK 的 51CTO 博客,原文链接:http://blog.51cto.com/itchentao/1899821 ,如需转载请自行联系原作者。