成员关系操作符:in,not in
连接操作符:+
从节约内存的角度去考虑,对于字符串,建议使用join,对于列表,建议使用extend().
重复操作符:*
切片操作符:[ ],[:],[::]
操作 | 结果 |
L或L[:] | [0, 1, 2, 3, 4] |
L[0:3]或L[:3] | [0, 1, 2] |
L[2:5]或L[2:] | [2, 3, 4] |
L[1:3] | [1, 2] |
L[3] | 3 |
切片操作扩展:使用步长索引
1
2
3
4
5
|
>>> s
=
'abcdefgh'
>>> s[::
-
1
]
#可以视作翻转
'hgfedcba'
>>> s[::
2
]
#隔一个取一个
'aceg'
|
切片索引的更多内容
1
2
3
4
5
6
7
8
9
|
>>> s
=
'abcde'
>>>
for
i
in
[
None
]
+
range
(
-
1
,
-
len
(s),
-
1
):
...
print
s[:i]
...
abcde
abcd
abc
ab
a
|
1
2
3
4
5
6
|
>>>
for
i
in
[
None
].extend(
range
(
-
1
,
-
len
(s),
-
1
)):
...
print
s[:i]
...
Traceback (most recent call last):
File
"<stdin>"
, line
1
,
in
<module>
TypeError:
'NoneType'
object
is
not
iterable
|
1
2
3
4
5
|
>>> s
=
'xpleaf'
>>> s[
-
100
:
100
]
'xpleaf'
>>> s[
-
100
:
-
50
]
''
|
list(iter)
str(obj)
unicode(obj)
basestring()
tuple(iter)
enumerate(iter)
len(seq)
max(iter, key=None) or max(arg0, arg1, key=None)
min(iter, key=None) or min(arg0, arg1, key=None)
reversed(seq)
sorted(iter, cmp=None, key=None, reverse=False)
sum(seq, init=0)
zip([it0, it1,... itN])
str:通常意义的字符串
unicode:Unicode字符串
basestring:抽象类,str和unicode都是其子类,不可被实例化
1
2
3
4
|
>>>
basestring
(
'foo'
)
Traceback (most recent call last):
File
"<stdin>"
, line
1
,
in
<module>
TypeError: The
basestring
type
cannot be instantiated
|
字符串创建和赋值:直接赋值或使用str()转换
访问字符串或其字符:通过切片操作
改变字符串:生成新的字符串
删除字符或字符串:字符串是不可变的,无法只删除其中某个字符(需要生成新的字符串);要清除整个字符串,可以使用del或赋值空字符串
1
2
3
4
5
6
7
|
>>> str1
=
'abc'
>>> str2
=
'lmn'
>>> str3
=
'xyz'
>>> str1 < str2
True
>>> str2 !
=
str3
True
|
正向索引:使用正索引值
反向索引:使用负索引值
默认索引:省略起始或结束索引值,此时分别默认为序列的开头和结尾
通过前面对序列的了解,这些都比较简单,这里就不作总结了。
1
2
3
4
5
6
|
>>>
'bc'
in
'abcd'
True
>>>
'n'
in
'abcd'
False
>>>
'nm'
not
in
'abcd'
True
|
1
2
3
4
5
6
7
8
9
|
>>>
import
string
>>> string.uppercase
'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
>>> string.lowercase
'abcdefghijklmnopqrstuvwxyz'
>>> string.letters
'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
>>> string.digits
'0123456789'
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
|
import
string
alphas
=
string.letters
+
'_'
nums
=
string.digits
print
'Welcome to the Identifier Checker v1.0'
print
'Testees must be at least 2 chars long.'
myInput
=
raw_input
(
'Identifier to test? '
)
if
len
(myInput) >
1
:
if
myInput[
0
]
not
in
alphas:
print
'invalid: first symbol must be alphabetic'
else
:
for
otherChar
in
myInput[
1
:]:
if
otherChar
not
in
alphas
+
nums:
print
'invalid: remaining symbols mustt be alphanumeric'
break
else
:
print
'okay as an identifier'
|
1
2
3
4
5
6
7
8
9
|
Welcome to the Identifier Checker v1.0
Testees must be at least 2 chars long.
Identifier to
test
? .123
invalid: first symbol must be alphabetic
Welcome to the Identifier Checker v1.0
Testees must be at least 2 chars long.
Identifier to
test
? xpleaf
okay as an identifier
|
for-else语句:else语句块在for循环完整执行时才会执行
性能优化:把重复操作作为参数放到循环里面进行是非常低效的
1
|
alphnums
=
alphas
+
nums
|
运行时刻字符串连接
举例如下:
1
2
3
4
5
|
>>>
'xpleaf'
+
' '
+
'clyyh'
'xpleaf clyyh'
>>>
import
string
>>> string.upper(
'xpleaf'
+
' '
+
'clyyh'
)
'XPLEAF CLYYH'
|
1
2
|
>>> '
'.join(('
xpleaf
', '
', '
clyyh')).upper()
'XPLEAF CLYYH'
|
编译时字符串连接
在Python的语法中,允许我们在源码中把几个字符串连在一起来构成新的字符串:
1
2
3
|
>>> foo
=
'Hello'
' '
'World!'
>>> foo
'Hello World!'
|
1
2
3
4
|
>>> string.upper(
'xpleaf/'
...
'yonghaoye'
...
'/clyyh'
)
'XPLEAF/YONGHAOYE/CLYYH'
|
1
2
|
>>>
'Hello'
+
u
' '
+
'World'
+
u
'!'
u
'Hello World!'
|
1
2
|
>>>
'='
*
20
'===================='
|
字符串格式化符号 |
||
格式化字符串 | 转换方式 | 简单例子 |
%c | 转换成字符(ASCII码值,或者长度为一的字符串) | |
%r | 优先使用repr()函数进行字符串转换 | |
%s | 优先使用str()函数进行字符串转换 | |
%d/%i | 转成有符号十进制数 | |
%u | 转成无符号十进制数 | |
%o | 转成无符号八进制数 | |
%x/%X | 转成无符号十六进制数 | |
%e/%E | 转成科学计数法 | >>> print '%e' % 16 |
%f/%F | 转成浮点型(小数部分自然处理) | |
%g/%G | %e和%f/%E和%F的简写,指数小于-4或更高精度时使用%e或%E,否则使用%f | |
%% | 输出% |
元组形式:上面的例子即是采用这种方式
字典形式:后面在总结字典时会提及(下面也有提及)
格式化操作符辅助命令 |
||
符号 | 作用 | 简单例子 |
* | 定义宽度或者小数点精度 | |
- | 用作左对齐 | |
+ | 在正数前面显示加号(+) | |
<sp> | 在正数前面显示空格 | |
# | 在八进制数前显示零('0'),在十六进制数前显示'0x'或'0X' | |
(var) | 映射变量(字典参数) | |
m.n | m是显示的最小总宽度,n是小数点后的位数(如果可用的话) |
1
2
|
>>>
print
'Host: %s\tPort: %d'
%
(
'xpleaf'
,
80
)
Host: xpleaf Port:
80
|
1
2
3
4
5
6
7
8
9
|
class
Menu(db.Model):
__tablename__
=
'menus'
id
=
db.Column(db.Integer, primary_key
=
True
)
name
=
db.Column(db.String(
64
), unique
=
True
)
types
=
db.relationship(
'ArticleType'
, backref
=
'menu'
, lazy
=
'dynamic'
)
order
=
db.Column(db.Integer, default
=
0
, nullable
=
False
)
def
__repr__(
self
):
return
'<Menu %r>'
%
self
.name
|
1
2
3
4
5
6
7
8
9
10
11
|
>>>
class
Test():
...
def
__unicode__(
self
):
...
return
'This is an unicode string'
...
>>> c
=
Test()
>>>
unicode
(c)
u
'This is an unicode string'
>>>
str
(c)
'<__main__.Test instance at 0x7fd83b762bd8>'
>>>
repr
(c)
'<__main__.Test instance at 0x7fd83b762bd8>'
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
|
# 没有定义__repr__()方法
>>>
class
Test(
object
):
...
pass
...
>>> t
=
Test()
>>> t
<__main__.Test
object
at
0x7f57ec793050
>
# 定义了__repr__()方法
>>>
class
Test(
object
):
...
def
__repr__(
self
):
...
return
"This is a test!"
...
>>> t
=
Test()
>>> t
This
is
a test!
|
substitute()
safe_substitute()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
|
>>>
from
string
import
Template
>>> s
=
Template(
'My name is ${name}, and my girlfriend is ${girlfriend}.'
)
>>>
print
s.substitute(name
=
'xpleaf'
, girlfriend
=
'cl'
)
My name
is
xpleaf,
and
my girlfriend
is
cl.
>>>
print
s.substitute(name
=
'xpleaf'
)
Traceback (most recent call last):
File
"<stdin>"
, line
1
,
in
<module>
File
"/usr/lib/python2.7/string.py"
, line
176
,
in
substitute
return
self
.pattern.sub(convert,
self
.template)
File
"/usr/lib/python2.7/string.py"
, line
166
,
in
convert
val
=
mapping[named]
KeyError:
'girlfriend'
>>>
print
s.safe_substitute(name
=
'xpleaf'
)
My name
is
xpleaf,
and
my girlfriend
is
${girlfriend}.
|
1
2
3
4
5
6
7
8
|
>>>
'\n'
'\n'
>>>
print
'\n'
,
>>> r
'\n'
'\\n'
>>>
print
r
'\n'
\n
|
1
2
3
4
5
|
>>> f
=
open
(
'C:\windows\temp\readme.txt'
,
'r'
)
Traceback (most recent call last):
File
"<stdin>"
, line
1
,
in
?
f
=
open
(
'C:\windows\temp\readme.txt'
,
'r'
)
IOError: [Errno
2
] No such
file
or
directory:
'C:\\windows\\temp\readme.txt'
|
1
|
>>> f
=
open
(r
'C:\windows\temp\readme.txt'
,
'r'
)
|
1
2
|
>>> u
'xpleaf'
u
'xpleaf'
|
1
2
3
4
5
6
7
|
>>> ur
'Hello\nWorld!'
u
'Hello\\nWorld!'
>>> ru
'Hello\nWorld!'
File
"<stdin>"
, line
1
ru
'Hello\nWorld!'
^
SyntaxError: invalid syntax
|
1
2
3
4
5
6
7
8
9
|
>>> str1
=
'abc'
>>> str2
=
'lmn'
>>> str3
=
'xyz'
>>>
cmp
(str1, str2)
-
1
>>>
cmp
(str3, str1)
1
>>>
cmp
(str2,
'lmn'
)
0
|
len()
1
2
|
>>>
len
(
'xpleaf'
)
6
|
max()和min():返回ASCII值最大/最小的字符
1
2
3
4
|
>>>
max
(
'xpleaf'
)
'x'
>>>
min
(
'xpleaf'
)
'a'
|
enumerate(iter):接受一个可迭代对象作为参数,返回一个enumerate对象(也是可迭代对象),该对象生成由iter每个元素的index值和item值组成的元组
1
2
3
4
5
6
7
8
9
10
11
12
13
14
|
>>> s
=
'xpleaf'
>>>
enumerate
(s)
<
enumerate
object
at
0x7fd83b7e1eb0
>
>>>
list
(
enumerate
(s))
[(
0
,
'x'
), (
1
,
'p'
), (
2
,
'l'
), (
3
,
'e'
), (
4
,
'a'
), (
5
,
'f'
)]
>>>
for
index, value
in
enumerate
(s):
...
print
index, value
...
0
x
1
p
2
l
3
e
4
a
5
f
|
zip(seq1[, seq2[...]]):返回一个包含元组的列表,即zip(seq1 [, seq2 [...]]) -> [(seq1[0], seq2[0] ...), (...)]
1
2
3
4
5
6
|
>>>
zip
(
'123'
)
[(
'1'
,), (
'2'
,), (
'3'
,)]
>>>
zip
(
'123'
,
'abc'
)
[(
'1'
,
'a'
), (
'2'
,
'b'
), (
'3'
,
'c'
)]
>>>
zip
(
'123'
,
'abc'
,
'def'
)
[(
'1'
,
'a'
,
'd'
), (
'2'
,
'b'
,
'e'
), (
'3'
,
'c'
,
'f'
)]
|
raw_input():读取用户输入的字符串
1
2
3
4
|
>>> user_input
=
raw_input
(
'Enter your name:'
)
Enter your name:xpleaf
>>>
len
(user_input)
6
|
str()和unicode()
1
2
3
4
|
>>>
type
(
'xpleaf'
)
<
type
'str'
>
>>>
type
(u
'xpleaf'
)
<
type
'unicode'
>
|
1
2
3
4
|
>>>
unicode
(
'xpleaf'
)
u
'xpleaf'
>>>
str
(u
'xpleaf'
)
'xpleaf'
|
1
2
3
4
5
6
7
8
|
>>>
isinstance
(
'xpleaf'
,
str
)
True
>>>
isinstance
(
'xpleaf'
,
unicode
)
False
>>>
isinstance
(
'xpleaf'
,
basestring
)
True
>>>
isinstance
(u
'xpleaf'
,
basestring
)
True
|
chr()、unichr()和ord()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
|
In [
15
]: quest
Out[
15
]:
'what is you favorite color?'
In [
16
]: quest.
quest.capitalize quest.isalnum quest.lstrip quest.splitlines
quest.center quest.isalpha quest.partition quest.startswith
quest.count quest.isdigit quest.replace quest.strip
quest.decode quest.islower quest.rfind quest.swapcase
quest.encode quest.isspace quest.rindex quest.title
quest.endswith quest.istitle quest.rjust quest.translate
quest.expandtabs quest.isupper quest.rpartition quest.upper
quest.find quest.join quest.rsplit quest.zfill
quest.
format
quest.ljust quest.rstrip
quest.index quest.lower quest.split
In [
16
]:
help
(quest.split)
|
1
2
3
4
|
>>> s
=
'xpleaf'
>>>
dir
(s)
[
'__add__'
,
'__class__'
,
'__contains__'
,
'__delattr__'
,
'__doc__'
,
'__eq__'
,
'__format__'
,
'__ge__'
,
'__getattribute__'
,
'__getitem__'
,
'__getnewargs__'
,
'__getslice__'
,
'__gt__'
,
'__hash__'
,
'__init__'
,
'__le__'
,
'__len__'
,
'__lt__'
,
'__mod__'
,
'__mul__'
,
'__ne__'
,
'__new__'
,
'__reduce__'
,
'__reduce_ex__'
,
'__repr__'
,
'__rmod__'
,
'__rmul__'
,
'__setattr__'
,
'__sizeof__'
,
'__str__'
,
'__subclasshook__'
,
'_formatter_field_name_split'
,
'_formatter_parser'
,
'capitalize'
,
'center'
,
'count'
,
'decode'
,
'encode'
,
'endswith'
,
'expandtabs'
,
'find'
,
'format'
,
'index'
,
'isalnum'
,
'isalpha'
,
'isdigit'
,
'islower'
,
'isspace'
,
'istitle'
,
'isupper'
,
'join'
,
'ljust'
,
'lower'
,
'lstrip'
,
'partition'
,
'replace'
,
'rfind'
,
'rindex'
,
'rjust'
,
'rpartition'
,
'rsplit'
,
'rstrip'
,
'split'
,
'splitlines'
,
'startswith'
,
'strip'
,
'swapcase'
,
'title'
,
'translate'
,
'upper'
,
'zfill'
]
>>>
help
(s.split)
|
反斜杠开头的转义字符 |
|||||
标识 | 八进制 | 十进制 | 十六进制 | 字符 | 说明 |
\0 | 000 | 0 | 0x00 | NULL | 空字符NULL |
\a | 007 | 7 | 0x07 | BEL | 响铃字符 |
\b | 010 | 8 | 0x08 | BS | 退格 |
\t | 011 | 9 | 0x09 | HT | 横向制表符 |
\n | 012 | 10 | 0x0A | LF | 换行 |
\v | 013 | 11 | 0x0B | VT | 纵向制表符 |
\f | 014 | 12 | 0x0C | FF | 换页 |
\r | 015 | 13 | 0x0D | CR | 回车 |
\e | 033 | 27 | 0x1B | ESC | 转义 |
\" | 042 | 34 | 0x22 | " | 双引号 |
\' | 047 | 39 | 0x27 | ' | 单引号 |
\\ | 134 | 92 | 0x5C | \ | 反斜杠 |
八进制:000~0177
十六进制:0x00~0xff
1
2
3
4
5
6
7
8
|
>>>
print
'xpleaf\tclyyh'
#标识符
xpleaf clyyh
>>>
print
'xpleaf%cclyyh'
%
011
#八进制
xpleaf clyyh
>>>
print
'xpleaf%cclyyh'
%
9
#十进制
xpleaf clyyh
>>>
print
'xpleaf%cclyyh'
%
0x09
#十六进制
xpleaf clyyh
|
1
2
3
4
5
6
7
|
>>> hi
=
'''I am xpleaf,
... and I love cl.'''
>>> hi
#repr()形式输出
'I am xpleaf,\nand I love cl.'
>>>
print
hi
#str()形式输出
I am xpleaf,
and
I love cl.
|
1
2
3
4
5
6
7
8
|
>>> s
=
'xpleaf'
>>>
id
(s)
140566687161200
>>> s
=
s
+
'\tclyyh'
>>> s
'xpleaf\tclyyh'
>>>
id
(s)
140566652843432
|
1
2
3
4
5
|
>>> s
=
'xpleaf'
>>> s[
2
]
=
'L'
Traceback (most recent call last):
File
"<stdin>"
, line
1
,
in
<module>
TypeError:
'str'
object
does
not
support item assignment
|
1
2
3
4
5
|
>>> s[
2
]
'l'
>>> s
=
'%sL%s'
%
(s[
0
:
2
], s[
3
:])
>>> s
'xpLeaf'
|
Unicode术语 |
|
名词 | 含义 |
ASCII | 美国标准信息交换码 |
BMP | 基本多文种平面(第零平面) |
BOM | 字节顺序标记(标识字节顺序的字符) |
CJK/CJKV | 中文-日文-韩文(和越南语)的缩写 |
Code point | 类似于ASCII值,代表Unicode字符的值,范围在range(1114112)或者说从0x000000到0x10FFFF |
Octet | 八位二进制数的位组 |
UCS | 通用字符集 |
UCS2 | UCS的双字节编码方式(见UTF-16) |
UCS4 | UCS的四字节编码方式 |
UTF | Unicode或者UCS的转换格式 |
UTF-8 | 8位UTF转换格式(无符号字节序列,长度为1~4个字节) |
UTF-16 | 16位UTF转换格式(无符号字节序列,通常是16位长[两个字节],见UCS2) |
编码方式
实现方式:Unicode编码格式
关系
尽量不要使用string模块
Python把硬编码的字符串叫做字面上的字符串,默认所有字面上的字符串都用ASCII编码
str()、chr()、unicode()、unichr()
1
2
3
4
5
6
7
8
9
10
11
|
>>>
class
uniTest:
...
def
__unicode__(
self
):
...
return
'xpleaf'
...
def
__repr__(
self
):
...
return
'hello'
...
>>> c
=
uniTest()
>>>
str
(c)
'hello'
>>>
unicode
(c)
u
'xpleaf'
|
1
2
3
4
5
6
7
8
9
10
11
|
>>>
class
Test():
...
def
__repr__(
self
):
...
return
'OK'
...
>>> c
=
Test()
>>>
repr
(c)
'OK'
>>>
str
(c)
'OK'
>>>
unicode
(c)
u
'OK'
|
编码:encode
解码:decode
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
|
#!/usr/bin/env python
CODEC
=
'utf-8'
FILE
=
'unicode.txt'
hello_out
=
u
'Hello world!\n'
bytes_out
=
hello_out.encode(CODEC)
f
=
open
(
FILE
,
'w'
)
f.write(bytes_out)
f.close()
f
=
open
(
FILE
,
'r'
)
bytes_in
=
f.read()
f.close()
hello_in
=
bytes_in.decode(CODEC)
print
hello_in,
|
1
2
|
$
/usr/bin/python2
.7
/home/xpleaf/PycharmProjects/Python_book/6/uniFile
.py
Hello world!
|
程序中出现字符串时加个前缀'u'
不要用str()函数,用unicode()代替
不要用string模块——如果传给它的是非ASCII字符,它会把一切搞砸
不到必须时不要在你的程序里面编码Unicode字符,只在你要写入文件或数据库或网络时,才调用encode()函数;相应地,只在你需要把数据读回来的时候才调用decode()函数
1
2
3
4
5
6
7
8
9
|
>>> t
=
'严'
>>>
print
t
严
>>> t
'\xe4\xb8\xa5'
>>> t.encode(
'utf-8'
)
Traceback (most recent call last):
File
"<stdin>"
, line
1
,
in
<module>
UnicodeDecodeError:
'ascii'
codec can't decode byte
0xe4
in
position
0
: ordinal
not
in
range
(
128
)
|
1
2
|
>>>
type
(t)
<
type
'str'
>
|
1
2
3
4
5
|
>>> t
=
u
'严'
>>> t
u
'\u4e25'
>>> t.encode(
'utf-8'
)
'\xe4\xb8\xa5'
|
1
2
3
4
5
|
xpleaf@leaf:~$ cat test.py
u
=
'严'
xpleaf@leaf:~$ python test.py
File
"test.py"
, line
1
SyntaxError: Non
-
ASCII character
'\xe4'
in
file
test.py on line
1
, but no encoding declared; see http:
/
/
python.org
/
dev
/
peps
/
pep
-
0263
/
for
details
|
1
2
3
4
|
xpleaf@leaf:~$ cat test.py
# coding: utf-8
u
=
'严'
xpleaf@leaf:~$ python test.py
|
1
2
3
4
5
|
xpleaf@leaf:~$ python test.py
Traceback (most recent call last):
File
"test.py"
, line
3
,
in
<module>
u.encode(
'utf-8'
)
UnicodeDecodeError:
'ascii'
codec can't decode byte
0xe4
in
position
0
: ordinal
not
in
range
(
128
)
|
内建的unicode()函数:Unicode字符串工厂函数,接受string做参数,返回一个Unicode字符串
内建的decode()/encode()方法
Unicode类型:Python数据类型,是basestring的子类
Unicode序数:ord()函数返回Unicode字符对应的序数,unichr()函数则根据序数返回对应的Unicode字符
强制类型字符转换:混合类型字符串操作需要把普通字符串转换成Unicode对象
字符串格式化操作符
1
2
|
>>> u
'%s %s'
%
(u
'abc'
,
'abc'
)
u
'abc abc'
|
string
re
struct
c/StringIO
base64
codecs
crypt
difflib
hashlib
hmac
md5
rotor
sha
stringprep
textwrap
unicodedata