四、C扩展与性能优化
4.1 C扩展开发基础
编写C扩展可以获得接近原生的性能;在不访问Python对象的计算密集代码段中,还可以显式释放GIL,让其他线程并行运行。
// example.c - C扩展示例
#include <Python.h>
// 计算斐波那契数列
// Return the n-th Fibonacci number (F(0) = 0, F(1) = 1) as a Python int.
//
// Python signature: fibonacci(n: int) -> int
// Raises ValueError for a negative n and OverflowError when the result does
// not fit in an unsigned 64-bit integer (n > 93).
static PyObject* fibonacci(PyObject* self, PyObject* args) {
    int n;
    if (!PyArg_ParseTuple(args, "i", &n)) {
        return NULL;
    }
    if (n < 0) {
        PyErr_SetString(PyExc_ValueError, "n must be non-negative");
        return NULL;
    }
    // F(93) is the largest Fibonacci number below 2^64; refusing larger
    // inputs avoids silently returning a wrapped value.
    if (n > 93) {
        PyErr_SetString(PyExc_OverflowError,
                        "fibonacci(n) does not fit in 64 bits for n > 93");
        return NULL;
    }
    if (n <= 1) {
        return PyLong_FromLong(n);
    }
    // unsigned long long is at least 64 bits on every platform, unlike long.
    unsigned long long a = 0, b = 1;
    for (int i = 2; i <= n; i++) {
        unsigned long long c = a + b;
        a = b;
        b = c;
    }
    return PyLong_FromUnsignedLongLong(b);
}
// 释放GIL进行长时间计算
// Compute 0^2 + 1^2 + ... + (n-1)^2 with the GIL released.
//
// Python signature: heavy_compute(n: int) -> int
// The sum is accumulated in an unsigned 64-bit integer, so for very large n
// the result is the true sum modulo 2^64 (well-defined wraparound rather
// than signed-overflow undefined behaviour). A non-positive n yields 0.
static PyObject* heavy_compute(PyObject* self, PyObject* args) {
    int n;
    if (!PyArg_ParseTuple(args, "i", &n)) {
        return NULL;
    }
    // Must be declared BEFORE Py_BEGIN_ALLOW_THREADS: that macro opens a
    // brace block which Py_END_ALLOW_THREADS closes, so a variable declared
    // between them would be out of scope at the return statement.
    unsigned long long result = 0;
    // Release the GIL; no Python API may be called until it is re-acquired.
    Py_BEGIN_ALLOW_THREADS
    for (int i = 0; i < n; i++) {
        // Widen before multiplying so i * i cannot overflow int.
        result += (unsigned long long)i * (unsigned long long)i;
    }
    // Re-acquire the GIL before touching Python objects again.
    Py_END_ALLOW_THREADS
    return PyLong_FromUnsignedLongLong(result);
}
// 方法定义
// Method table exported by the module: one entry per Python-callable
// function, terminated by an all-zero sentinel entry.
static PyMethodDef Methods[] = {
    {
        .ml_name = "fibonacci",
        .ml_meth = fibonacci,
        .ml_flags = METH_VARARGS,
        .ml_doc = "计算斐波那契数列",
    },
    {
        .ml_name = "heavy_compute",
        .ml_meth = heavy_compute,
        .ml_flags = METH_VARARGS,
        .ml_doc = "释放GIL的计算",
    },
    {
        .ml_name = NULL,
        .ml_meth = NULL,
        .ml_flags = 0,
        .ml_doc = NULL,
    },
};
// 模块定义
// Module definition for "fastmath". m_size = -1 means the module keeps its
// state in globals and does not support sub-interpreters.
static struct PyModuleDef module = {
    .m_base = PyModuleDef_HEAD_INIT,
    .m_name = "fastmath",
    .m_doc = "高性能数学计算模块",
    .m_size = -1,
    .m_methods = Methods,
};
// 初始化函数
// Module initialisation entry point, looked up by name when Python runs
// `import fastmath`. Returns the new module object, or NULL with an
// exception set if creation fails.
PyMODINIT_FUNC PyInit_fastmath(void) {
    PyObject* created = PyModule_Create(&module);
    return created;
}
对应的setup.py:
from setuptools import setup, Extension
# The extension is built from the single C source file example.c.
fastmath_ext = Extension(name='fastmath', sources=['example.c'])

# Package metadata, kept separate from the build call for readability.
metadata = {
    'name': 'fastmath',
    'version': '1.0',
    'description': 'C扩展示例',
}

setup(ext_modules=[fastmath_ext], **metadata)
# 编译:python setup.py build_ext --inplace
4.2 Cython:Python与C的桥梁
Cython是编写C扩展的最佳选择,语法接近Python。
# fastmath.pyx
import cython
def fibonacci_cython(int n):
    """Return the n-th Fibonacci number (F(0) = 0, F(1) = 1).

    The loop runs on C integers. The bounds-check/wraparound directives were
    dropped because this function performs no indexing, so they had no effect.

    Raises:
        ValueError: If n is negative.
        OverflowError: If n > 93, where the result no longer fits in an
            unsigned 64-bit C integer.
    """
    if n < 0:
        raise ValueError("n must be non-negative")
    if n > 93:
        raise OverflowError("fibonacci_cython(n) does not fit in 64 bits for n > 93")
    if n <= 1:
        return n
    # unsigned long long is at least 64 bits everywhere, unlike C long.
    cdef unsigned long long a = 0
    cdef unsigned long long b = 1
    cdef unsigned long long c
    cdef int i
    for i in range(2, n + 1):
        c = a + b
        a = b
        b = c
    return b
# 类型化内存视图(高效数组操作)
@cython.boundscheck(False)  # indices below are always within [0, shape[0])
@cython.wraparound(False)   # no negative indices are used
def sum_array(double[:] arr):
    """Return the sum of a one-dimensional buffer of C doubles.

    Args:
        arr: Any object exposing a 1-D buffer of doubles (typed memoryview).

    Returns:
        The sum as a float; 0.0 for an empty buffer.
    """
    cdef double total = 0
    # Py_ssize_t matches the type of arr.shape[0]; a C int index could
    # overflow on buffers with more than 2**31 - 1 elements.
    cdef Py_ssize_t i
    cdef Py_ssize_t length = arr.shape[0]
    for i in range(length):
        total += arr[i]
    return total
# 使用C函数
from libc.math cimport sqrt
def fast_sqrt(double x):
    """Return the square root of x computed by the C library's sqrt()."""
    cdef double root = sqrt(x)
    return root
# 释放GIL
with cython.nogil:
    # Code inside this block runs without holding the GIL.
    pass
4.3 性能分析工具
import cProfile
import pstats
import line_profiler
import memory_profiler
import timeit
# 1. cProfile:函数级性能分析
def profile_cprofile():
    """Profile a sample list comprehension with cProfile.

    Prints the ten entries with the highest cumulative time to stdout.
    """
    profiler = cProfile.Profile()
    profiler.enable()
    # Workload under measurement.
    [x**2 for x in range(100000)]
    profiler.disable()

    stats = pstats.Stats(profiler)
    stats.sort_stats('cumulative')
    stats.print_stats(10)
# 2. line_profiler:行级分析
# kernprof and memory_profiler each inject a ``profile`` decorator when they
# launch the script. Fall back to a no-op so the file can also be run or
# imported by a plain interpreter instead of failing with NameError.
try:
    profile
except NameError:
    def profile(func):
        """No-op stand-in for the profiler-injected decorator."""
        return func


@profile  # line-level timings: kernprof -l script.py
def slow_function():
    """Return the sum of i * j over a 10000 x 10000 grid.

    Deliberately slow (10**8 iterations) so the line profiler has something
    to measure.
    """
    total = 0
    for i in range(10000):
        for j in range(10000):
            total += i * j
    return total


# 3. memory_profiler: memory usage analysis
@profile  # memory report: python -m memory_profiler script.py
def memory_intensive():
    """Build and return 100000 lists of 100 integers each.

    Deliberately memory-hungry so the memory profiler shows growth on the
    append line.
    """
    data = []
    for i in range(100000):
        data.append([i] * 100)
    return data
# 4. timeit:精确计时
def timeit_example(stmt='[x**2 for x in range(1000)]', number=1000, repeat=5):
    """Time a statement with timeit and print per-execution results.

    Prints the mean time per execution from a single timing run, then the
    per-execution time of the fastest of ``repeat`` runs, both in
    milliseconds.

    Args:
        stmt: Statement to time, as source text.
        number: Executions per timing run; must be positive.
        repeat: Number of timing runs for the "fastest" figure; must be
            positive.

    Raises:
        ValueError: If ``number`` or ``repeat`` is not positive.
    """
    if number <= 0:
        raise ValueError("number must be positive")
    if repeat <= 0:
        raise ValueError("repeat must be positive")

    # timeit returns the TOTAL seconds for `number` executions, so divide by
    # the same `number` (not a separate literal) to get the per-run time.
    total_seconds = timeit.timeit(stmt, number=number)
    print(f"平均耗时:{total_seconds / number * 1000:.2f}ms")

    # The minimum of several runs is the figure least disturbed by other
    # activity on the machine.
    run_totals = timeit.repeat(stmt, repeat=repeat, number=number)
    print(f"最快:{min(run_totals) / number * 1000:.2f}ms")
五、系统级编程与网络编程
5.1 底层I/O与文件系统
import os
import fcntl
import mmap
import select
import socket
# 内存映射文件
def mmap_example(path='large_file.txt'):
    """Memory-map a file, print its first 100 bytes and overwrite bytes 10-19.

    Args:
        path: File to map; it must already exist because it is opened 'r+b'.

    The file must be non-empty to be mapped, and at least 20 bytes long for
    the slice assignment to succeed, since a mapping cannot be resized by
    assigning to a slice.
    """
    with open(path, 'r+b') as f:
        # Length 0 maps the whole file. The context manager guarantees the
        # mapping is closed even if the access below raises.
        with mmap.mmap(f.fileno(), 0) as mm:
            print(mm[:100])  # read the first 100 bytes
            mm[10:20] = b'ABCDEFGHIJ'  # modify the file through the mapping
# 非阻塞I/O
def non_blocking_io(path='file.txt', timeout=5.0, max_bytes=1024):
    """Open a file non-blocking, wait until it is readable and read from it.

    Args:
        path: File to open read-only with O_NONBLOCK.
        timeout: Seconds to wait in select() for the descriptor to become
            readable.
        max_bytes: Maximum number of bytes to read.

    Returns:
        The bytes read, or None if the descriptor did not become readable
        within ``timeout``.
    """
    fd = os.open(path, os.O_RDONLY | os.O_NONBLOCK)
    try:
        # Monitor the descriptor with select.
        readable, _, _ = select.select([fd], [], [], timeout)
        if fd in readable:
            return os.read(fd, max_bytes)
        return None
    finally:
        # Always release the descriptor, even if select/read raises.
        os.close(fd)
# 异步文件I/O(aiofiles通过线程池执行阻塞调用,并非Linux内核AIO)
import aiofiles
async def async_file_io(path='large_file.txt'):
    """Read a whole file asynchronously with aiofiles and print its size.

    Args:
        path: File to read.

    The file is opened in binary mode so that ``len(content)`` really is the
    byte count the message reports; in text mode it would be the number of
    decoded characters.
    """
    async with aiofiles.open(path, 'rb') as f:
        content = await f.read()
    print(f"读取{len(content)}字节")
5.2 高性能网络编程
import socket
import selectors
import asyncio
# 使用selectors实现事件驱动
class SelectorServer:
    """Single-threaded TCP echo server driven by the ``selectors`` module.

    The listening socket and every accepted connection are non-blocking and
    registered for read events. The callback stored as each key's ``data``
    is invoked with the ready socket.
    """

    def __init__(self, host='localhost', port=8888):
        """Bind, listen and register the listening socket.

        Args:
            host: Address to bind to.
            port: TCP port to bind to.

        Raises:
            OSError: If the socket cannot be set up. Anything acquired so
                far is released before the error propagates.
        """
        self.selector = selectors.DefaultSelector()
        self.server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            self.server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            self.server.bind((host, port))
            self.server.listen()
            self.server.setblocking(False)
            self.selector.register(self.server, selectors.EVENT_READ, self.accept)
        except BaseException:
            self.close()
            raise

    def accept(self, sock):
        """Accept a pending connection and watch it for incoming data."""
        try:
            conn, _addr = sock.accept()
        except (BlockingIOError, ConnectionAbortedError):
            # The peer went away between readiness and accept(); nothing to do.
            return
        conn.setblocking(False)
        self.selector.register(conn, selectors.EVENT_READ, self.read)

    def read(self, conn):
        """Echo received data back, or drop the connection when it ends."""
        try:
            data = conn.recv(1024)
        except BlockingIOError:
            # Spurious wake-up: no data after all, keep the connection.
            return
        except ConnectionError:
            # A reset by the peer must not take the whole server down.
            data = b''
        if data:
            try:
                # NOTE: on a non-blocking socket send() may transmit only
                # part of the reply; acceptable for these short messages.
                conn.send(b'ECHO: ' + data)
                return
            except OSError:
                # Peer gone or send buffer full: drop this connection below.
                pass
        self.selector.unregister(conn)
        conn.close()

    def serve_once(self, timeout=None):
        """Wait for ready sockets once and dispatch their callbacks.

        Args:
            timeout: Seconds to wait; None blocks until something is ready.
        """
        for key, _mask in self.selector.select(timeout):
            callback = key.data
            callback(key.fileobj)

    def run(self):
        """Serve until interrupted, then release all sockets."""
        try:
            while True:
                self.serve_once()
        finally:
            self.close()

    def close(self):
        """Close every registered socket, the selector and the listener.

        Safe to call more than once.
        """
        registered = self.selector.get_map()
        # get_map() returns None once the selector has been closed.
        if registered is not None:
            for key in list(registered.values()):
                self.selector.unregister(key.fileobj)
                key.fileobj.close()
        self.selector.close()
        self.server.close()
# 使用socketserver实现高并发
from socketserver import TCPServer, StreamRequestHandler, ThreadingMixIn
class ThreadingTCPServer(ThreadingMixIn, TCPServer):
    """TCP server that handles each incoming connection in its own thread.

    ThreadingMixIn must come first in the bases so that its request
    processing overrides TCPServer's.
    """
class EchoHandler(StreamRequestHandler):
    """Request handler that echoes one line back, prefixed with ``ECHO: ``."""

    def handle(self):
        """Read a single line, strip surrounding whitespace and echo it."""
        line = self.rfile.readline()
        payload = line.strip()
        self.wfile.write(b'ECHO: ' + payload)
def run_tcp_server():
    """Run the threaded echo server on localhost:8889 until interrupted.

    The server is used as a context manager so that its listening socket is
    closed when serve_forever() exits, for example on KeyboardInterrupt.
    """
    with ThreadingTCPServer(('localhost', 8889), EchoHandler) as server:
        server.serve_forever()
六、设计模式与架构模式
6.1 架构模式
# 1. 事件驱动架构
class EventBus:
    """Minimal publish/subscribe event bus.

    Handlers are registered per event type and called, in subscription
    order, with the data passed to ``publish``.
    """

    def __init__(self):
        # Maps event type -> list of handlers in subscription order.
        self._handlers = {}

    def subscribe(self, event_type, handler):
        """Register ``handler`` to be called for ``event_type`` events."""
        self._handlers.setdefault(event_type, []).append(handler)

    def publish(self, event_type, data=None):
        """Call every handler subscribed to ``event_type`` with ``data``.

        Does nothing if the event type has no subscribers.
        """
        # Iterate over a snapshot: a handler may subscribe during dispatch,
        # and iterating the live list would call the new handler within this
        # same publish (or loop forever if a handler re-subscribes itself).
        for handler in tuple(self._handlers.get(event_type, ())):
            handler(data)
# 2. 管道与过滤器架构
class Pipeline:
    """Pipes-and-filters processor: runs data through an ordered chain of callables."""

    def __init__(self):
        # Filters in the order they will be applied.
        self.filters = []

    def add_filter(self, filter_func):
        """Append a filter and return the pipeline so calls can be chained."""
        self.filters.append(filter_func)
        return self

    def execute(self, data):
        """Feed ``data`` through every filter in turn and return the result."""
        current = data
        for stage in self.filters:
            current = stage(current)
        return current
# 使用
# Build the pipeline in one chained expression; add_filter returns the
# pipeline itself, so `pipeline` is the same object Pipeline() created.
pipeline = (
    Pipeline()
    .add_filter(lambda text: text.strip())
    .add_filter(lambda text: text.lower())
    .add_filter(lambda text: text.replace(' ', '_'))
)
result = pipeline.execute(" Hello World ")
print(result)  # "hello_world"
# 3. 插件架构
import importlib
import pkgutil
class PluginManager:
    """Discovers plugin modules inside a package and keeps a name -> plugin registry.

    Every module directly inside the package is imported; a module that
    defines ``register`` has it called with this manager, and is expected to
    call back into ``PluginManager.register``.
    """

    def __init__(self, plugin_package):
        """Store the package name and load its plugins immediately."""
        self.plugin_package = plugin_package
        self.plugins = {}
        self.load_plugins()

    def load_plugins(self):
        """Import each module of the package and run its ``register`` hook, if any."""
        root = importlib.import_module(self.plugin_package)
        for info in pkgutil.iter_modules(root.__path__):
            module_name = info[1]
            loaded = importlib.import_module(f"{self.plugin_package}.{module_name}")
            try:
                hook = loaded.register
            except AttributeError:
                # Module does not take part in plugin registration.
                continue
            hook(self)

    def register(self, name, plugin_class):
        """Record ``plugin_class`` under ``name``, replacing any earlier entry."""
        self.plugins[name] = plugin_class

    def get_plugin(self, name):
        """Return the plugin registered as ``name``, or None if there is none."""
        try:
            return self.plugins[name]
        except KeyError:
            return None
七、深度学习与AI工程化
7.1 自定义PyTorch扩展
# 自定义autograd Function示例(纯Python实现;真正的C++/CUDA扩展需借助torch.utils.cpp_extension)
import torch
from torch.autograd import Function
class CustomFunction(Function):
    """Element-wise square with a hand-written gradient (d/dx x*x = 2x)."""

    @staticmethod
    def forward(ctx, input):
        """Return ``input * input`` and save ``input`` for the backward pass."""
        ctx.save_for_backward(input)
        squared = torch.mul(input, input)
        return squared

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradient with respect to the input: grad_output * 2 * input."""
        (saved_input,) = ctx.saved_tensors
        doubled_grad = grad_output * 2
        return doubled_grad * saved_input
# 使用
# Square three values through the custom Function, then backpropagate the
# sum so that x.grad holds 2 * x.
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = CustomFunction.apply(x)
torch.sum(y).backward()
print(x.grad)  # tensor([2., 4., 6.])
八、Python高阶学习路径
8.1 知识体系图
高阶开发者的标志不是掌握多少"高级特性",而是能够从系统层面思考问题,在遇到性能瓶颈时能够定位到问题本质,在需要扩展Python时能够写出优雅的C扩展。愿你在Python的世界里,不仅成为技术的高手,更成为能够创造工具、定义规则的先行者。
来源:
https://app-ad5sxofh8phd.appmiaoda.com