模型性能压测报告
# 压测脚本核心逻辑(Python)
from aliyun import TongyiLingma
models = ["DeepSeek-R1", "Qwen2.5-72B", "DeepSeek-V3"]
prompts = {
"代码补全": "用Python实现快速排序,要求时间复杂度O(nlogn)",
"SQL生成": "根据用户行为日志表(user_id, action, timestamp),统计最近7天每日活跃用户数",
"测试生成": "为Spring Boot用户注册API生成JUnit5测试用例"
}
for model in models:
client = TongyiLingma(model=model)
for task, prompt in prompts.items():
start = time.time()
response = client.generate(prompt)
latency = time.time() - start # 记录响应时间
save_to_csv(model, task, latency, response.usage.tokens)
压测结果对比表
| 任务类型 | 平均响应(s) | Tokens/请求 | 代码通过率 |
|-----------------|------------|-------------|-------------|------------|
| DeepSeek-R1 | 代码补全 | 2.1 | 512 | 82% |
| Qwen2.5-72B | 代码补全 | 3.8 | 894 | 95% |
| DeepSeek-V3 | SQL生成 | 1.9 | 327 | 91% |