1. 什么是进程和线程?
进程是操作系统进行资源分配的基本单位，而线程是处理器进行任务调度和执行的基本单位。
每个进程至少包含一个线程。
image.png
2. 多线程的创建
2-1 单线程demo
def func(name):
    """Print ``name`` together with each counter value from 0 to 999.

    One ``name counter`` pair is printed per line.
    """
    for step in range(1000):
        print(name, step)


if __name__ == "__main__":
    # Plain sequential calls: the second call only starts once the first
    # one has returned, so the two outputs never interleave.
    for label in ("hello", "world"):
        func(name=label)
运行结果:
image.png
可以看到，只有上面的函数调用执行完毕之后，才会开始执行下面的函数调用。
而多线程可以让多个任务并发执行：各个线程何时运行，由操作系统根据CPU资源的情况进行调度。
下面是实现线程的两种方法
2-2 实现线程方法一:创建线程对象
from threading import Thread


def func(name):
    """Print ``name`` together with each counter value from 0 to 999."""
    for step in range(1000):
        print(name, step)


if __name__ == "__main__":
    # Hand func('hello') and func('world') to one thread each and start
    # them in that order. ``args`` must be a tuple, hence the trailing
    # comma: without it ``(label)`` is just the string itself.
    for label in ("hello", "world"):
        worker = Thread(target=func, args=(label,))
        worker.start()

    # The main thread keeps printing while the two workers run.
    for step in range(1000):
        print("xiaoshan", step)
image.png
可以看到三个任务的输出交替出现，说明多个任务在并发执行。
2-3 实现线程方法二:创建线程类
from threading import Thread


class MyThread(Thread):
    """Thread subclass whose work is defined by overriding ``run``."""

    def run(self):
        """Print ``thread`` together with each counter value from 0 to 499."""
        for step in range(500):
            print("thread", step)


if __name__ == "__main__":
    # start() (not run()) is what launches the new thread; the thread
    # then invokes run() itself.
    worker = MyThread()
    worker.start()

    # Main-thread work that proceeds alongside the worker.
    for step in range(100):
        print("xianxue", step)
    print("over!!!")
多线程执行结果.png
3. 进程
3-1 创建多进程
创建进程的方式和创建线程几乎一模一样，只需把 threading.Thread 换成 multiprocessing.Process。
from multiprocessing import Process


def func(name):
    """Print ``name`` together with each counter value from 0 to 999."""
    for step in range(1000):
        print(name, step)


if __name__ == "__main__":
    # Hand func('hello') and func('world') to one child process each and
    # start them in that order. ``args`` must be a tuple, hence the
    # trailing comma: without it ``(label)`` is just the string itself.
    for label in ("hello", "world"):
        child = Process(target=func, args=(label,))
        child.start()

    # The parent process keeps printing while the children run.
    for step in range(1000):
        print("xiaoshan", step)
image.png
4. 线程池
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor


def func(name):
    """Print ``name`` together with each counter value from 0 to 99."""
    for step in range(100):
        print(name, step)


if __name__ == "__main__":
    # A pool of 20 worker threads receives 10 tasks. Leaving the ``with``
    # block waits for every submitted task to finish.
    with ThreadPoolExecutor(max_workers=20) as pool:
        for i in range(10):
            pool.submit(func, name=f'小单{i}')
    print("over!!")
image.png
4-1 案例：爬取北京新发地的物价
import requests
import json
import csv
import threading
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor

url = "http://www.xinfadi.com.cn/getPriceData.html"

# Serialises access to the shared CSV writer: the function below is run by
# many pool threads at once and they all write to the same file object.
_write_lock = threading.Lock()


def download_page_content(url, current, timeout=10):
    """Fetch one page of price data and append its rows to the shared CSV.

    Relies on the module-level ``csvwriter`` having been created before
    this function is called (the ``__main__`` section below does that).

    Args:
        url: Endpoint that receives the POST request.
        current: Page number, sent as the ``current`` form field.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block a worker thread forever.

    Raises:
        requests.RequestException: on network failure, timeout, or a
            non-success HTTP status.
        ValueError: if the response body is not valid JSON.
        KeyError: if the JSON lacks ``list`` or an expected item field.
    """
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36"
    }
    dat = {
        "limit": 20,
        "current": current,
        "pubDateStartTime": "2023/06/01",
        "pubDateEndTime": "2023/06/30",
        "prodPcatid": "1186",
        "prodCatid": "1199",
        "prodName": "",
    }

    resp = requests.post(url, data=dat, headers=headers, timeout=timeout)
    try:
        # Fail with a clear HTTP error instead of a confusing JSON error
        # when the server answers with an error page.
        resp.raise_for_status()
        resp.encoding = 'utf-8'
        content = json.loads(resp.text)
    finally:
        # Release the connection even when status/parsing fails.
        resp.close()

    rows = [
        [
            item["prodName"],
            item["lowPrice"],
            item["highPrice"],
            item["avgPrice"],
            item["pubDate"],
        ]
        for item in content["list"]
    ]

    # Write the whole page under the lock so rows produced by different
    # threads cannot be interleaved mid-write.
    with _write_lock:
        csvwriter.writerows(rows)


if __name__ == '__main__':
    # newline="" is what the csv module expects of its file object; the
    # ``with`` block guarantees the file is flushed and closed. The text
    # encoding is left at the platform default, as before.
    with open("xfdpriceData.csv", mode="w", newline="") as f:
        csvwriter = csv.writer(f)
        with ThreadPoolExecutor(20) as t:
            futures = {
                t.submit(download_page_content, url=url, current=i): i
                for i in range(1, 18)
            }
        # The pool has been shut down here, so every future is finished.
        # Report failed pages instead of dropping their exceptions silently.
        for future, page in futures.items():
            error = future.exception()
            if error is not None:
                print(f"page {page} failed: {error!r}")
运行结果: