# Adapted from: https://docs.python.org/3.6/library/multiprocessing.html#examples
import time
from urllib import request
from multiprocessing import Process, Queue, current_process, freeze_support,cpu_count
def worker2(task_queue, result_queue):
    """Worker loop: pull (func, args) tasks until the 'STOP' sentinel arrives.

    Each task is executed via nety and the resulting status string is
    pushed onto result_queue.

    Fix: the parameters were named ``input`` and ``output``, shadowing the
    Python builtins; they are renamed (all visible callers pass positionally).
    """
    for func, args in iter(task_queue.get, 'STOP'):
        result = nety(func, args)
        result_queue.put(result)
def nety(func, args):
    """Invoke *func* on *args* and report which process executed it.

    The call's return value is discarded; only a status string describing
    the process, function name, and argument is produced.
    """
    func(args)  # run the task; its result is intentionally unused here
    return 'process: %s done func:%s url:%s' % (
        current_process().name, func.__name__, args)
def getUrl(url):
    """Open *url* and return the HTTP response object, unread.

    NOTE(review): the response is never closed here and the caller discards
    it, so the underlying socket is only released by garbage collection.
    """
    return request.urlopen(url)
def test():
    """Fan 100 fetches of one fixed URL out to a pool of worker processes.

    Submits (getUrl, url) tasks to a shared queue, starts cpu_count()*2
    workers, drains one result per task, then sends one 'STOP' sentinel
    per worker.

    Fix: worker Process objects were started but never joined, so the
    parent could finish while children were still shutting down; references
    are now kept and joined after the sentinels are queued.
    """
    NUMBER_OF_PROCESSES = cpu_count() * 2
    urllist = ['http://115.239.211.112/'] * 100  # hard-coded test target
    tasks = [(getUrl, url) for url in urllist]

    # Create queues
    task_queue = Queue()
    done_queue = Queue()

    # Submit every task before the workers start.
    for task in tasks:
        task_queue.put(task)

    # Start worker processes, keeping references so they can be joined.
    processes = [
        Process(target=worker2, args=(task_queue, done_queue))
        for _ in range(NUMBER_OF_PROCESSES)
    ]
    for p in processes:
        p.start()

    # Drain exactly one result per submitted task (arrival order varies).
    print('Unordered results:')
    for _ in range(len(tasks)):
        print('\t', done_queue.get())

    # One sentinel per worker makes each worker2 loop exit.
    for _ in range(NUMBER_OF_PROCESSES):
        task_queue.put('STOP')
    for p in processes:
        p.join()
if __name__ == '__main__':
    freeze_support()  # needed only when frozen into a Windows executable
    t0 = time.time()
    test()
    # Report the elapsed wall-clock time for the whole run.
    print(time.time() - t0)
# Improved version:
import time
from urllib import request
from multiprocessing import Process, Queue, current_process,cpu_count,freeze_support
import os
def worker2(queueIn, queueOut):
    """Consume (func, args) tasks from queueIn until a 'STOP' sentinel.

    Each task is executed via netty and its (args, result) pair is
    forwarded to queueOut.
    """
    while True:
        task = queueIn.get()
        if task == 'STOP':  # same equality check iter(get, 'STOP') performs
            break
        func, args = task
        queueOut.put(netty(func, args))
def netty(func, args):
    """Apply *func* to *args*; return the input paired with the output."""
    return (args, func(args))
def getUrl(url):
    """Fetch *url* and return the raw response body as bytes.

    Fix: the HTTP response object was never closed, leaking the underlying
    socket until garbage collection; a context manager now closes it
    deterministically after the body is read.
    """
    with request.urlopen(url) as res:
        return res.read()
def test():
    """Read URLs from urls/urls.txt, fetch each five times in parallel,
    and write every response body to its own file under DATA/.

    Fixes:
    - hard-coded Windows separators ('urls\\urls.txt', 'DATA\\...') are
      replaced with os.path.join so the script runs on any OS;
    - the DATA output directory is created if it does not exist;
    - newline stripping now handles '\\r\\n' too, and blank lines are
      skipped instead of producing a bogus fetch of an empty URL;
    - worker processes are kept and joined for an orderly shutdown.
    """
    NUMBER_OF_PROCESSES = cpu_count()

    # Load target URLs, one per line.
    url_file = os.path.join('urls', 'urls.txt')
    urllist = []
    with open(url_file, 'r') as f:
        for line in f:
            url = line.strip()
            if url:
                urllist.append(url)
    urllist = urllist * 5  # fetch each URL five times
    tasks = [(getUrl, url) for url in urllist]

    # Create queues and submit every task up front.
    task_queue = Queue()
    done_queue = Queue()
    for task in tasks:
        task_queue.put(task)

    # Start worker processes, keeping references so they can be joined.
    processes = [
        Process(target=worker2, args=(task_queue, done_queue))
        for _ in range(NUMBER_OF_PROCESSES)
    ]
    for p in processes:
        p.start()

    # Collect one (url, body) pair per task and persist each body.
    out_dir = 'DATA'
    os.makedirs(out_dir, exist_ok=True)
    print('Unordered results:')
    for i in range(len(tasks)):
        url, data = done_queue.get()
        # The running index keeps repeated fetches of one URL distinct.
        base = os.path.basename(url).replace('?', '_')
        file_name = os.path.join(out_dir, '%s_%d.data' % (base, i))
        with open(file_name, 'wb') as out:
            out.write(data)

    # Tell child processes to stop, then wait for them.
    for _ in range(NUMBER_OF_PROCESSES):
        task_queue.put('STOP')
    for p in processes:
        p.join()
if __name__ == '__main__':
    freeze_support()  # no-op unless frozen into a Windows executable
    started = time.time()
    test()
    # Print total elapsed wall-clock time.
    print(time.time() - started)