python多进程
python多进程的使用有两种方式:
- multiprocessing
- concurrent.futures
multiprocessing的使用方式
- 定义进程池的大小(进程数量)
- 开始处理,结果回调
下面以多进程下载图像为例:
import multiprocessing
import requests
from io import BytesIO
from PIL import Image, ImageFont, ImageDraw# 多进程函数下载图像
def multiprocess_download_images(urls, num_process=10, download_dir="./tmp_check"):
    """Download images in parallel with a ``multiprocessing.Pool``.

    Every URL is submitted to ``download_single_image`` in a worker process
    (with a per-request timeout of 1 second) and each worker's return value
    is handed to ``call_back`` in the parent process.

    :param urls: a single URL, or a list/tuple/set of URLs
    :param num_process: number of worker processes in the pool
    :param download_dir: directory the images are written to; created if
        it does not exist
    :return: ``True`` if ``download_dir`` contains at least one entry after
        all tasks have finished, ``False`` otherwise.  NOTE: this inspects
        the directory, not the per-URL results, so files that were already
        present also yield ``True``.
    """
    # Imported locally: the surrounding file never imports ``os``.
    import os

    def _report_worker_error(exc):
        # Without an error_callback, an exception raised in a worker is
        # stored on an AsyncResult nobody reads and vanishes silently.
        print("worker error:", exc)

    os.makedirs(download_dir, exist_ok=True)

    # Accept any common collection of URLs; anything else is one URL.
    if isinstance(urls, (list, tuple, set, frozenset)):
        urls = list(urls)
    else:
        urls = [urls]

    # Skip spawning worker processes when there is nothing to download.
    if urls:
        # The context manager tears the pool down even if submitting fails.
        with multiprocessing.Pool(processes=num_process) as download_pools:
            for url in urls:
                download_pools.apply_async(
                    download_single_image,
                    (url, download_dir, 1),
                    callback=call_back,
                    error_callback=_report_worker_error,
                )
            # No more tasks will be submitted; wait for the queued ones.
            download_pools.close()
            download_pools.join()

    return bool(os.listdir(download_dir))


# Download a single image
def download_single_image(url, download_dir, timeout=0.5, try_num=3):
    """Download one image and save it into ``download_dir``.

    The response body is decoded with PIL and re-saved under the base name
    of the URL's path.  Any exception during an attempt counts as a failed
    attempt and triggers a retry.

    :param url: URL of the image
    :param download_dir: target directory; created if it does not exist
    :param timeout: timeout passed to ``requests.get``
    :param try_num: maximum number of attempts
    :return: ``True`` as soon as one attempt succeeds, ``False`` when all
        ``try_num`` attempts failed
    """
    # Imported locally: the surrounding file never imports these modules.
    import logging
    import os
    from urllib.parse import urlsplit

    logger = logging.getLogger(__name__)
    os.makedirs(download_dir, exist_ok=True)

    for attempt in range(1, try_num + 1):
        try:
            response = requests.get(url, timeout=timeout)
            # Fail fast on 4xx/5xx instead of feeding an error page to PIL.
            response.raise_for_status()
            image = Image.open(BytesIO(response.content))
            # Use only the path component so a query string or fragment
            # does not end up in the file name (PIL picks the output
            # format from the extension).
            file_name = os.path.basename(urlsplit(url).path)
            image.save(os.path.join(download_dir, file_name))
            return True
        except Exception as e:
            # Best-effort download: record the failure and retry.
            logger.warning(
                "download attempt %d/%d failed for %s: %s",
                attempt, try_num, url, e,
            )
    return False


# Post-processing callback for the multiprocessing pool
def call_back(return_value):
    """Print the value returned by a finished worker task.

    :param return_value: whatever the worker function returned
    """
    print("return_value:", return_value)


if __name__ == "__main__":
    # Imported locally: the surrounding file never imports ``time``.
    import time

    start_time = time.time()
    urls = [
        "https://huichuan-mc.sm.cn/210172767/2302123579a068b4ee35f0bf9a59a4cb48b6731.png",
        "https://huichuan-mc.sm.cn/210172767/230711e31648e9e86837981560794eb42b0c6a.png",
        "https://huichuan-mc.sm.cn/210172767/231228ea9df9bb65549874ac9acfd6a13e7c44.png",
        "https://huichuan-mc.sm.cn/210172767/23122876aec3054982c591ca6bc8af5af877ee.png",
        "https://huichuan-mc.sm.cn/210172767/23122833f8f22e43b7d9d5b6c90c617e482a57.png",
        "https://huichuan-mc.sm.cn/210172767/2212205c641bec29a2a0ba0d6fca100b8e86dc.png",
        "https://huichuan-mc.sm.cn/210172767/2212212aadd8efa1e2e20e14bb3f1a757b2cb2.png",
        "https://huichuan-mc.sm.cn/210172767/231227d1234e9381098211acb3b6b412a2b3ab.jpeg",
        "https://huichuan-mc.sm.cn/210172767/231212a9c43a82e881224338cb4167a98cdeb3.png",
    ]
    download_dir = "./tmp"
    multiprocess_download_images(urls, download_dir=download_dir)
    # Report the wall-clock time the timer above was started for.
    print(f"elapsed: {time.time() - start_time:.2f}s")
看有的用法是这样的:
# Submit one task to the pool and keep the handle to its pending result.
async_result = download_pools.apply_async(download_single_image, (url, download_dir, 1,), callback=call_back)
# get() blocks the calling process until this task has finished.
result = async_result.get()
但这种用法中 get() 会阻塞主进程,直到该任务执行完成并返回结果;如果每提交一个任务就立刻调用 get(),各任务实际上会变成串行执行,因此这里不推荐使用,使用上面的 demo 即可。
concurrent.futures 的使用方式
- 定义好基本的处理函数即可
import requests
import concurrent.futures
from io import BytesIO
from PIL import Image, ImageFont, ImageDraw


def multiprocess_download_images(urls, num_process=10, download_dir="./tmp_check"):
    """Download images in parallel with a ``ProcessPoolExecutor``.

    Every URL is submitted to ``download_single_image`` (timeout 0.5,
    up to 3 attempts) and a message is printed for each URL whose download
    reported failure.

    :param urls: a single URL string, or an iterable of URL strings
    :param num_process: maximum number of worker processes
    :param download_dir: directory the images are written to; created if
        it does not exist
    :return: ``True`` if ``download_dir`` contains at least one entry after
        all tasks have finished, ``False`` otherwise.  NOTE: this inspects
        the directory, not the per-URL results.
    """
    # Imported locally: the surrounding file never imports ``os``.
    import os

    # Create the directory up front so the final listdir() cannot fail
    # when no worker ever ran (e.g. ``urls`` is empty).
    os.makedirs(download_dir, exist_ok=True)

    # A bare string is one URL, not an iterable of one-character URLs.
    if isinstance(urls, str):
        urls = [urls]

    with concurrent.futures.ProcessPoolExecutor(max_workers=num_process) as executor:
        # Map each future back to its URL so failures can be reported.
        future_to_url = {
            executor.submit(
                download_single_image, url, download_dir, timeout=0.5, try_num=3
            ): url
            for url in urls
        }
        # Handle results in completion order.
        for future in concurrent.futures.as_completed(future_to_url):
            if not future.result():
                print(f"ERROR info: 图像下载失败,请检查下载链接: {future_to_url[future]}")

    return bool(os.listdir(download_dir))


def download_single_image(url, download_dir, timeout=0.5, try_num=3):
    """Download one image and save it into ``download_dir``.

    The response body is decoded with PIL and re-saved under the base name
    of the URL's path.  Any exception during an attempt counts as a failed
    attempt and triggers a retry.

    :param url: URL of the image
    :param download_dir: target directory; created if it does not exist
    :param timeout: timeout passed to ``requests.get``
    :param try_num: maximum number of attempts
    :return: ``True`` as soon as one attempt succeeds, ``False`` when all
        ``try_num`` attempts failed
    """
    # Imported locally: the surrounding file never imports these modules.
    import logging
    import os
    from urllib.parse import urlsplit

    logger = logging.getLogger(__name__)
    os.makedirs(download_dir, exist_ok=True)

    for attempt in range(1, try_num + 1):
        try:
            response = requests.get(url, timeout=timeout)
            # Fail fast on 4xx/5xx instead of feeding an error page to PIL.
            response.raise_for_status()
            image = Image.open(BytesIO(response.content))
            # Use only the path component so a query string or fragment
            # does not end up in the file name (PIL picks the output
            # format from the extension).
            file_name = os.path.basename(urlsplit(url).path)
            image.save(os.path.join(download_dir, file_name))
            return True
        except Exception as e:
            # Best-effort download: record the failure and retry.
            logger.warning(
                "download attempt %d/%d failed for %s: %s",
                attempt, try_num, url, e,
            )
    return False


if __name__ == "__main__":
    # Imported locally: the surrounding file never imports ``time``.
    import time

    start_time = time.time()
    urls = [
        "https://huichuan-mc.sm.cn/210172767/2302123579a068b4ee35f0bf9a59a4cb48b6731.png",
        "https://huichuan-mc.sm.cn/210172767/230711e31648e9e86837981560794eb42b0c6a.png",
        "https://huichuan-mc.sm.cn/210172767/231228ea9df9bb65549874ac9acfd6a13e7c44.png",
        "https://huichuan-mc.sm.cn/210172767/23122876aec3054982c591ca6bc8af5af877ee.png",
        "https://huichuan-mc.sm.cn/210172767/23122833f8f22e43b7d9d5b6c90c617e482a57.png",
        "https://huichuan-mc.sm.cn/210172767/2212205c641bec29a2a0ba0d6fca100b8e86dc.png",
        "https://huichuan-mc.sm.cn/210172767/2212212aadd8efa1e2e20e14bb3f1a757b2cb2.png",
        "https://huichuan-mc.sm.cn/210172767/231227d1234e9381098211acb3b6b412a2b3ab.jpeg",
        "https://huichuan-mc.sm.cn/210172767/231212a9c43a82e881224338cb4167a98cdeb3.png",
    ]
    download_dir = "./tmp"
    multiprocess_download_images(urls, download_dir=download_dir)
    # Report the wall-clock time the timer above was started for.
    print(f"elapsed: {time.time() - start_time:.2f}s")
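另外 concurrent.futures 还提供多线程的使用方式(ThreadPoolExecutor),下面附上。受 GIL 限制,CPython 的多线程无法并行执行 Python 字节码(即常说的“伪多线程”),因此 CPU 密集型任务建议使用多进程;不过像下载这类 I/O 密集型任务,线程在等待网络时会释放 GIL,多线程同样有效。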
def multiprocess_download_images(urls, num_process=10, download_dir="./tmp_check"):
    """Download images concurrently with a ``ThreadPoolExecutor``.

    Despite the name (kept so existing callers still work), this variant
    uses threads, not processes.  Every URL is submitted to
    ``download_single_image`` (timeout 0.5, up to 3 attempts) and a message
    is printed for each URL whose download reported failure.

    :param urls: a single URL string, or an iterable of URL strings
    :param num_process: maximum number of worker threads
    :param download_dir: directory the images are written to; created if
        it does not exist
    :return: ``True`` if ``download_dir`` contains at least one entry after
        all tasks have finished, ``False`` otherwise.  NOTE: this inspects
        the directory, not the per-URL results.
    """
    # Imported locally: the surrounding file never imports ``os``.
    import os

    # Create the directory up front so the final listdir() cannot fail
    # when no worker ever ran (e.g. ``urls`` is empty).
    os.makedirs(download_dir, exist_ok=True)

    # A bare string is one URL, not an iterable of one-character URLs.
    if isinstance(urls, str):
        urls = [urls]

    with concurrent.futures.ThreadPoolExecutor(max_workers=num_process) as executor:
        # Map each future back to its URL so failures can be reported.
        future_to_url = {
            executor.submit(
                download_single_image, url, download_dir, timeout=0.5, try_num=3
            ): url
            for url in urls
        }
        # Handle results in completion order.
        for future in concurrent.futures.as_completed(future_to_url):
            if not future.result():
                print(f"ERROR info: 图像下载失败,请检查下载链接: {future_to_url[future]}")

    return bool(os.listdir(download_dir))