import requests
import re
import os
from urllib import parse
def download_images(keyword, num_images=10, *, save_dir='images',
                    prefix='wood', timeout=10):
    """Download thumbnail images from Baidu image search for a keyword.

    Result pages are fetched from the ``image.baidu.com`` ``acjson``
    endpoint, every ``"thumbURL"`` value is extracted from the response
    text, and each thumbnail is saved as
    ``<save_dir>/<prefix>_<n>.jpg`` with ``n`` starting at 1.

    Args:
        keyword: Search term; it is percent-encoded before being placed
            in the query string.
        num_images: Maximum number of images to save.
        save_dir: Directory that receives the files; created if missing.
        prefix: File-name prefix placed before the running number.
        timeout: Timeout in seconds passed to every HTTP request.

    Returns:
        The number of images actually written to disk. This can be lower
        than ``num_images`` when the search runs out of results or a
        page request fails.
    """
    os.makedirs(save_dir, exist_ok=True)
    quoted_keyword = parse.quote(keyword)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    thumb_pattern = re.compile(r'"thumbURL":"(.*?)"')
    page_size = 30  # used both as the rn= page size and the pn= offset step
    count = 0
    pn = 0
    while count < num_images:
        # NOTE: the separator before "copyright=" must be a literal '&';
        # it had been collapsed into the '©' character by HTML-entity
        # decoding, which corrupted the query string.
        url = (
            'https://image.baidu.com/search/acjson?tn=resultjson_com'
            '&logid=&ipn=rj&ct=201326592&fp=result'
            f'&queryWord={quoted_keyword}'
            '&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=0&hd='
            '&latest=&copyright='
            f'&word={quoted_keyword}'
            '&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&fr='
            '&expermode=&force='
            f'&pn={pn}&rn={page_size}'
        )
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
            response.raise_for_status()
            response.encoding = 'utf-8'
            img_urls = thumb_pattern.findall(response.text)
        except Exception as e:
            # A failed page request aborts the whole run, as before.
            print(f'请求失败: {str(e)}')
            break

        if not img_urls:
            # Without this guard an empty page would never advance
            # `count`, and the while loop would request pages forever.
            print('没有更多图片可下载')
            break

        for img_url in img_urls:
            if count >= num_images:
                break
            try:
                img_response = requests.get(
                    img_url, headers=headers, timeout=timeout
                )
                # Do not save an HTTP error body as if it were an image.
                img_response.raise_for_status()
                file_name = os.path.join(save_dir, f'{prefix}_{count + 1}.jpg')
                with open(file_name, 'wb') as f:
                    f.write(img_response.content)
                print(f'已下载第 {count+1} 张图片')
                count += 1
            except Exception as e:
                # Best effort: report the failure and try the next URL.
                print(f'下载图片失败: {str(e)}')
                continue
        pn += page_size
    return count
if __name__ == '__main__':
    # Script entry point: fetch up to 2000 images for the search term
    # and announce the start and end of the run on stdout.
    search_term, wanted_total = '木材', 2000
    print(f'开始下载"{search_term}"的图片...')
    download_images(search_term, wanted_total)
    print('下载完成!')