Python 爬取木材的图片

发布于:2025-03-12 ⋅ 阅读:(81) ⋅ 点赞:(0)
import requests
import re
import os
from urllib import parse

def download_images(keyword, num_images=10, save_dir='images', timeout=10):
    """Download Baidu image-search thumbnails for a keyword.

    Result pages are requested from the Baidu image search JSON endpoint,
    every ``thumbURL`` value is pulled out of the response text with a
    regular expression, and each thumbnail is saved as ``wood_<n>.jpg``
    inside ``save_dir``.

    Args:
        keyword: Search term; it is URL-encoded before being placed in the
            query string.
        num_images: Maximum number of images to save.
        save_dir: Directory the images are written to (created if missing).
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        The number of images actually saved. This can be lower than
        ``num_images`` when a result page contains no image URLs or a page
        request fails.
    """
    # Create the output folder; exist_ok avoids the check-then-create race.
    os.makedirs(save_dir, exist_ok=True)

    # URL-encode the keyword so it can be embedded in the query string.
    keyword = parse.quote(keyword)

    # Request headers sent with every request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    count = 0  # images saved so far
    pn = 0  # value of the `pn` query parameter; advanced by 30 to match rn=30

    while count < num_images:
        # Build the request URL for the current page.
        url = f'https://image.baidu.com/search/acjson?tn=resultjson_com&logid=&ipn=rj&ct=201326592&fp=result&queryWord={keyword}&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=0&hd=&latest=&copyright=&word={keyword}&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&fr=&expermode=&force=&pn={pn}&rn=30'

        try:
            # A timeout keeps a stalled connection from hanging the script,
            # and raise_for_status turns HTTP error pages into failures.
            response = requests.get(url, headers=headers, timeout=timeout)
            response.raise_for_status()
        except requests.RequestException as e:
            print(f'请求失败: {str(e)}')
            break

        response.encoding = 'utf-8'

        # Extract the thumbnail URLs, dropping empty matches.
        img_urls = [
            found
            for found in re.findall(r'"thumbURL":"(.*?)"', response.text)
            if found
        ]

        # Without this guard a page with no image URLs would leave `count`
        # unchanged and the loop would keep requesting pages forever.
        if not img_urls:
            print('没有更多图片可供下载')
            break

        for img_url in img_urls:
            if count >= num_images:
                break

            try:
                # Fetch the image; reject non-2xx answers so that error
                # bodies are not written to disk as .jpg files.
                img_response = requests.get(
                    img_url, headers=headers, timeout=timeout
                )
                img_response.raise_for_status()

                # Save the image.
                file_name = os.path.join(save_dir, f'wood_{count+1}.jpg')
                with open(file_name, 'wb') as f:
                    f.write(img_response.content)
            except (requests.RequestException, OSError) as e:
                # Best effort: report the failure and move on to the next URL.
                print(f'下载图片失败: {str(e)}')
                continue

            print(f'已下载第 {count+1} 张图片')
            count += 1

        pn += 30  # move on to the next page of results

    return count

if __name__ == '__main__':
    # Script entry point: choose the search term and the number of images
    # to fetch, then run the download and report start and finish.
    search_term, target_total = '木材', 2000

    print(f'开始下载"{search_term}"的图片...')
    download_images(search_term, target_total)
    print('下载完成!')