# 简短的图片爬虫代码
#
# 发布于:2022-12-28 ⋅ 阅读:(393) ⋅ 点赞:(0)
import json
import time
import requests
from datetime import datetime
import os

class Spider:
    """Download Baidu image-search thumbnails for a keyword.

    Attributes:
        word: Search keyword. It is sent as the ``word`` / ``queryWord``
            query parameters and is also the suffix of the output
            directory name (``img<word>``).
        page_size: Number of results requested from the search endpoint
            (sent as the ``rn`` query parameter).
    """

    # Browser-like User-Agent sent with every request.
    _HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36 Edg/105.0.1343.27'
    }
    _SEARCH_URL = "https://image.baidu.com/search/acjson"
    # Seconds to wait on each HTTP request so a stalled server cannot hang
    # the script forever.
    _TIMEOUT = 10

    def __init__(self, word, page_size):
        """Initialise the attributes of the newly created object.

        Python calls this automatically when the class is instantiated;
        ``self`` is the object being created (e.g. ``baidu`` in the script
        entry point), so afterwards ``baidu.word`` and ``baidu.page_size``
        are available.

        Args:
            word: Search keyword.
            page_size: Number of images to request.
        """
        self.word = word
        self.page_size = page_size

    def get_filename(self):
        """Return the current local time as ``YYYY-mm-dd-HH-MM-SS-ffffff``.

        The microsecond component keeps names distinct for images saved in
        quick succession.
        """
        return datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f")

    def down_img(self):
        """Query the search endpoint and save each thumbnail as a ``.jpg``.

        Files are written to the directory ``img<word>`` (created on
        demand) and named by :meth:`get_filename`.  Errors are printed, not
        raised: a failure of the search request stops the run, while a
        failure on a single image only skips that image.

        Returns:
            None.
        """
        # Nothing to download; avoid a pointless network round trip.
        if self.page_size <= 0:
            return

        # Passing the query as ``params`` lets requests URL-encode the
        # keyword, so characters such as '&', '#' or spaces cannot corrupt
        # the query string.  Keys and values mirror the original request.
        params = {
            "tn": "resultjson_com",
            "logid": "7915750440644539160",
            "ipn": "rj",
            "ct": "201326592",
            "is": "",
            "fp": "result",
            "fr": "",
            "word": self.word,
            "cg": "star",
            "queryWord": self.word,
            "cl": "2",
            "lm": "-1",
            "ie": "utf-8",
            "oe": "utf-8",
            "adpicid": "",
            "st": "",
            "z": "",
            "ic": "",
            "hd": "",
            "latest": "",
            "copyright": "",
            "s": "",
            "se": "",
            "tab": "",
            "width": "",
            "height": "",
            "face": "",
            "istype": "",
            "qc": "",
            "nc": "1",
            "expermode": "",
            "nojc": "",
            "isAsync": "",
            # NOTE(review): presumably the result offset; kept at the
            # original hard-coded value -- confirm against the API.
            "pn": "30",
            "rn": self.page_size,
            "gsm": "1e",
            "1662382670563": "",
        }

        try:
            res_data = requests.get(
                self._SEARCH_URL,
                params=params,
                headers=self._HEADERS,
                timeout=self._TIMEOUT,
            )
            res_data.raise_for_status()
            # The body is a JSON string (text, not binary content); parse
            # it into a dict and pull out the list of result items.
            items = json.loads(res_data.text)["data"]
        except (requests.RequestException, ValueError, KeyError, TypeError) as e:
            print(e)
            return

        save_dir = 'img' + self.word
        i = 1  # 1-based counter of images actually saved
        for item in items:
            img_url = item.get("thumbURL", "") if isinstance(item, dict) else ""
            if not img_url:
                # An empty URL makes requests.get raise, which previously
                # aborted every remaining download; skip such entries.
                continue

            try:
                img_res = requests.get(
                    img_url, headers=self._HEADERS, timeout=self._TIMEOUT
                )
                # Do not save an HTTP error page as if it were an image.
                img_res.raise_for_status()

                # exist_ok=True replaces the duplicated exists()/mkdir()
                # branches and is safe to call on every iteration.
                os.makedirs(save_dir, exist_ok=True)
                img_path = os.path.join(save_dir, f"{self.get_filename()}.jpg")
                with open(img_path, "wb") as f:
                    print(f"正在下载第{i}张照片")
                    # Image data is binary, so write ``content`` not ``text``.
                    f.write(img_res.content)
            except (requests.RequestException, OSError) as e:
                # One bad image must not stop the rest of the batch.
                print(e)
                continue

            i += 1

# Script entry point: prompt for the keyword and the image count, then run
# the spider once.
if __name__ == '__main__':
    keyword = input("请输入你要爬取的关键字:")
    # int() raises ValueError if the reply is not a whole number.
    image_count = int(input("请输入爬取的图片个数:"))
    Spider(keyword, image_count).down_img()



# 网站公告
#
# 今日签到
#
# 点亮在社区的每一天
# 去签到