import json
import os
import time
from datetime import datetime

import requests


class Spider:
    """Download thumbnails returned by a Baidu image search for one keyword.

    Attributes:
        word: Search keyword; also used as the suffix of the output
            directory name (``img<word>``).
        page_size: Number of results requested from the search endpoint
            (sent as the ``rn`` query parameter).
    """

    # Endpoint that answers the image search with a JSON document.
    SEARCH_URL = "https://image.baidu.com/search/acjson"

    # Browser-like User-Agent sent with every request (search and images).
    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/105.0.0.0 Safari/537.36 Edg/105.0.1343.27'
    }

    # Seconds to wait on each HTTP request before giving up, so a stalled
    # connection cannot hang the whole run.
    TIMEOUT = 10

    def __init__(self, word: str, page_size: int) -> None:
        """Store the search keyword and the number of results to request."""
        self.word = word
        self.page_size = page_size

    def get_filename(self) -> str:
        """Return a file stem built from the current local time.

        Microsecond resolution keeps consecutive downloads from colliding.
        """
        return datetime.now().strftime("%Y-%m-%d-%H-%M-%S-%f")

    def _search_params(self) -> dict:
        """Build the query parameters for the search request.

        Passing these through ``params=`` lets requests percent-encode the
        keyword, so characters such as ``&`` or ``#`` cannot corrupt the
        query string.
        """
        return {
            "tn": "resultjson_com",
            "logid": "7915750440644539160",
            "ipn": "rj",
            "ct": "201326592",
            "is": "",
            "fp": "result",
            "fr": "",
            "word": self.word,
            "cg": "star",
            "queryWord": self.word,
            "cl": "2",
            "lm": "-1",
            "ie": "utf-8",
            "oe": "utf-8",
            "adpicid": "",
            "st": "",
            "z": "",
            "ic": "",
            "hd": "",
            "latest": "",
            # The original URL had "latest=©right=": "&copy" had been
            # decoded as an HTML entity, which dropped this parameter.
            "copyright": "",
            "s": "",
            "se": "",
            "tab": "",
            "width": "",
            "height": "",
            "face": "",
            "istype": "",
            "qc": "",
            "nc": "1",
            "expermode": "",
            "nojc": "",
            "isAsync": "",
            "pn": "30",
            "rn": self.page_size,
            "gsm": "1e",
            "1662382670563": "",
        }

    def down_img(self) -> None:
        """Search for ``self.word`` and save each thumbnail as a .jpg file.

        Files are written to the directory ``img<word>`` (created on
        demand) and named with ``get_filename()``. Errors are printed
        rather than raised; a failure on one image does not stop the
        remaining downloads.
        """
        try:
            res_data = requests.get(
                self.SEARCH_URL,
                params=self._search_params(),
                headers=self.HEADERS,
                timeout=self.TIMEOUT,
            )
            res_data.raise_for_status()
            # The endpoint answers with text, so parse it as JSON.
            res_dic = json.loads(res_data.text)
            items = res_dic["data"]
        except (requests.RequestException, ValueError, KeyError, TypeError) as e:
            print(e)
            return

        out_dir = 'img' + self.word
        i = 1
        for item in items:
            img_url = item.get("thumbURL", "") if isinstance(item, dict) else ""
            if not img_url:
                # Entries without a thumbnail URL cannot be downloaded.
                continue
            try:
                # Image payloads are binary, hence .content instead of .text.
                img_res = requests.get(
                    img_url, headers=self.HEADERS, timeout=self.TIMEOUT
                )
                img_res.raise_for_status()
                os.makedirs(out_dir, exist_ok=True)
                target = os.path.join(out_dir, f"{self.get_filename()}.jpg")
                with open(target, "wb") as f:
                    print(f"正在下载第{i}张照片")
                    f.write(img_res.content)
            except (requests.RequestException, OSError) as e:
                # Report the failure and carry on with the next image.
                print(e)
                continue
            i += 1


# Script entry point.
if __name__ == '__main__':
    word = input("请输入你要爬取的关键字:")
    page_size = int(input("请输入爬取的图片个数:"))
    baidu = Spider(word, page_size)
    baidu.down_img()