1.查询参数的基本使用
我们在使用百度搜索的时候,经常发现url地址中会有一个?,该问号后边的部分就是请求参数,又叫做查询字符串
1.什么叫做查询参数
url中?后面就是请求参数.示例代码如下:
import requests
# Target address for the Baidu search request.
url = 'https://www.baidu.com/s'
# Query parameters; requests sends them as the query string after the "?".
param = dict(wd='张柏芝')
# Send a desktop-browser User-Agent with the request.
headers = dict([
    ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'),
])
# Issue the GET request and print the HTTP status code of the response.
response = requests.get(url=url, params=param, headers=headers)
print(response.status_code)
执行结果如下:
2.查询参数的形式:
kw = {'wd': '长城'}
3.requests传递参数的用法
关于参数的注意点:很多参数是没有用的,例如百度搜索中的url,其中参数只有一个字段有用,其他的都可以删除,如何确定哪些请求参数是有用的或者没有用的:挨个尝试!对应的,在后续的爬虫中,遇到很多参数的url地址,都可以尝试删除参数
需求:爬取今日头条指定词条对应的搜索结果页面(简易网页采集器)
# 需求:模拟浏览器,按用户指定的关键字发起搜索,并获取对应的结果页面数据
import requests
# Request headers: present a desktop-browser User-Agent. The header name must
# be spelled 'User-Agent'; a 'user_agent' key is sent as a different,
# unrelated header and leaves the default requests User-Agent in place.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
}
# Search endpoint the request is sent to.
url = 'https://so.toutiao.com/search'
# Read the search keyword from the user on stdin.
ip = input('指定搜索关键字:')
# Query parameters; requests sends them as the query string after the "?".
param = {
    'keyword': ip,
}
# Send the GET request.
response = requests.get(url, headers=headers, params=param)
# Decode the response body as UTF-8 before reading it as text.
response.encoding = 'utf-8'
res_text = response.text
# The output file is named after the keyword the user typed.
fileName = ip + '.html'
# Persist the page; the with-block closes the file even if the write fails.
with open(fileName, 'w', encoding='utf-8') as f:
    f.write(res_text)
print(fileName, '爬取成功!')
执行结果
打开html也可看见
爬取排名靠前的喜剧电影
# 爬取豆瓣电影分页数据
import requests
import json
# Target address: the Douban chart endpoint that responds with JSON.
url = 'https://movie.douban.com/j/chart/top_list'
# Query parameters sent with the request.
# NOTE(review): 'type' presumably selects the genre and 'start'/'limit' the
# page offset and page size — confirm against the site's own requests.
param = {
    'type': "24",
    'interval_id': "100:90",
    'action': "",
    'start': "20",
    'limit': "20",
}
# Send a desktop-browser User-Agent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
}
# Send the GET request and parse the response body as JSON.
response = requests.get(url, params=param, headers=headers)
list_data = response.json()
# Persist the parsed data. The with-block closes the file even if json.dump
# raises, which the manual open()/close() pair did not guarantee.
# ensure_ascii=False writes non-ASCII characters as-is instead of \u escapes.
# NOTE(review): the leading dot makes this a hidden file on Unix-like
# systems — confirm that the file name is intended.
with open('.douban_list_name.json', 'w', encoding='utf-8') as fp:
    json.dump(list_data, fp, ensure_ascii=False)
print("保存成功")
执行结果
[
{
"rating": [
"9.0",
"45"
],
"rank": 21,
"cover_url": "https://img1.doubanio.com/view/photo/s_ratio_poster/public/p456703618.jpg",
"is_playable": true,
"id": "1310177",
"types": [
"剧情",
"喜剧",
"动画"
],
"regions": [
"日本"
],
"title": "东京教父",
"url": "https://movie.douban.com/subject/1310177/",
"release_date": "2003-11-08",
"actor_count": 35,
"vote_count": 278792,
"score": "9.0",
"actors": [
"江守彻",
"梅垣义明",
"冈本绫",
"饭塚昭三",
"加藤精三",
"石丸博也",
"槐柳二",
"屋良有作",
"寺濑今日子",
"能登麻美子"
],
"is_watched": false
},
{
"rating": [
"8.9",
"45"
],
"rank": 22,
"cover_url": "https://img2.doubanio.com/view/photo/s_ratio_poster/public/p2531065411.jpg",
"is_playable": true,
"id": "27060077",
"types": [
"剧情",
"喜剧",
"传记",
"音乐"
],
"regions": [
"美国",
"中国大陆"
],
"title": "绿皮书",
"url": "https://movie.douban.com/subject/27060077/",
"release_date": "2019-03-01",
"actor_count": 44,
"vote_count": 1838323,
"score": "8.9",
"actors": [
"维果·莫腾森",
"马赫沙拉·阿里",
"琳达·卡德里尼",
"塞巴斯蒂安·马尼斯科",
"迪米特·D·马里诺夫",
"迈克·哈顿",
"P·J·伯恩",
"乔·柯蒂斯",
"玛姬·尼克松",
"冯·刘易斯"
],
"is_watched": false
},
{
"rating": [
"8.9",
"45"
],
"rank": 23,
"cover_url": "https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2219011938.jpg",
"is_playable": true,
"id": "1291543",
"types"