下载逻辑
今天想要寻找一个网站下载一些需要的音效,了解到有爱给网这个平台,上面有很多优质的音频数据,但就当我想要保存的时候却给我当头一棒
居然需要我登录,于是我开始抓包试图获取音频文件的链接,二话不说我就开始输入链接到浏览器等待下载完成的提示,没想到居然403
我勒个豆,这还有反爬?在以前的印象中这个网站不就是直接下载的吗,怎么现在还有反爬手段了?没办法,就想着用这个练练手,没想到这个JS逆向这么恐怖
我找来找去,找到了生成链接的接口中加密参数的位置,然后开始打断点获取请求参数来写爬虫。但是多次尝试后发现这个加密参数(v)是有时间限制的。虽然找到了这个v的生成函数,可是它的传参就比较难为人了,当看到这个G参数的时候我就知道这个逆向是会脱一层皮的。于是我放弃了逆向的打算,改为通过断点获取这个参数,再填入我的代码中进行请求。但惊奇地发现除了这个加密参数以外,还存在cookie的校验,cookie校验不过关依旧会导致数据获取失败。鉴于目前的能力,我发现自己无法通过js逆向来搞定,于是开始尝试直接下载这个MP3文件。但再一次惊奇地发现,在python把所有参数都带齐的条件下,用requests获取这个文件就是奇慢无比,而当我用js发送请求的时候速度就会正常,不知是为何?
cookie的发现
在发现有更新cookie的动作后,开始尝试两次请求,第一次获取cookie,第二次才是正式请求
这一次成功拿到了请求结果,于是我便再一次不信邪地开始了第二次逆向,从找到的函数入手
这是目前的下载代码
import requests,base64
import execjs,os
import subprocess
def getUrl(headerss, data, timeout=30):
    """Fetch and decode the download URL returned by the audio_mp3 endpoint.

    The endpoint is POSTed to twice with the same headers and form data.
    The first response is used only to collect the cookies the server sets;
    those are merged into the cookie jar and sent with the second request,
    whose JSON ``message`` field is base64-decoded into the URL.

    Args:
        headerss: Extra request headers captured from the browser; they are
            merged over (and override) the built-in defaults.
        data: Form payload captured from the browser (passed through as-is).
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        The decoded URL as a ``str``.

    Raises:
        ValueError: If the second response is not JSON, has no usable
            ``message`` field, or that field is not valid base64/UTF-8.
        requests.RequestException: On network failure or timeout.
    """
    endpoint = 'https://www.aigei.com/f/d/audio_mp3'
    cookies = {
        'SESSION': '003c4dee-902a-470e-b2c1-c5b68256cb25',
        'geiweb-v': 'zZ+S93HA1Qe4Kw7ViGc+cvqZ3tMHPe5siGxgy8ipaMQb/6Ho2gnwvuSHcZZt0MUB',
        'OooOO000oOOO00o': 'c392c65b6a0f472fbda7828bff2ae832',
        'SERVERID': '8aa9d663d189326e7632b80dbf2e742f|1751880734|1751880496',
    }
    headers = {
        'Referer': 'https://www.aigei.com/sound/class/is_vip_true?page=5',
        'priority': 'u=1, i',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36 Edg/138.0.0.0',
        'x-requested-with': 'XMLHttpRequest',
        # Full cookie header as captured in the browser, kept for reference only:
        # 'cookie': 'hhhssi1ill1i=aed61887468e9cbd4fb9def33f368a9f; oOO0OO0oOO00oo0o=true; gei_d_u=87ddda7911694409a297a9b35a9a4c59; SESSION=003c4dee-902a-470e-b2c1-c5b68256cb25; geiweb-v=zZ+S93HA1Qe4Kw7ViGc+cvqZ3tMHPe5siGxgy8ipaMQb/6Ho2gnwvuSHcZZt0MUB; OooOO000oOOO00o=c392c65b6a0f472fbda7828bff2ae832; Hm_lvt_0e0ebfc9c3bdbfdcaa48ccbc43e864f9=1751866669,1751869459; Hm_lpvt_0e0ebfc9c3bdbfdcaa48ccbc43e864f9=1751880590; SERVERID=8aa9d663d189326e7632b80dbf2e742f|1751880734|1751880496',
    }
    headers.update(headerss)

    # First request: its body is ignored, only the cookies it sets are kept.
    # No status check here on purpose, so a non-2xx reply that still sets
    # cookies keeps working as before.
    response = requests.post(endpoint, cookies=cookies, headers=headers,
                             data=data, timeout=timeout)
    for cookie in response.cookies:
        cookies[cookie.name] = cookie.value
        print(f'更新cookie中--{cookie.name}: {cookie.value}')

    # Second request: the real one, sent with the refreshed cookies.
    response = requests.post(endpoint, cookies=cookies, headers=headers,
                             data=data, timeout=timeout)
    try:
        encoded = response.json()["message"]
        url = base64.b64decode(encoded).decode('utf-8')
    except (ValueError, KeyError, TypeError) as exc:
        # ValueError also covers JSON, base64 and UTF-8 decoding failures.
        raise ValueError(
            "could not extract download URL "
            f"(HTTP {response.status_code}): {response.text[:200]!r}"
        ) from exc
    print(f"url 解析成功--{url}")
    return url
def call_download_mp3(url, output_dir, filename,
                      script_path=r'D:\JiaQing\爬虫逆向\MD5\agwd.js'):
    """Run the Node.js helper script that performs the actual download.

    Invokes ``node <script_path> <url> <output_dir> <filename>`` and prints
    the script's stdout. If the script exits with a non-zero status, its
    stderr is printed instead and the function returns normally.

    Args:
        url: URL passed to the script as its first argument.
        output_dir: Directory argument passed to the script.
        filename: File name argument passed to the script.
        script_path: Path of the Node.js script to run. Defaults to the
            author's original location; override it on other machines.
    """
    try:
        # List form (no shell), so the arguments are passed through verbatim.
        result = subprocess.run(
            ['node', script_path, url, output_dir, filename],
            capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"执行出错: {e.stderr}")
    else:
        print(result.stdout)
# Fill in the `headers` and `data` captured from your own browser request here.
# NOTE(review): the sample values below are commented out and no other
# module-level `headers` / `data` is visible in this file, so the getUrl call
# below fails with NameError until they are supplied.
#
#
# headers = {
# 'cccllpptttgt': '8957eb9249ef4f7eadb2571607fd081b',
# 'X-Requested-With': 'XMLHttpRequest',
# 'X-Requested-ETag': 'ZZwzFXLqsF5JWp6zUreol5wGlr3Ee3ALvJILYCc+1H/YXsXWKmHaHpK4PB/7uTgXjUap112QFnziqQkk0OL98wrC/h9BrNVYmZAtYQfEMZMem3HTB2x0t4vcfLlLJyUQbD0QE3dm+fc=',
# 'Accept': '*/*',
# 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
# 'Referer': 'https://www.aigei.com/sound/class/is_vip_true?page=5',
# }
#
# data = {
# 'v': 'WMZ0h+I5D+CbzUJFzqeduhdu3y59c6/I1ov/bVyyOR1HUq7MDDw5Xd/mJm3aOnyYO4hMfu9QEUCbLVCjM6VNvsR+n0AS7mFr/tvZA8RcyBmC8D/s47n47wKpAWmQvzlydMOXNQIFu+K8G/CArxSc0eluZK7Fn1L3xpYskf1KlDsFyeuIfF1LyC4sYpVJUp0F1I+6JmsjpEdAkxundob39M1D82yu4WcKzSAstkfMHlZXgSZ52xBcv/p/GPctx9iMgpl/S1vcY6058BnfORW/sQX3Zx39Ri/ud1I7TR8XLxnYFg7awwBZxXvu8Jh3sPhP0SAstkfMHlZXFrR5L/ByzP/LPjVeDua+Okgn5KhRMbLs=',
# }
# Resolve the download URL, then hand it to the Node.js helper for download.
url=getUrl(headers,data)
output_dir = "./downloads"
filename = "audio.mp3"
call_download_mp3(url, output_dir, filename)
'''
fget({
"type": "audio_mp3",
"fileUuid": "",
"model": "play",
"itemId": "76105970",
"rescUrl": "76105970",
"expireTime": "1751976000528",
"token": "026a8119a616988c4ca3152acfa78a9c",
"callBack": "callBackAudioFilePlay",
"resJsCallback": "downloadAudioCallback"
})
JS发起请求的操作
'''
这是扣了半小时的代码(没有扣完,实在是太难了,放弃了)
/**
 * Request a file from the `/f/d/<type>` endpoint (de-obfuscated form of the
 * site's own function; behaviour is unchanged).
 *
 * Steps, in order:
 *   1. Reload the page if it has been open for 24 hours or more (rounded up).
 *   2. Build the request parameters with dfu() and transform them with cqbj().
 *   3. Skip the request when fIsCached() reports a hit for 'cache' / 'play'.
 *   4. POST the parameters, sending an `X-Requested-ETag` header computed by
 *      cupie() from the generated `ud` value and the `#pIii111lllE` field.
 *
 * @param {Object} U Request options: model, itemId, fileUuid, item, callBack,
 *     type, rescUrl, expireTime, confirm, token, pkg, pkgItems, saveAs,
 *     resJsCallback, vcode, plus optional focusTipScene, downUuid and
 *     gt_captchaResult.
 */
function fget(U) {
    var MS_PER_HOUR = 1000 * 60 * 60;
    var elapsedMs = new Date().getTime() - pageCreateTime;
    var elapsedHours = Math.ceil(elapsedMs / MS_PER_HOUR);
    if (elapsedHours >= 24) {
        // Self-assignment of href: this is how the original triggers a reload.
        window.location.href = window.location.href;
    }

    var model = U.model;
    var itemId = U.itemId;
    var fileUuid = U.fileUuid; // read by the original as well, never used here
    var item = U.item;
    var callbackName = U.callBack;
    var fileType = U.type;
    var rescUrl = U.rescUrl;
    var expireTime = U.expireTime;
    var confirmFlag = U.confirm;
    var token = U.token;
    var pkg = U.pkg;
    var pkgItems = U.pkgItems;
    var saveAs = U.saveAs;
    var resCallbackName = U.resJsCallback;
    var vcode = U.vcode;

    _curFileDownOpts = U;

    var endpoint = '/f/d/' + fileType;
    var params = dfu(fileType, rescUrl, expireTime, token, vcode, pkg, saveAs);
    var isPlayLog = model == 'playLog';
    var isCacheMode = model == 'cache';

    if (fIsCached) {
        if (model == 'cache' && fIsCached(itemId, null)) {
            return;
        }
        if (model == 'play' && fIsCached(itemId, fileType)) {
            window[callbackName](itemId);
            return;
        }
    }

    params.isc = isCacheMode;
    params.ilg = isPlayLog;
    params.pkgItems = pkgItems;
    params.confirm = confirmFlag || false;
    if (!_underscore.isUndefined(U.focusTipScene)) {
        params.focusTipScene = U.focusTipScene;
    }
    if (!_underscore.isUndefined(U.downUuid)) {
        params.downUuid = U.downUuid;
    }

    var encodedParams = cqbj(params);
    var payload = $.extend(encodedParams, U.gt_captchaResult || {});

    $.ajax({
        'type': 'post',
        'url': endpoint,
        'data': payload,
        'beforeSend': function(xhr) {
            var fieldValue = $('#pIii111lllE').val();
            xhr.setRequestHeader('X-Requested-ETag', cupie(params.ud + '-' + fieldValue));
        },
        'success': function(body) {
            try {
                body = $.parseJSON(body);
            } catch (parseError) {
                if (callbackName) {
                    // Non-JSON reply: pass the raw body to the callbacks and stop.
                    closeVcodeDialog();
                    window[callbackName](item, U, body);
                    if (resCallbackName && window[resCallbackName]) {
                        window[resCallbackName](item, U, body);
                    }
                    return;
                }
                var warning = {};
                warning.type = BootstrapDialog.TYPE_WARNING;
                warning.message = '很抱歉,系统出现了错误,文件下载失败,我们已经记录下错误,将会尽快解决!code=1';
                gei.util.pop(warning);
                reportWarn('download\x20server\x20json\x20parse\x20exception!\x20url=' + endpoint, 'download');
                // As in the original, execution continues to proessResultJson below.
            }
            proessResultJson(body, U);
        }
    });
}
function dfu(U, m, q, j, s, u, D) {
var Y = fInction(U, m);
Y += '-' + q + '-' + j;
var V = {
};
return V['ud'] = getUUID(),
V['v'] = Y,
V['type'] = U,
V['rescUrl'