- 第一步:开门见山,如图所示:
- 第二步:断点调试,分析代码,定位加密位置,如图所示:
- 第三步:将 webpack 打包代码整体扣下,把加载器(模块导入函数)挂到全局变量上,即可在外部直接调用,如图所示:
- 第四步:全扣难免要补环境的,代码如下:
js代码
// Hand-written browser-environment shim: defines the globals (window, document,
// screen, history, location, Element, localStorage) as plain objects/functions
// before the bundled code runs. Every stub function below either logs its
// argument or does nothing, and returns undefined.

// Alias the global object as `window`, then remove the `global` binding.
// NOTE(review): presumably this hides the Node.js runtime from the bundle's
// environment checks - confirm against the bundle.
window = global;
delete global;
window.outerHeight = 1067;
// Stub constructor: only logs the argument it receives.
window.HTMLElement = function (ele){
    console.log('HTMLElement:::', ele)
}
// Fake `document`: the DOM lookup/creation methods only log the requested
// name and return undefined; referrer and cookie are fixed strings.
// NOTE(review): the cookie value looks like it was captured from a real
// browser session and may need refreshing - verify.
document = {
    addEventListener: function (ele){
        console.log(ele)
    },
    visibilityState: 'visible',
    createElement: function (ele){
        console.log('createElement:::', ele)
    },
    getElementById: function (ele){
        console.log('getElementById:::', ele)
    },
    referrer: 'https://www.mashangpa.com/',
    cookie: '_nano_fp=Xpmyn0mJlp9an0TJlT_HGuTzlWAsbBrvgDFr0Id7; Hm_lvt_0d2227abf9548feda3b9cb6fddee26c0=1754743882,1754788130,1754789668,1754791984; HMACCOUNT=6D71F8525EDFF963; Hm_lpvt_0d2227abf9548feda3b9cb6fddee26c0=1754800093'
};
// Only the screen property defined here is available; anything else is undefined.
window.screen = {
    availWidth: 1707
};
// `history.back` is a no-op.
history = {
    back: function (){}
};
// Fixed page location for the problem-detail page; port is the empty string.
location = {
    href: 'https://www.mashangpa.com/problem-detail/18/',
    port: ''
};
// `Element` is a plain object whose prototype exposes a no-op attachShadow.
Element = {};
Element.prototype = {
    attachShadow: function (){}
};
// Fake localStorage: getItem logs the key and returns undefined (not null).
localStorage = {
    getItem: function (ele){
        console.log('getItem:::', ele)
    }
}
// The webpack bundle is omitted here; get_sign() below expects it to have
// defined window.encrypt_m.
/**
 * Build the request signature for the current moment.
 *
 * Takes the current time in milliseconds, passes it as `serverTime` to the
 * object produced by module 4 of the globally exposed webpack loader
 * (`window.encrypt_m`), calls `messagePack()` on the result and appends
 * `btoa('luoge' + ts)`.
 *
 * @returns {{m: string, timestamp: number}} the signature and the timestamp
 *     it was derived from.
 */
function get_sign() {
    var ts = Date.now();
    var m = window.encrypt_m(4)({
        'serverTime': ts
    }).messagePack() + btoa('luoge' + ts);
    // Return `ts` directly: the original assigned it to an undeclared
    // `timestamp` identifier, which leaked an accidental global variable
    // (and would throw a ReferenceError in strict mode).
    return {
        'm': m,
        'timestamp': ts
    };
}
// Self-check: prints one signature whenever the script is loaded.
console.log(get_sign())
py代码
import asyncio, aiohttp, execjs
class AsyncSpider(object):
    """Fetch the paginated problem-18 data API concurrently and sum the numbers.

    Each request is signed by calling ``get_sign`` in a JavaScript file compiled
    with ``execjs``; the returned ``m`` and ``timestamp`` values are sent as the
    ``M`` and ``Timestamp`` request headers.

    Args:
        js_path: Path of the JavaScript file that defines ``get_sign``.
        concurrency: Maximum number of requests allowed in flight at once.
    """

    def __init__(self, js_path='2.js', concurrency=3):
        self.url = 'https://www.mashangpa.com/api/problem-detail/18/data/'
        # Base headers shared by every request; per-request signature headers
        # are added to a copy in fetch_page, never to this dict.
        self.headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36',
        }
        # NOTE(review): hard-coded session credential - presumably needs to be
        # replaced with a valid session id before running; confirm.
        self.cookies = {
            'sessionid': '7iz4z3zugx9xbdrm2ykqpfaoswvf7a3o',
        }
        self.concurrency = concurrency
        # Re-created inside parse_all_pages so it belongs to the running loop.
        self.semaphore = asyncio.Semaphore(concurrency)
        with open(js_path, 'r', encoding='utf-8') as f:
            self.js_code = execjs.compile(f.read())

    async def fetch_page(self, session, page):
        """Request one page and return its list of numbers.

        Args:
            session: Client session exposing ``get(url, headers=, cookies=,
                params=)`` as an async context manager (an
                ``aiohttp.ClientSession`` in ``parse_all_pages``).
            page: Page number sent as the ``page`` query parameter.

        Returns:
            The ``current_array`` value of the JSON response, or ``[]`` when
            that key is absent.
        """
        async with self.semaphore:
            # NOTE: this is a synchronous call, so the event loop is blocked
            # while the JavaScript runs.
            m_t = self.js_code.call('get_sign')
            # Build the headers per request instead of mutating self.headers,
            # which is shared by every concurrently running fetch_page call.
            headers = {
                **self.headers,
                'M': m_t['m'],
                'Timestamp': str(m_t['timestamp']),
            }
            params = {'page': page}
            async with session.get(self.url, headers=headers, cookies=self.cookies, params=params) as res:
                data = await res.json()
        return data.get('current_array', [])

    async def parse_all_pages(self, first_page=1, last_page=20):
        """Fetch every page in ``[first_page, last_page]`` and total the values.

        Args:
            first_page: First page number to request (inclusive).
            last_page: Last page number to request (inclusive).

        Returns:
            The sum of all numbers returned by the requested pages; the same
            value is also printed.
        """
        # Create the semaphore inside the running loop: before Python 3.10 a
        # semaphore built in __init__ is bound to a different loop than the
        # one asyncio.run() starts, which fails once tasks have to wait on it.
        self.semaphore = asyncio.Semaphore(self.concurrency)
        async with aiohttp.ClientSession() as session:
            tasks = [
                self.fetch_page(session, page)
                for page in range(first_page, last_page + 1)
            ]
            results = await asyncio.gather(*tasks)
        total_sum = sum(sum(array) for array in results if array)
        print(total_sum)
        return total_sum
# Script entry point: build the spider and drive the whole crawl to completion.
if __name__ == '__main__':
    asyncio.run(AsyncSpider().parse_all_pages())