js cracking
There is a way
thinking
Find the request URL -> check the request method -> a POST request always carries form data -> copy the form into your code and, as a first step, blank out the values you do not understand yet.
For example, the `salt` and `sign` fields below:
def seng_request(self):
    """Build the first-draft POST form with the not-yet-understood fields blanked.

    The commented-out entries are the sample values captured from the
    browser; the live entries next to them are placeholders to be filled in
    once it is known how each value is generated.  Only ``ts`` is already
    taken from the instance (``self.ts``).

    NOTE(review): the name looks like a typo for ``send_request`` (the name
    used in the complete listing); it is kept unchanged here.

    Returns:
        dict: the draft form, so it can be inspected.
    """
    form_data = {
        # 'i': 'aha',
        'i': '',
        'from': 'AUTO',
        'to': 'AUTO',
        'smartresult': 'dict',
        'client': 'fanyideskweb',
        # 'salt': '15932458043921',
        'salt': '',
        # 'sign': '24d1ac950b72ae268b1704034a5c172c',
        'sign': '',
        # 'ts': '1593245804392'  (timestamp)
        'ts': self.ts,
        # 'bv': '02a6ad4308a3443b3732d855273259bf',
        'bv': '',
        'doctype': 'json',
        'version': '2.1',
        # No leading space: the value is sent to the server verbatim.
        'keyfrom': 'fanyi.web',
        'action': 'FY_BY_CLICKBUTTION',
    }
    return form_data
To complete this form we have to work out how the blanked-out values are generated, i.e. "crack" the page's JavaScript.
You can go to the web page to find a script that may be related, and copy it to pycharm
Here's a tip: when the code pasted into PyCharm is messy, press Ctrl+Alt+L to reformat it and make it easier to read.
Even after copying, the code is practically unreadable. If we only need to locate specific values, we can use Ctrl+L in PyCharm to search for the key parameter names and see whether we can work out how the corresponding values are computed.
We've got salt, and then we can improve the code.
Once the code is complete, run it and analyse the result.
code
import hashlib
import random
import time


class YouDaoSpider():
    """Send one signed translation request to fanyi.youdao.com.

    The POST form needs four derived fields, all computed in ``__init__``:

    * ``ts``   - current time in milliseconds (13 digits),
    * ``salt`` - ``ts`` followed by one random digit,
    * ``bv``   - MD5 hex digest of the browser version string,
    * ``sign`` - MD5 hex digest of client + keyword + salt + a fixed secret.

    Args:
        kw: Text to translate.  When omitted (``None``) the user is prompted
            on stdin, which is the original interactive behaviour.
    """

    USER_AGENT = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
    )
    # The same client id is sent in the form and hashed into the signature,
    # so keep a single definition.
    CLIENT = 'fanyideskweb'
    SIGN_SECRET = 'mmbP%A-r6U3Nw(n]BjuEU'

    def __init__(self, kw=None):
        self.url = 'http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'
        # No hard-coded Content-Length: the body size depends on the keyword,
        # and requests computes the header from the encoded form itself.
        self.headers = {
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Connection': 'keep-alive',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Cookie': 'OUTFOX_SEARCH_USER_ID=1667356478@10.169.0.84; _ntes_nnid=b117014d95a4c5cf6832f8c92a045dcc,1589801960374; OUTFOX_SEARCH_USER_ID_NCOO=598000807.9036449; JSESSIONID=aaa3wgVaNvg1XdFRZ10lx; ___rl__test__cookies=1593256681647',
            'Host': 'fanyi.youdao.com',
            'Origin': 'http://fanyi.youdao.com',
            'Referer': 'http://fanyi.youdao.com/',
            'User-Agent': self.USER_AGENT,
            'X-Requested-With': 'XMLHttpRequest',
        }
        self.appversion = self.USER_AGENT
        if kw is None:
            kw = input('Please enter the words you want to translate:')
        self.kw = kw
        # Order matters: salt is built from ts, and sign from kw and salt.
        self.ts = self.get_ts()
        self.salt = self.get_salt()
        self.bv = self.get_bv()
        self.sign = self.get_sign()

    def send_request(self):
        """POST the signed form, print the raw response body and return it."""
        # Imported here so the signing helpers above stay importable (and
        # testable) on machines without the HTTP client installed.
        import requests

        form_data = {
            'i': self.kw,                       # captured sample: 'aha'
            'from': 'AUTO',
            'to': 'AUTO',
            'smartresult': 'dict',
            'client': self.CLIENT,
            'salt': self.salt,                  # sample: '15932458043921'
            'sign': self.sign,                  # sample: '24d1ac950b72ae268b1704034a5c172c'
            'ts': self.ts,                      # sample: '1593245804392' (timestamp)
            'bv': self.bv,                      # sample: '02a6ad4308a3443b3732d855273259bf'
            'doctype': 'json',
            'version': '2.1',
            # No leading space: the value is sent to the server verbatim.
            'keyfrom': 'fanyi.web',
            'action': 'FY_BY_CLICKBUTTION',
        }
        # A timeout keeps the script from hanging forever on a dead connection.
        response = requests.post(
            url=self.url, data=form_data, headers=self.headers, timeout=10
        )
        print(response.text)
        return response.text

    def get_ts(self):
        """Return the current time as a 13-digit millisecond timestamp string."""
        # time.time() is in seconds (10 integer digits); multiply *before*
        # truncating so the millisecond part is kept instead of being "000".
        return str(int(time.time() * 1000))

    def get_salt(self):
        """Return ``ts`` with a single random digit appended."""
        # The captured sample salt is the 13-digit ts plus exactly one digit.
        # randint is inclusive on both ends, so the upper bound must be 9.
        return self.ts + str(random.randint(0, 9))

    def get_bv(self):
        """Return the MD5 hex digest of the browser version string."""
        return hashlib.md5(self.appversion.encode()).hexdigest()

    def get_sign(self):
        """Return the MD5 hex digest of client + keyword + salt + secret."""
        data = self.CLIENT + self.kw + self.salt + self.SIGN_SECRET
        return hashlib.md5(data.encode()).hexdigest()


if __name__ == '__main__':
    yd = YouDaoSpider()
    yd.send_request()
Product catalog
In the example above there were parameters whose values we could work out by guessing and reading the script. For a site like the product catalog, however, we first request the page and save the data it returns.
What we get back looks like this: the page contains no data, only JavaScript, and parts of it (such as the functions) are hard to understand.
At this time, we can run js directly
Create a js file and copy the useful things
To execute JS from Python you need to install a library (PyExecJS).
1, Installation
pip install PyExecJS   # or, installing from the Tsinghua mirror: pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple PyExecJS
2, Execute js
import execjs

# Evaluate a single JavaScript expression and get its value back in Python.
execjs.eval("Date.now()")  # returns e.g. 1522847001080

# Compile a piece of JavaScript once, then call a function defined in it.
ctx = execjs.compile("""
    function add(x, y) {
        return x + y;
    }
""")
ctx.call("add", 1, 2)  # returns 3
import execjs

# Get the JavaScript runtime that PyExecJS will use to run the code.
node = execjs.get()
file = 'product.js'
# Read the script inside a `with` block so the handle is closed promptly, and
# name the encoding so the result does not depend on the platform default.
with open(file, encoding='utf-8') as js_source:
    ctx = node.compile(js_source.read())
# eval() returns the value of a variable (or any expression) defined in the JS.
data = ctx.eval("data")
verify_data = ctx.eval("verify")
code
from urllib.parse import urljoin

import execjs
import requests

# First probe, kept for reference: fetch the page as-is and save what the
# server returns.
# url = 'http://www.300600900.cn/'
#
# headers = {
#     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36',
# }
# response = requests.get(url=url, headers=headers)
# with open('prodct.html', 'w') as f:
#     f.write(response.text)
#

# Run the extracted JavaScript and read back the two values it computes.
ej = execjs.get()
js_name = 'product.js'
with open(js_name, encoding='utf-8') as js_file:
    node = ej.compile(js_file.read())
cookie_date = node.eval('cookie_date')
security_verify_data = node.eval('security_verify_data')
print(cookie_date)
print(security_verify_data)

url = 'http://www.300600900.cn/'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36',
}

# cookie_date has the form "name=value"; split only on the first '='.
key, value = cookie_date.split('=', 1)

session = requests.session()
# 1) first visit, 2) set the JS-computed cookie, 3) hit the verification URL,
# 4) request the page again with the same session.
session.get(url=url, headers=headers, timeout=10)
session.cookies.set(key, value)
# security_verify_data already starts with '/', so join instead of
# concatenating to avoid a doubled slash ("...cn//?security_verify_data=").
full_url = urljoin(url, security_verify_data)
session.get(url=full_url, headers=headers, timeout=10)
response = session.get(url, headers=headers, timeout=10)

# The body is decoded as UTF-8, so write it back with the same encoding
# rather than the platform default.
with open('product11.html', 'w', encoding='utf-8') as f:
    f.write(response.content.decode())
function stringToHex(str) { var val = ""; for (var i = 0; i < str.length; i++) { if (val == "") val = str.charCodeAt(i).toString(16); else val += str.charCodeAt(i).toString(16); } return val; } var width = 1400; var height = 900; var screendate = width + "," + height; cookie_date = "srcurl=" + stringToHex('http://www.300600900.cn/'); security_verify_data = "/?security_verify_data=" + stringToHex(screendate);