# A minimal Flask backend that mimics the challenge endpoint locally,
# so the page's AJAX request can be inspected while developing.
from flask import Flask
from flask import request

app = Flask(__name__)


@app.route('/data', methods=['GET', 'POST'])
def hello_world():
    """Log the incoming request and return a fixed JSON payload.

    GET  -> prints the request object.
    POST -> prints the submitted form fields, all request headers,
            and the accepted charsets (useful to see exactly what
            the browser sends versus what a scraper sends).

    :return: a constant ``{'data': 'data'}`` dict (Flask serializes it as JSON).
    """
    if request.method == "GET":
        print(request)
    if request.method == 'POST':
        print(request.form.to_dict())
        print(request.headers)
        print(request.accept_charsets)
    return {'data': 'data'}


if __name__ == '__main__':
    # debug=True: auto-reload + interactive traceback; development only.
    app.run(debug=True)
<!doctype html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport"
          content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
    <meta http-equiv="X-UA-Compatible" content="ie=edge">
    <title>
        第十题---烟雾缭绕【难度:简单】
    </title>
    <script src="https://cdn.bootcdn.net/ajax/libs/jquery/3.4.1/jquery.min.js"></script>
</head>
<body>
<h1>
    目标:采集100页的全部数字,并计算所有数据加和。当然了,有一个并不太明显的反爬手段存在
</h1>
<button id="id">按钮</button>
</body>
<script type="text/javascript">
    var url = "http://127.0.0.1:5000/data";
    // POST the requested page number to the backend and log the payload.
    call = function (num) {
        var list = {
            // BUG FIX: was String(1), which ignored the `num` argument and
            // always fetched page 1 no matter what the caller asked for.
            "page": String(num),
        };
        $.ajax({
            url: url,
            dataType: "json",
            async: true,
            data: list,
            type: "POST",
            // The suspected "anti-scraping" hook: an IIFE that does nothing.
            beforeSend: function (request) {
                (function () {
                })()
            },
            success: function (data) {
                datas = data.data;
                console.log(datas)
            }
        })
    };
    call(1);
</script>
</html>
copy 网页的代码主要是想分析 beforeSend 这个请求钩子到底做了什么,最后找了一圈也没有发现有啥,再翻了下 js 基础,发现是我想多了
这个函数什么东西都没有干
最后把请求头替换掉成功过关 具体检测的应该是请求头中的某一个 ,想要知道具体检测的是什么就把每一个请求头打上备注 看下少了哪个请求头访问会失败就完事了~
这个贴一个正则替换headers 跟一个爬虫老师学来的 具体是谁忘了。
(.*): (.*) 替换成 "$1":"$2",
最后贴上代码
import json
from typing import Dict, List
import browsercookie
import requests
from requests.cookies import RequestsCookieJar
## init for classes
# Shared HTTP session: keeps connection pooling and cookies across requests.
session = requests.session()
# Snapshot of every cookie stored by the local Chrome profile.
chrome_cookie = browsercookie.chrome()
# Accumulates the string values scraped from all 100 pages.
s = []
url = "http://www.python-spider.com/api/challenge10"
# url = 'http://127.0.0.1:5000/data'
def get_cookie_from_chrome(domain: str) -> List[Dict]:
    """Collect name/value pairs for Chrome cookies whose domain matches.

    :param domain: substring matched against each stored cookie's domain.
    :return: a list of ``{'name': ..., 'value': ...}`` dicts, one per match.
    """
    return [
        {'name': cookie.name, 'value': cookie.value}
        for cookie in chrome_cookie
        if domain in cookie.domain
    ]
def set_cookie(domain):
    """Build a RequestsCookieJar from the Chrome cookies for *domain*.

    :param domain: the domain whose cookies should be loaded into the jar.
    :return: a RequestsCookieJar populated with that domain's cookies.
    """
    jar = RequestsCookieJar()
    for entry in get_cookie_from_chrome(domain=domain):
        jar.set(entry['name'], entry['value'], domain=domain)
    return jar
if __name__ == '__main__':
    # Headers copied verbatim from a real browser session. The challenge's
    # anti-scraping check rejects requests missing one of these headers, so
    # they must be sent exactly as the browser does.
    header = {
        "Connection": "keep-alive",
        "Content-Length": "6",
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Dnt": "1",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Origin": "http://www.python-spider.com/api/challenge10",
        "Sec-Fetch-Site": "cross-site",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Dest": "empty",
        "Referer": "http://www.python-spider.com/api/challenge10",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh,en;q=0.9,zh-CN;q=0.8",
    }
    cookie_jar = set_cookie('www.python-spider.com')
    for page in range(1, 101):
        # BUG FIX: the original built `data` and then ignored it, rebuilding
        # the same dict inline; and the outer loop variable `i` was shadowed
        # by the comprehension variable below.
        data = {"page": str(page)}
        response = session.post(url, headers=header, cookies=cookie_jar, data=data)
        print(response.text)
        rows = json.loads(response.text)['data']
        # BUG FIX: the original had a literal newline inside strip('...'),
        # which is a SyntaxError; strip surrounding whitespace instead.
        s.extend(row['value'].strip() for row in rows)
    print(s)
    print(sum(int(v) for v in s))