zoukankan      html  css  js  c++  java
  • python 获取提交表单网址内容(即需要密码网址)以财务网站为例

    import requests
    import re
    from functools import reduce
    import json
    import base64
    import time
    import os


    -------- Required Python packages (imported above) ---------------


# --- CAS single-sign-on login (uis.shou.edu.cn) -----------------------------
# NOTE(review): the base64 password string, username, and several URLs below
# were blanked out when this post was scraped; fill them in before running.
pwd = base64.b64decode("")    # base64-decode the stored password

loginMeta = {"username":'',"pwd":pwd} # username and password

msession = requests.Session()
# POST the CAS login form.  allow_redirects=False everywhere so each redirect
# hop can be followed by hand and its Set-Cookie headers captured.
ret = msession.post("http://uis.shou.edu.cn/cas/login?isLoginService=11&service=http://ecampus.shou.edu.cn/c/portal/login",
{
"username":loginMeta['username'],
"password":loginMeta['pwd'],
"submit":""
},allow_redirects = False)
# First redirect hop: capture the portal session cookie.
ret = msession.get(ret.headers['Location'],allow_redirects = False)
# NOTE(review): split(';')[0] yields the full "name=value" pair; if that name
# is itself JSESSIONID, the header below becomes "JSESSIONID=JSESSIONID=..."
# — confirm against a live response.
Jsession = ret.headers['Set-Cookie'].split(';')[0]
ret = msession.get(ret.headers['Location'],allow_redirects = False)

# Cookie header reused for the portal requests below.
Jheaders = {'Cookie':'COOKIE_SUPPORT=true; JSESSIONID=%s; GUEST_LANGUAGE_ID=zh_CN'%Jsession}

# Portal page that seeds the finance-site session (URL blanked in scrape);
# follow its redirect chain manually until no Location header remains.
getASessionUrl = ''
ret = msession.get(getASessionUrl,headers=Jheaders,allow_redirects=False)
while 'Location' in ret.headers:
    ret = msession.get(ret.headers['Location'])

# Finance-site entry point (URL blanked in scrape); same manual redirect walk.
fwUrl = ""
ret = msession.get(fwUrl,headers=Jheaders,allow_redirects=False)
while 'Location' in ret.headers:
    ret = msession.get(ret.headers['Location'])
# Snapshot all cookies accumulated during the redirect chains; the keys
# ASP.NET_SessionId and SFP_Verify_Cookie are read later in this file.
ACookies = requests.utils.dict_from_cookiejar(msession.cookies)

ret = msession.get('')

ret = msession.get('')


    ------------ Login section: obtain and persist the session cookies -------------------------------------
    def parseOrderInfo(content):
    content = content.replace(" ", '').replace(' ', '').replace(' ', ' ')
    eles = re.findall('<tr.{0,4}orderno="\d+".{900}', content, re.I)
    orders = []
    for ele in eles:
    p = re.subn("<td.*?>(.*?)</td>", " \1", ele) #subn替换函数
    p = re.subn("<input.*?/>", "", p[0])
    p = p[0]
    p = list(filter(lambda x: x if len(x.strip()) > 1 else None, p.split(' ')))
    print(p)
    if (len(p) > 6):
    cinfo = {}
    cinfo["orderId"] = p[1].strip()
    cinfo["project"] = p[2].strip()
    cinfo["reason"] = p[3].strip()
    cinfo["pay"] = p[4].strip()
    cinfo["date"] = p[6].strip()
    orders += [cinfo]
    else:
    raise Exception("too LONG order Description")

    return orders


    ret = msession.post(url='http://cwc1.shou.edu.cn:82/SFP_ClaimsSelf/OrderQuery/OrderShow',
    data={'DepartProject': '',
    'Depart': '',
    'depname': '',
    'Object': '',
    'projectname': '',
    'OrderStartTime': '',
    'OrderEndTime': '',
    'OrderNo': '',
    'OrderState': '1,2,3,4,5,8,-1',
    'ExpenBusinessType': '',
    'currentPageIndex': '1',
    'num': '1',
    'isture': 'false',
    'ProxyPerson': '',
    'OrderRemark': ''},
    headers={'Cookie': 'ASP.NET_SessionId=%s; SFP_Verify_Cookie=%s'
    % (ACookies["ASP.NET_SessionId"],
    ACookies["SFP_Verify_Cookie"]),
    'Referer': 'http://cwc1.shou.edu.cn:82/SFP_ClaimsSelf/OrderQuery/OrderIndex',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest'})
    time.sleep(2)
    seaContent = ret.content.decode()
    orderInfo = parseOrderInfo(seaContent)
    orders = re.findall("SFP_ClaimsSelf/OrderQuery/PrintOrder?OrderNo=(\d+)", seaContent)
    # pages=2
    pages = int(re.findall("pagecount: '(\d*)'", seaContent)[0])
    if pages > 1:
    for i in range(1, pages + 1):
    if i == 1:
    ret = msession.post(url='http://cwc1.shou.edu.cn:82/SFP_ClaimsSelf/OrderQuery/OrderShow', #提交post表单
    data={'DepartProject': '',
    'Depart': '',
    'depname': '',
    'Object': '',
    'projectname': '',
    'OrderStartTime': '',
    'OrderEndTime': '',
    'OrderNo': '',
    'OrderState': '1,2,3,4,5,8,-1',
    'ExpenBusinessType': '',
    'currentPageIndex': '1',
    'num': '1',
    'isture': 'false',
    'ProxyPerson': '',
    'OrderRemark': ''},
    headers={'Cookie': 'ASP.NET_SessionId=%s; SFP_Verify_Cookie=%s'
    % (ACookies["ASP.NET_SessionId"],
    ACookies["SFP_Verify_Cookie"]),
    'Referer': 'http://cwc1.shou.edu.cn:82/SFP_ClaimsSelf/OrderQuery/OrderIndex',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest'})
    else:
    ret = msession.post(url='http://cwc1.shou.edu.cn:82/SFP_ClaimsSelf/OrderQuery/OrderShow',
    data={'DepartProject': '',
    'Depart': '',
    'depname': '',
    'Object': '',
    'projectname': '',
    'OrderStartTime': '',
    'OrderEndTime': '',
    'OrderNo': '',
    'OrderState': '1,2,3,4,5,8,-1',
    'ExpenBusinessType': '',
    'currentPageIndex': '%d' % i,
    'num': '2',
    'isture': 'false',
    'ProxyPerson': '',
    'OrderRemark': ''},
    headers={'Cookie': 'ASP.NET_SessionId=%s; SFP_Verify_Cookie=%s'
    % (ACookies["ASP.NET_SessionId"],
    ACookies["SFP_Verify_Cookie"]),
    'Referer': 'http://cwc1.shou.edu.cn:82/SFP_ClaimsSelf/OrderQuery/OrderIndex',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest'})
    seaContent = ret.content.decode()
    orders += re.findall("SFP_ClaimsSelf/OrderQuery/PrintOrder?OrderNo=(\d+)", seaContent)
    orderInfo += parseOrderInfo(seaContent)

    time.sleep(1)
    # orderprint
    for orderId in orders:
    Url = 'http://cwc1.shou.edu.cn:82/SFP_ClaimsSelf/OrderQuery/PrintOrder?OrderNo='
    printUrl = Url + orderId
    Number = int(i)

    # print(printUrl)
    result = requests.get(url=printUrl,
    headers={'Cookie': 'ASP.NET_SessionId=%s; SFP_Verify_Cookie=%s'
    % (ACookies["ASP.NET_SessionId"],
    ACookies["SFP_Verify_Cookie"]),
    'Referer': ret.url,
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    })
    address="E:/totally/FinancePDF"+"/" #pdf文件存储地址
    fileName=orderId+".pdf"
    if os.path.isfile(address+fileName):
    print(fileName+'文件已存在')
    else:
    with open(address+orderId+".pdf", "wb") as f:
    f.write(result.content)


    sumInfo = {"detail": orders}

    print(json.dumps(sumInfo, indent=4))

    ------------------ Form-submission section (every form field must be present, even if empty, or the site stops returning results) --------------------------------------

    if __name__ == '__main__':
    parseOrderInfo()
    
    
     
     
     
  • 相关阅读:
    PO BO VO DTO POJO DAO DO这些Java中的概念分别指一些什么?
    前端面试题汇总(待续)
    vue lottie vue-lottie : 使用教程
    webstorm 换行时 代码不对齐
    webstorm 导出编辑器配置.editorconfig
    vue 查看dist文件里的结构
    vue-cli 生产打包
    element form 校验数组每一项
    typescript无法识别vue中的$refs
    mac 10.14.5 [vue create的时候 mkdir没有权限]
  • 原文地址:https://www.cnblogs.com/setname/p/8417737.html
Copyright © 2011-2022 走看看