zoukankan      html  css  js  c++  java
  • pubmed_cookie 自动获取

    Python爬虫视频教程零基础小白到scrapy爬虫高手-轻松入门

    https://item.taobao.com/item.htm?spm=a1z38n.10677092.0.0.482434a6EmUbbW&id=564564604865

    #coding=utf-8
    import requests
    from bs4 import BeautifulSoup
    import urllib.request as ur
    import urllib.parse as par
    import re
    
    
    
    
    # Obtain the PubMed cookie automatically (instead of hard-coding it)
    #cookie_start = "entrezSort=pubmed:; "
    #cookie_end = "; _ga=GA1.2.82560226.1459065437; _gat=1; ncbi_prevPHID=3971636F6DCF48491459222804198SID; prevselfurl=http%3A//www.ncbi.nlm.nih.gov/; unloadnext=jsevent%3Dunloadnext%26ncbi_pingaction%3Dunload%26eventid%3D1%26jsperf_dns%3D3923%26jsperf_connect%3D252%26jsperf_ttfb%3D310%26jsperf_basePage%3D9%26jsperf_frontEnd%3D4645%26jsperf_navType%3D0%26jsperf_redirectCount%3D0%26maxScroll_x%3D0%26maxScroll_y%3D0%26currScroll_x%3D0%26currScroll_y%3D0%26hasScrolled%3Dfalse%26ncbi_phid%3D3971636F6DCF48491459222804198SID%26sgSource%3Dnative"
    # Landing page whose response carries the session cookies we need.
    pubmed_url = r'http://www.ncbi.nlm.nih.gov/pubmed'

    # Fetch the landing page once so the server hands out fresh cookies.
    # requests never times out on its own, so bound the wait explicitly,
    # and fail loudly on an HTTP error instead of continuing without cookies.
    pubmed_r = requests.get(pubmed_url, timeout=30)
    pubmed_r.raise_for_status()
    print(pubmed_r.cookies)

    # Flatten the cookie jar into a plain {name: value} dict.
    pubmed_cookie_dict = requests.utils.dict_from_cookiejar(pubmed_r.cookies)
    print(pubmed_cookie_dict)

    # Serialise the cookies as a Cookie header value. Pairs are separated
    # by "; " (semicolon + space), as the Cookie header grammar requires.
    cookie = "; ".join("%s=%s" % (k, v) for k, v in pubmed_cookie_dict.items())
    print(cookie)
    
    # Request headers sent with the search request: they imitate a desktop
    # Chrome browser coming from the PubMed site and carry the cookie string
    # built earlier in the script.
    heads = dict(
        Host="www.ncbi.nlm.nih.gov",
        Origin="http://www.ncbi.nlm.nih.gov",
        Referer="http://www.ncbi.nlm.nih.gov/pubmed",
    )
    # Hyphenated header names cannot be passed as keyword arguments,
    # so they are added by subscript.
    heads["User-Agent"] = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.85 Safari/537.36"
    heads["Cookie"] = cookie
    
    # Search URL for the term "bone fracture" with 200 results per page.
    # The query string is a captured copy of the PubMed results-form state
    # (including its duplicated fields), so it is kept verbatim. It is written
    # as adjacent string literals inside parentheses: Python concatenates them
    # at compile time into one URL with no embedded line breaks.
    kw_url = (
        "http://www.ncbi.nlm.nih.gov/pubmed/?term=bone+fracture+&EntrezSystem2.PEntrez.PubMed.Pubmed_PageController.PreviousPageName=results&EntrezSystem2.PEntrez.PubMed.Pubmed_PageController.SpecialPageName=&EntrezSystem2.PEntrez.PubMed.Pubmed_Facets.FacetsUrlFrag=filters%3D&EntrezSystem2.PEntrez.PubMed.Pubmed_Facets.FacetSubmitted=false&EntrezSystem2.PEntrez.PubMed.Pubmed_Facets.BMFacets=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.sPresentation=docsum&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.sPageSize=200&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.sSort=none&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.FFormat=docsum&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.FSort=&email_format=docsum&email_sort=&email_count=20&email_start=1&email_address=&email_subj=bone+fracture+-+PubMed&email_add_text=&EmailCheck1=&EmailCheck2=&coll_start=1&citman_count=20&citman_start=1&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.FileFormat=docsum&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.LastPresentation=docsum&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.Presentation=docsum&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.PageSize=200&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.LastPageSize=200&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.Sort=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.LastSort=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.FileSort=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.Format=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.LastFormat=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.PrevPageSize=200&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.PrevPresentation=docsum&EntrezSystem2.PEntrez"
        ".PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.PrevSort=&CollectionStartIndex=1&CitationManagerStartIndex=1&CitationManagerCustomRange=false&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_ResultsController.ResultCount=188348&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_ResultsController.RunLastQuery=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Entrez_Pager.cPage=1&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Entrez_Pager.cPage=1&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.sPresentation2=docsum&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.sPageSize2=20&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.sSort2=none&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.FFormat2=docsum&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Pubmed_DisplayBar.FSort2=&email_format2=docsum&email_sort2=&email_count2=20&email_start2=1&email_address2=&email_subj2=bone+fracture+-+PubMed&email_add_text2=&EmailCheck1=&EmailCheck2=&coll_start2=1&citman_count2=20&citman_start2=1&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.EmailReport=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.EmailFormat=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.EmailCount=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.EmailStart=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.EmailSort=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.Email=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.EmailSubject=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.EmailText=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.EmailQueryKey=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.EmailHID=1JUaNZm7_muMenLr1UCg7qnKsykz3SEoWOFpZKTJpg1OPsm8DTZY4SFI2IPzWw7ap95YjxnHe5u1jGZfR_IvM4TVVA2azjhpkrl&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.QueryDescription=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Emai"
        "lTab.Key=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.Answer=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.Holding=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.HoldingFft=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.HoldingNdiSet=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.OToolValue=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.EmailTab.SubjectList=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.TimelineAdPlaceHolder.CurrTimelineYear=&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.TimelineAdPlaceHolder.BlobID=NCID_1_330998348_130.14.18.34_9001_1459126857_1489724809_0MetA0_S_MegaStore_F_1&EntrezSystem2.PEntrez.DbConnector.Db=pubmed&EntrezSystem2.PEntrez.DbConnector.LastDb=pubmed&EntrezSystem2.PEntrez.DbConnector.Term=bone+fracture&EntrezSystem2.PEntrez.DbConnector.LastTabCmd=&EntrezSystem2.PEntrez.DbConnector.LastQueryKey=1&EntrezSystem2.PEntrez.DbConnector.IdsFromResult=&EntrezSystem2.PEntrez.DbConnector.LastIdsFromResult=&EntrezSystem2.PEntrez.DbConnector.LinkName=&EntrezSystem2.PEntrez.DbConnector.LinkReadableName=&EntrezSystem2.PEntrez.DbConnector.LinkSrcDb=&EntrezSystem2.PEntrez.DbConnector.Cmd=PageChanged&EntrezSystem2.PEntrez.DbConnector.TabCmd=&EntrezSystem2.PEntrez.DbConnector.QueryKey=&p%24a=EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Entrez_Pager.Page&p%24l=EntrezSystem2&p%24st=pubmed&EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Entrez_Pager.CurrPage=1"
    )
    print(kw_url)
    
    # Submit the search. All form state travels in the URL's query string;
    # the POST body is empty. Bound the wait (requests has no default
    # timeout) and stop on an HTTP error rather than parsing an error page.
    r = requests.post(kw_url, headers=heads, timeout=30)
    r.raise_for_status()

    # Pass the raw bytes to BeautifulSoup so it detects the page encoding
    # itself (forcing GBK garbles non-ASCII text), and name the parser
    # explicitly so the result does not depend on which optional parsers
    # happen to be installed.
    soup = BeautifulSoup(r.content, 'html.parser')

    # The pager's <input id="pageno"> carries the total page count in its
    # "last" attribute. Report a clear error if the page has no such input.
    page_input = soup.find('input', id="pageno")
    if page_input is None or not page_input.has_attr('last'):
        raise RuntimeError(
            'could not find <input id="pageno" last="..."> in the PubMed response'
        )
    pagitation = page_input['last']
    print('共有页面数:' + pagitation)
    

      

  • 相关阅读:
    [转] 疯狂的JSONP
    [转] ASP.NET MVC 模型绑定的功能和问题
    【JavaScript 从零开始】变量作用域
    【JavaScript 从零开始】 原始值和对象引用、类型转换
    【JavaScript 从零开始】 数字 文本 包装对象
    【JavaScript 从零开始】 语言核心部分----可选的分号
    解决 Jquery UI Tooltip 用在Select 的BUG
    大前端工具集
    前端知识体系2
    前端知识体系
  • 原文地址:https://www.cnblogs.com/webRobot/p/5363970.html
Copyright © 2011-2022 走看看