zoukankan      html  css  js  c++  java
  • 使用python抓取数据之菜鸟爬虫1

    '''
    Created on 2018-5-27
    
    @author: yaoshuangqi
    '''
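    # Fetches Double Color Ball (SSQ) lottery results (originally from the Baidu Lecai site; getSSQ100 now requests 17500.cn) and uses only the most recent 100 draws.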
    import urllib.request
    from bs4 import BeautifulSoup
    import random
    
    # Completed draws collected by getSSQ100(): each entry is a list of ball
    # values with the blue ball appended last. Read by chooseSSQ().
    ere_hitlist = []
    # Working list of red-ball values that have not (yet) been folded into a
    # completed draw; managed by getSSQ100().
    hitlist = []
    def getSSQ100():
        """Download the SSQ results page and rebuild the module-level draw history.

        The page is parsed for ``<td class="red_ball">`` and
        ``<td class="blue_ball">`` cells.  Red cells are grouped six at a time
        in document order, and the n-th group is paired with the n-th blue cell.
        Each resulting row ``[red1, ..., red6, blue]`` is stored in
        ``ere_hitlist``.

        Side effects:
            * ``ere_hitlist`` is emptied in place and refilled, so calling this
              function again does not duplicate rows.
            * ``hitlist`` is rebound to the trailing red values that did not
              form a complete group of six (empty for a well-formed page).

        Raises:
            urllib.error.URLError: if the page cannot be fetched.
            UnicodeDecodeError: if the page is not valid gb18030.
        """
        global hitlist
        # Previous data source, kept for reference:
        # http://trend.lecai.com/ssq/redBaseTrend.action?recentPhase=100&onlyBody=false&phaseOrder=up&coldHotOrder=number
        site = 'https://www.17500.cn/ssq/'
        # The context manager closes the HTTP response even if read/decode fails.
        with urllib.request.urlopen(site) as page:
            # NOTE(review): the encoding is carried over from the original
            # code; confirm the site still serves gb18030.
            html = page.read().decode('gb18030')
        soup = BeautifulSoup(html, "html.parser")

        reds = [tag.contents[0] for tag in soup.find_all("td", class_="red_ball")]
        blues = [tag.contents[0] for tag in soup.find_all("td", class_="blue_ball")]

        # Group by position instead of relying on a fixed total of 600 cells,
        # so the last draw is kept whatever the page length is.
        complete = len(reds) // 6
        draws = [reds[start * 6:(start + 1) * 6] for start in range(complete)]
        hitlist = reds[complete * 6:]

        # Mutate in place so references to the list held elsewhere stay valid.
        ere_hitlist.clear()
        # zip() stops at the shorter sequence: a draw without a matching blue
        # cell is left out rather than raising IndexError.
        for draw, blue in zip(draws, blues):
            ere_hitlist.append(draw + [blue])
             
    def _draw_distinct(chosen, pool, target):
        """Append random members of *pool* to *chosen* until it holds *target* distinct values.

        Values are drawn from *pool* as given, so a value that occurs more
        often in *pool* is proportionally more likely to be picked.  Values
        already in *chosen* are never picked again.  Stops early when *pool*
        has nothing new left to offer, which guarantees termination.
        """
        candidates = [value for value in pool if value not in chosen]
        while len(chosen) < target and candidates:
            pick = random.choice(candidates)
            chosen.append(pick)
            candidates = [value for value in candidates if value != pick]


    def chooseSSQ():
        """Print a random guess for the next draw, weighted by ``ere_hitlist``.

        Every row of the module-level ``ere_hitlist`` is read as red numbers
        followed by one blue number in the last position.  Values outside the
        valid ranges (red ``'01'``-``'33'``, blue ``'01'``-``'16'``) are ignored.

        Selection rules:
            * Blue: one value drawn from the historical blues (if any).  When
              the history does not contain exactly 100 blues, a second value
              drawn uniformly from the full blue range is added as well.
            * Red: six distinct values.  With exactly 600 historical reds all
              six come from the history; otherwise up to three come from the
              history and the rest from the full red range.  If the history
              cannot supply enough distinct values the full range fills the gap.

        The counts and the final guess are printed; nothing is returned.
        """
        red_range = ['%02d' % number for number in range(1, 34)]
        blue_range = ['%02d' % number for number in range(1, 17)]

        # Split every historical row into its reds and its trailing blue.
        red_history = []
        blue_history = []
        for draw in ere_hitlist:
            if not draw:
                continue
            *reds, blue = draw
            red_history.extend(value for value in reds if value in red_range)
            if blue in blue_range:
                blue_history.append(blue)
        print("红号:",len(red_history),"蓝号:",len(blue_history))

        # random.choice() is used throughout: random.randint(0, len(seq)) is
        # inclusive at the top and would index one past the end.
        lhlist = []
        if blue_history:
            lhlist.append(random.choice(blue_history))
        if len(blue_history) != 100:
            lhlist.append(random.choice(blue_range))

        hhlist = []
        history_quota = 6 if len(red_history) == 600 else 3
        _draw_distinct(hhlist, red_history, history_quota)
        _draw_distinct(hhlist, red_range, 6)

        print("根据前100期双色球中奖号码,本人预测下一期中奖号码是,红号:",hhlist,",蓝号:",lhlist)
     
    if __name__ == '__main__':
        # Script entry point: load the draw history first, then print a guess
        # derived from it.
        getSSQ100()
        chooseSSQ()
  • 相关阅读:
    Alibaba 阿里组件 nacos注册中心 gateway网关 flowable流程引擎 vue.js前后分离 spring cloud微服务
    Java JDBC 连接 MySQL8 数据库
    Java设计模式【命令模式】
    Spring Boot内嵌tomcat关于getServletContext().getRealPath获取得到临时路径的问题
    Java代理模式学习 (Proxy模式)
    Java单例模式浅析 (Singleton模式)
    由歌词引发的模式思考之中篇(AbstractFactory模式)
    Java模拟FilterChain的实现 (Chain Of Responsibility模式)
    由歌词引发的模式思考之上篇(FactoryMethod模式)
    由歌词引发的模式思考之下篇(模拟Spring的BeanFactory)
  • 原文地址:https://www.cnblogs.com/ysq0908/p/9097420.html
Copyright © 2011-2022 走看看