zoukankan      html  css  js  c++  java
  • python3小demo

      总结常用的功能小实例,快速学习并掌握python技能

    1.墨迹天气

    import requests
    from lxml.html import etree
    import json
    import time        # 导入模块
    class MoJiWeather():
        """Moji Weather lookup: prompt for a city, resolve its id, scrape and print
        the current weather page.

        NOTE(review): except for city_name, the methods take their payload as the
        first positional argument instead of ``self`` and are invoked unbound as
        ``MoJiWeather.search_city(...)`` by the script below; the signatures are
        kept unchanged for backward compatibility with those call sites.
        """

        def city_name(self):
            """Prompt the user and return the city name they typed."""
            cityname = str(input("输入城市名称:"))
            return cityname

        def search_city(city_name):
            """Resolve *city_name* to a city weather-page URL via Moji's search API.

            Terminates the program (SystemExit) when the city cannot be resolved.
            """
            # Query endpoint that returns JSON with a 'city_list' of matches.
            index_url = "http://tianqi.moji.com/api/citysearch/%s" % city_name
            response = requests.get(index_url)
            response.encoding = "utf-8"
            try:
                # First match's cityId keys the redirect URL for the weather page.
                city_id = json.loads(response.text).get('city_list')[0].get('cityId')
                city_url = "http://tianqi.moji.com/api/redirect/%s" % str(city_id)
                return city_url
            except (ValueError, KeyError, IndexError, TypeError, AttributeError):
                # Bad JSON, empty result list, or missing keys: invalid city name.
                # (Narrowed from a bare `except:` that also hid real errors.)
                print('城市名输入错误')
                raise SystemExit

        def parse(city_url):
            """Fetch *city_url* and print current weather details parsed via XPath."""
            response = requests.get(city_url)
            response.encoding = 'utf-8'
            html = etree.HTML(response.text)
            # Each field below is the first text node matching its XPath.
            current_city = html.xpath("//div[@class='search_default']/em/text()")[0]
            print('当前城市:'+current_city)
            current_kongqi = html.xpath("//div[@class='left']/div[@class='wea_alert clearfix']/ul/li/a/em/text()")[0]
            print('空气质量:'+current_kongqi)
            current_wendu = html.xpath("//div[@class='left']/div[@class='wea_weather clearfix']/em/text()")[0]
            print('当前温度:'+current_wendu)
            current_weather = html.xpath("//div[@class='wea_weather clearfix']/b/text()")[0]
            print('天气状况:' + current_weather)
            current_shidu = html.xpath("//div[@class='left']/div[@class='wea_about clearfix']/span/text()")[0]
            print('当前湿度:'+current_shidu)
            current_fengji = html.xpath("//div[@class='left']/div[@class='wea_about clearfix']/em/text()")[0]
            print('当前风速:'+current_fengji)
            # Nearby scenic spots is a list; print each on its own indented line.
            jingdian = html.xpath("//div[@class='right']/div[@class='near'][2]/div[@class='item clearfix']/ul/li/a/text()")
            print('附近景点:')
            for j in jingdian:
                print('		'+j)
    if __name__ == '__main__':
        # Entry point. The methods are (unusually) called unbound on the class:
        # city_name needs a throwaway first argument in place of `self`, and the
        # other two take their payload as the first positional argument.
        print("欢迎使用墨迹天气查询系统")
        city_name = MoJiWeather.city_name(1)
        city_url = MoJiWeather.search_city(city_name)
        MoJiWeather.parse(city_url)
        print("谢谢使用本查询系统")
        input("按任意键退出...")

    2.Tiobe排行榜

    import json
    from lxml import etree
    from lxml.etree import ParseError
    import requests
    from requests.exceptions import RequestException
    
    '''
        lxml实例应用
    '''
    
    '''
        获取页面数据
    '''
    def one_to_page(url):
        """Yield the Tiobe top-20 table rows, five text fields at a time.

        :param url: Tiobe index page URL.
        :return: generator of 5-element lists
            (current rank, previous rank, language, rating, change).
        """
        headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36'
        }

        try:
            res = requests.get(url, headers=headers)
            body = res.text  # raw page HTML
        except RequestException as e:
            print('request is error', e)
            # Original fell through and hit a NameError on `body`; stop cleanly
            # instead and yield no rows.
            return

        try:
            html = etree.HTML(body, etree.HTMLParser())
            # All descendant text nodes of each <tr> (text only; images skipped).
            result = html.xpath('//table[contains(@class,"table-top20")]/tbody/tr//text()')

            # Slice the flat text list into 20 rows of 5 fields each.
            # (The original's special-cased `i == 0` branch yielded the same
            # slice as the general case and has been removed.)
            for pos in range(0, 20 * 5, 5):
                yield result[pos:pos + 5]

        except ParseError as e:
            print(e.position)
    
    
    '''
        写入文件
    '''
    def write_file(data):
        """Append each 5-field ranking row to test.txt as one JSON line.

        :param data: iterable of 5-element sequences
            (2018 rank, 2017 rank, language, rating, change).
        :return: None (the caller checks this to report success).
        """
        for item in data:
            sul = {
                '2018年6月排行': item[0],
                '2017年6排行': item[1],
                '开发语言': item[2],
                '评级': item[3],
                '变化率': item[4]
            }
            # `with` closes the file even on error; no explicit close() needed.
            # (The original's '\n' literal was split across lines — a syntax
            # error — and it redundantly called f.close() inside the with.)
            with open('test.txt', 'a', encoding='utf-8') as f:
                f.write(json.dumps(sul, ensure_ascii=False) + '\n')
            print(sul)
        return None
    
    '''
        主程序
    '''
    def main():
        """Fetch the Tiobe index page and dump the top-20 table to test.txt."""
        url = 'https://www.tiobe.com/tiobe-index/'
        data = one_to_page(url)
        ret = write_file(data)
        # write_file always returns None; 'ok' just confirms the run completed.
        # (`is None` replaces the non-idiomatic `== None` comparison.)
        if ret is None:
            print('ok')

    if __name__ == '__main__':
        main()

    3.新闻列表

    '''
        墨迹天气文章爬虫
    '''
    import requests
    import json
    from lxml.html import etree
    from lxml.etree import ParseError
    
    '''
        解析页面内容
    '''
    def parseHtml(content):
        """Yield article-link hrefs from a Moji news list page, 8 at a time.

        :param content: page HTML text.
        :return: generator of lists of up to 8 href strings.

        NOTE(review): the downstream write_log indexes item[3]/item[5] of each
        chunk as "time"/"title", but these chunks contain hrefs — confirm the
        intended XPath (the commented-out //text() variant may be the one wanted).
        """
        try:
            html = etree.HTML(content, etree.HTMLParser())
            # one = html.xpath('//ul[@class="advisory_list_item"]//text()')
            one = html.xpath('//ul[@class="advisory_list_item"]//li/a/@href')
            # (Removed leftover debug `print(one)` / `exit(0)` — the exit killed
            # the whole program as soon as this generator was first iterated.)

            LOOP = 8
            # Slice the flat href list into 20 chunks of LOOP entries each.
            # (The original's `i == 0` branch yielded the same slice as the
            # general case and has been removed.)
            for pos in range(0, 20 * LOOP, LOOP):
                yield one[pos:pos + LOOP]

        except ParseError as e:
            print(e.position)
    
    '''
        写入文件
    '''
    def write_log(data):
        """Append one JSON line per item to moji.log.

        :param data: iterable of sequences with at least 6 elements;
            item[3] is logged as the publish time, item[5] as the title.
        :return: None (the caller checks this to report success).
        """
        for item in data:
            msg = {
                '发文时间': item[3],
                '文章标题': item[5]
            }
            # `with` closes the file even on error; no explicit close() needed.
            # (The original's '\n' literal was split across lines — a syntax
            # error — and it redundantly called f.close() inside the with.)
            with open('moji.log', 'a', encoding='utf-8') as f:
                f.write(json.dumps(msg, ensure_ascii=False) + '\n')
            print(msg)
        return None
    '''
        主程序
    '''
    def main():
        """Walk Moji news listing pages 1..72, parse each and log its articles."""
        for page in range(1, 73):
            url = 'https://tianqi.moji.com/news/list/moji/{}'.format(page)
            res = requests.get(url)
            res.encoding = 'utf-8'
            # write_log returns None on completion; report each finished page.
            if write_log(parseHtml(res.text)) is None:
                print('ok')

    if __name__ == '__main__':
        main()

    4.爬取IP

    import requests
    import re
    import random
    
    from bs4 import BeautifulSoup
    
    # Pool of browser User-Agent strings; one is picked at random per request
    # so the scraper's traffic looks less uniform.
    # NOTE(review): the last entry contains stray spaces around '/' and
    # 'likeGecko' — looks mangled in transit; confirm before relying on it.
    ua_list = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36",
        "Mozilla / 5.0(Windows NT 6.1;WOW64) AppleWebKit / 537.36(KHTML, likeGecko) Chrome / 45.0.2454.101Safari / 537.36"
        ]
    
    
    def ip_parse_xici(page):
        """Collect proxy "ip:port" strings from xicidaili.com and print them.

        :param page: number of pages to collect (pages 1 .. page-1 are fetched);
            accepts a string, converted with int().
        :return: None; the collected list is printed at the end.
        """
        ip_list = []
        for pg in range(1, int(page)):
            url = 'http://www.xicidaili.com/nn/' + str(pg)
            user_agent = random.choice(ua_list)
            my_headers = {
                'Accept': 'text/html, application/xhtml+xml, application/xml;',
                'Accept-Encoding': 'gzip, deflate, sdch',
                'Accept-Language': 'zh-CN,zh;q=0.8',
                'Referer': 'http: // www.xicidaili.com/nn',
                'User-Agent': user_agent
            }
            try:
                r = requests.get(url, headers=my_headers)
                soup = BeautifulSoup(r.text, 'html.parser')
            except requests.exceptions.ConnectionError:
                print('ConnectionError')
            else:
                data = soup.find_all('td')
                # Patterns for an IPv4 cell and a port cell.  The backslashes
                # are restored: as pasted (`d+.d+...`) the regex matched literal
                # 'd' characters instead of digits.
                ip_compile = re.compile(r'<td>(\d+\.\d+\.\d+\.\d+)</td>')
                port_compile = re.compile(r'<td>(\d+)</td>')
                ips = ip_compile.findall(str(data))      # all IPs on the page
                ports = port_compile.findall(str(data))  # all ports on the page
                check_api = "http://ip.taobao.com/service/getIpInfo2.php?ip="
                api_headers = {'User-Agent': user_agent}

                # Keep only the ip/port pairs whose lookup request succeeds.
                # (The original deleted from ips/ports while indexing them with
                # range(len(ips)), which skips the element after every deletion.)
                ips_usable = []
                ports_usable = []
                for ip, port in zip(ips, ports):
                    try:
                        requests.get(url=check_api + ip, headers=api_headers, timeout=2)
                        print("ip:%s 可用" % ip)
                        ips_usable.append(ip)
                        ports_usable.append(port)
                    except Exception as e:
                        print("此ip %s 已失效:%s" % (ip, e))

                ip_list += [':'.join(n) for n in zip(ips_usable, ports_usable)]
                print('第{}页ip采集完成'.format(pg))
        print(ip_list)
    
    
    if __name__ == '__main__':
        # Page count is read as text; ip_parse_xici converts it with int().
        xici_pg = input("请输入需要采集的页数:")
        ip_parse_xici(page=xici_pg)
  • 相关阅读:
    代码结构
    linux 启动盘制作multisystem
    cmake 各种语法的作用
    leetcode Longest Consecutive Sequence
    leetcode find kth
    leetcode twoSum
    S3pool pytorch
    数学:优化:拉格朗日乘子法
    Fisher判别分析(线性判别分析——LDA)
    数学:优化:牛顿法
  • 原文地址:https://www.cnblogs.com/xingxia/p/python_demo.html
Copyright © 2011-2022 走看看