zoukankan      html  css  js  c++  java
  • day 18

    1、爬虫的例子

    #爬虫的例子(方法一)
    import re
    from urllib.request import urlopen
    
    def getPage(url):
        """Download *url* and return the response body decoded as UTF-8.

        Args:
            url: address to open with ``urlopen``.

        Returns:
            The response body as ``str``.
        """
        # The ``with`` block closes the response even if reading or decoding
        # raises; the previous version never closed it.
        with urlopen(url) as response:
            return response.read().decode('utf-8')
    
    def parsePage(s):
        """Extract the movie entries from the HTML text of one listing page.

        Args:
            s: HTML text to search.

        Returns:
            A list of ``(id, title, rating_num, comment_num)`` string tuples,
            one per matched ``<div class="item">`` entry, in document order.
            ``comment_num`` is the text that precedes "评价" in its span.
        """
        # Raw strings keep \d as the regex digit class; the earlier pattern
        # contained a bare "d+", which only matches the literal letter "d".
        # re.S lets "." cross newlines, since one entry spans several lines.
        ret = re.findall(
            r'<div class="item">.*?<div class="pic">.*?<em .*?>(?P<id>\d+).*?<span class="title">(?P<title>.*?)</span>'
            r'.*?<span class="rating_num" .*?>(?P<rating_num>.*?)</span>.*?<span>(?P<comment_num>.*?)评价</span>',
            s, re.S)
        return ret
    
    def main(num):
        """Fetch one listing page and print the entries parsed from it.

        Args:
            num: value substituted for the ``start`` query parameter.
        """
        page_url = 'https://movie.douban.com/top250?start=%s&filter=' % num
        movies = parsePage(getPage(page_url))
        print(movies)
    
    # Request ten pages; the start offset advances by 25 per page.
    count = 0
    while count < 250:
        main(count)
        count += 25
    
    # url 从网页上把代码搞下来
    # bytes decode ——> utf-8 网页内容就是我的待匹配字符串
    # ret = re.findall(正则,待匹配的字符串)  #ret是所有匹配到的内容组成的列表
    #爬虫的例子(方法二)
    import requests
    
    import re
    import json
    
    def getPage(url):
        """Fetch *url* with ``requests.get`` and return the response text."""
        return requests.get(url).text
    
    def parsePage(s):
        """Yield one dict per movie entry found in the HTML text *s*.

        Args:
            s: HTML text to search.

        Yields:
            Dicts with the string keys ``"id"``, ``"title"``, ``"rating_num"``
            and ``"comment_num"``, one per matched ``<div class="item">``
            entry, in document order.
        """
        # Raw strings keep \d as the regex digit class; the earlier pattern
        # contained a bare "d+", which only matches the literal letter "d".
        # re.S lets "." cross newlines, since one entry spans several lines.
        com = re.compile(
            r'<div class="item">.*?<div class="pic">.*?<em .*?>(?P<id>\d+).*?<span class="title">(?P<title>.*?)</span>'
            r'.*?<span class="rating_num" .*?>(?P<rating_num>.*?)</span>.*?<span>(?P<comment_num>.*?)评价</span>',
            re.S)

        for match in com.finditer(s):
            # The pattern's named groups are exactly the four output keys.
            yield match.groupdict()
    
    def main(num):
        """Fetch one listing page and append its entries to ``move_info7``.

        Every parsed entry is printed and written to the file as one JSON
        object per line.

        Args:
            num: value substituted for the ``start`` query parameter.
        """
        url = 'https://movie.douban.com/top250?start=%s&filter=' % num
        response_html = getPage(url)
        ret = parsePage(response_html)
        print(ret)

        # ``with`` closes (and flushes) the file even if a write fails; the
        # previous version left the handle open.
        with open("move_info7", "a", encoding="utf8") as f:
            for obj in ret:
                print(obj)
                # ensure_ascii=False writes non-ASCII text as-is instead of
                # \uXXXX escapes.
                data = json.dumps(obj, ensure_ascii=False)
                # The newline literal had been split across two source
                # lines, which is a syntax error.
                f.write(data + "\n")
    
    if __name__ == '__main__':
        # Ten pages; the start offset grows by 25 per page (0, 25, ..., 225).
        for count in range(0, 250, 25):
            main(count)

    2、计算器

    # Sample expression to evaluate. The spaces inside it (including inside
    # numbers such as "6 0") are removed before evaluation; note that `a` is
    # reassigned from user input further down the script.
    a = '1 - 2 * ( ( 6 0 -3 0  +(-40/5) * (9-2*5/3 + 7 /3*99/4*2998 +10 * 568/14 )) - (-4*3)/ (16-3*2) )'
    
    import re
    
    def format(new_equation):
        """Collapse doubled signs: ``+-`` becomes ``-``, then ``--`` becomes ``+``.

        The two replacements run in that order, so ``+--`` ends up as ``+``.
        """
        return new_equation.replace('+-', '-').replace('--', '+')
    
    def cal(val_son):
        """Evaluate a single ``a/b`` or ``a*b`` expression.

        Division is checked before multiplication. Returns the result as the
        ``str`` of a float, or ``None`` when the text has neither operator.
        """
        if '/' in val_son:
            left, right = val_son.split('/')
            quotient = float(left) / float(right)
            return str(quotient)
        if '*' in val_son:
            left, right = val_son.split('*')
            product = float(left) * float(right)
            return str(product)
        return None
    
    def no_brackets(val):
        """Evaluate an arithmetic expression that has no inner parentheses.

        Args:
            val: expression text made of numbers and ``+ - * /`` with no
                spaces; any leading or trailing parentheses are stripped.

        Returns:
            The result as a string: ``str`` of a float, or ``"0"`` when the
            text contains no number.
        """
        new_val = val.strip('()')
        # Digits need the \d class and the decimal point needs escaping; the
        # earlier patterns had lost their backslashes and never matched.
        mul_div = re.compile(r'\d+\.?\d*[*/]-?\d+\.?\d*')
        while True:
            match = mul_div.search(new_val)  # leftmost multiplication/division
            if match is None:
                break
            # Splice the result over the matched span only. str.replace would
            # also rewrite the same text where it is part of a longer number
            # (the "2*3" inside "12*3").
            new_val = new_val[:match.start()] + cal(match.group()) + new_val[match.end():]
            new_val = format(new_val)

        # Only additions and subtractions remain: read every number together
        # with its optional leading minus sign and add them up.
        terms = re.findall(r'-?\d+\.?\d*', new_val)
        return str(sum(float(term) for term in terms))
    
    def func(new_equation):
        """Evaluate an expression that may contain nested parentheses.

        Innermost parenthesised groups are evaluated one at a time and replaced
        by their value until none remain; the remaining flat expression is
        then evaluated.

        Args:
            new_equation: expression text with no spaces.

        Returns:
            The result as a string, as produced by ``no_brackets``.
        """
        # The parentheses must be escaped to match literal "(" and ")". The
        # unescaped form was a capture group matching any paren-free run, so
        # the loop never terminated.
        innermost = re.compile(r'\([^()]+\)')
        while True:
            match = innermost.search(new_equation)
            if match is None:
                return no_brackets(new_equation)
            value = no_brackets(match.group())
            # Replace exactly the matched group with its value.
            new_equation = new_equation[:match.start()] + value + new_equation[match.end():]
            # Substituting a negative value can create "+-" or "--" pairs.
            new_equation = format(new_equation)
    
    # Read an expression from the user, drop every space, evaluate and print.
    a = input("请输入要计算的式子>>>")
    print(func(a.replace(' ', '')))
  • 相关阅读:
    解码.NET 2.0配置之谜(二)
    .NET (C#) Internals: Delegates (1)
    .NET (C#) Internals: Delegates (2)
    Windows Vista Beta 2 尝鲜
    assembly 需要 unload 和 update 的时候怎么办?测试工程
    让 NDoc 1.3 支持.NET 2.0 程序集,泛型输出和 Visual studio 2005 解决方案导入
    号召,有兴趣做博客园自己的网络游戏的请举手..
    将执行文件转化为bat批处理文件的工具(批处理文件方式提供)
    ISAPI Filter实现的防盗链程序终于完工
    1分钟破解3dState '学习版'得一些版权信息。
  • 原文地址:https://www.cnblogs.com/ysging/p/10107805.html
Copyright © 2011-2022 走看看