zoukankan html css js c++ java

8.20(day19)re,typing,collections模块，简单爬虫

复习

numpy模块

numpy数组(矩阵)的运算, 科学运算, tensorflow

pandas模块

文件(excel)的处理

read_excel()/to_excel()

matplotlib模块

画图, plt.plot()/plt.bar()/plt.scatter()/plt.hist()/plt.pie()

re模块

# re模块   从字符串中找特定的字符串
# import re
# s = '王大炮打炮被大炮打死了王大炮打炮被大炮打死了'
# # ^ 从开头查找
# print(re.findall('^王大炮',s))
# # $ 从结尾查找
# print(re.findall('死了$',s))
# # [] 匹配中间字符，只要单个字符
# s = 'asdfghjkl'
# print(re.findall('[as]',s))
# # [^] 对[]内元素取反
# print(re.findall('[^asd]',s))
# # . 任意字符(除了\n)
# # print(re.findall('a..',s))
#
# s = 'asdasdaaasdaaaaa'
# # * 前面的字符0到无数个
# print(re.findall('a*',s))   #非它字符也要   算空
# # + 1到无穷个
# print(re.findall('a+',s))
# ? 0到1个
# print(re.findall('a?',s))
# {m} 匹配前面字符m个
# print(re.findall('a{5}',s))
# {n,m} 匹配前面字符n到m个
# print(re.findall('a{2,5}',s))

# \d 数字
# s = '123asd456fgh'
# print(re.findall(r'\d',s))
# \D 非数字
# print(re.findall(r'\D',s))
# \w 数字字母下划线
# print(re.findall(r'\w',s))
# \W 非数字字母下划线
# print(re.findall(r'\W',s))
# \s 空格/\t/\n
# s = 'da-d-fa   f\nak'
# print(re.findall(r'\s',s))
# \S 非空格/\t/\n
# print(re.findall(r'\S',s))
# \ 取消意义(转义)
# s = r'a\s'
# print(re.findall(r'a\\s',s))
# .*贪婪模式，一直寻找
# s = '123asdfgh45asasdada'
# print(re.findall('a.*s',s))
# .*? 非贪婪模式
# print(re.findall('a.*?s',s))
# ()   只要括号内的
# A|B   A和B都要


# re模块的用法
 # re.compile
'''
修饰符	描述
re.I	使匹配对大小写不敏感
re.L	做本地化识别（locale-aware）匹配
re.M	多行匹配，影响 ^ 和 $
re.S	使 . 匹配包括换行在内的所有字符
re.U	根据Unicode字符集解析字符。这个标志影响 w, W, , B.
re.X	该标志通过给予你更灵活的格式以便你将正则表达式写得更易于理解。
'''
# s = 'asdfgh\njkl123'
# print(re.findall(r'\d+',s))
# com = re.compile(r'\d+')
# # par = r'\d+'
# com = re.compile('3.')
# print(re.findall(com,s))

# re.split()   按照匹配规则切割
# re.sub()   按照匹配规则替换
# re.subn()   按照匹配规则替换并计数
# # re.findall()拿出匹配的东西
# re.match()从开头搜索，找到打印，没找到就none
# re.search()搜索到第一个就停止

typing模块

# typing模块: 提供了 Generator、Iterable、Iterator 三种数据类型，用来限制函数的参数类型
from typing import SupportsInt
def f(x: int, y: int):
    """Return the result of ``x + y``.

    The ``int`` annotations are hints only; nothing in this function
    checks the argument types.
    """
    total = x + y
    return total


# Demo call: prints 30.
res = f(10, 20)
print(res)
# 一些数据类型python不支持
# 参数数据类型
# 生成器：generator   可迭代对象迭代器对象
def func(i: int, f: float, b: bool, lt: list, tup: tuple, dic: dict):
    """Collect the six arguments into a list, in parameter order."""
    return [i, f, b, lt, tup, dic]


# Runs without error even though 2 is not a float and 6 is not a dict:
# the call does not follow the annotations, but nothing rejects it.
res = func(1, 2, True, [1, 2], (1, 2), 6)
print(res)
def func1(lt):
    """Print the first element of ``lt`` (``lt[0]``)."""
    first = lt[0]
    print(first)

collections模块

# collections模块：复杂的数据类型
# 有名元组
# p = (1,2)
# from collections import namedtuple
# point = namedtuple('point',['x','y'])
# p = point(1,2)
# print(p.x)
# print(p.y)

# # 默认字典
# from collections import defaultdict
# dic = defaultdict(lambda:'nan')   #dic={}
# dic['a']=1
# print(dic['a'])
# print(dic['c'])
#
# # 双端队列
# # lis = [1,2,3]
# # lis.append(4)
# # print(lis)
# from collections  import deque
# de = deque([1,2,3,])
# de.append(4)
# de.appendleft(0)
# print(de)
# 计数器
# from collections import Counter
# s = 'programming'
# dic = {}
# for i in s:
#     if i in dic:
#         dic[i]+=1
#     else:
#         dic[i]=1
# print(dic)
# c = Counter()   #字典
# for i in s:
#     c[i]+=1
# print(c)

简单爬虫

#简单爬虫
# import requests
# res = requests.get('http://duanziwang.com/')
# data = res.text
# # print(data)
# import re
# res = re.findall(' <div class="post-content">        <p>(.*?)</p>    </div>',data)
# for i in res:
#     print(i)
import re
import os
import requests
# Download every .jpg referenced on list pages 1-8 of xiaohuar.com into ./img.
from urllib.parse import urljoin

# open(..., 'wb') does not create missing directories, so make sure the
# target folder exists before the first download.
os.makedirs('img', exist_ok=True)

# Raw string with an escaped dot so that only a literal ".jpg" suffix matches;
# compiled once because the same pattern is applied to every page.
img_pattern = re.compile(r'src="(.*?\.jpg)"')

for page in range(1, 9):
    page_url = f"http://www.xiaohuar.com/list-2-{page}.html"
    page_res = requests.get(page_url)

    for src in img_pattern.findall(page_res.text):
        # Resolve site-relative paths against the page URL; an src that is
        # already an absolute URL is returned unchanged instead of getting
        # the domain prepended a second time.
        img_url = urljoin(page_url, src)
        img_name = img_url.split('/')[-1]
        img_path = os.path.join('img', img_name)

        img_res = requests.get(img_url)
        # The with-block flushes and closes the file on exit.
        with open(img_path, 'wb') as fw:
            fw.write(img_res.content)
        print(f"下载图片{img_name}成功")

查看全文

相关阅读:
谈一下ACM的入门书籍及方法
 acm总结帖_By AekdyCoin
楼天城楼教主的acm心路历程
 弱校ACM奋斗史
 【转】编程的浅学习与深学习
 HDOJ 1047 Integer Inquiry （大数）
【链性栈】表达式求值
 【链性栈】基本链性栈的实现
 Beta冲刺博客
 Alpha项目测试

原文地址：https://www.cnblogs.com/jiann/p/11529054.html