- html转pdf文件
抓取 aming 的 Linux 教程:先从目录页提取各章节的链接,再逐个抓取对应的网页。
import re

BASE_URL = "http://www.apelearn.com/study_v2/"

# Captures the <ul> that follows the "目录列表" (table of contents) heading.
TOC_PATTERN = re.compile(r"<h3>目录列表</h3>\s+<ul>\s+([\s\S]*?</ul>)")
# Captures the target of every href attribute inside that list.
HREF_PATTERN = re.compile(r'''href="(.*?)"''')


def extract_chapter_urls(html, base_url=BASE_URL):
    """Return the absolute chapter URLs listed in the table of contents.

    Args:
        html: Text of the index page.
        base_url: Prefix joined in front of every relative link found.

    Returns:
        A list of URLs in page order; empty when the page contains no
        table-of-contents list.
    """
    toc_blocks = TOC_PATTERN.findall(html)
    if not toc_blocks:
        # No match: return an empty result instead of indexing into [].
        return []
    return [base_url + href for href in HREF_PATTERN.findall(toc_blocks[0])]


def fetch_chapter_urls(index_url=BASE_URL):
    """Download the index page and return the chapter URLs found on it."""
    # Imported here so the pure parsing helper above stays importable
    # without the third-party dependency installed.
    import requests

    with requests.session() as session:
        # Without a timeout a stalled server would block the script forever.
        response = session.get(index_url, timeout=10)
    # Decode the body as UTF-8 regardless of what the response headers say.
    response.encoding = "utf-8"
    return extract_chapter_urls(response.text, index_url)


if __name__ == "__main__":
    urls = fetch_chapter_urls()
    for url in urls:
        print(url)
http://www.apelearn.com/study_v2/chapter1.html http://www.apelearn.com/study_v2/chapter2.html http://www.apelearn.com/study_v2/chapter3.html http://www.apelearn.com/study_v2/chapter4.html http://www.apelearn.com/study_v2/chapter5.html http://www.apelearn.com/study_v2/chapter6.html http://www.apelearn.com/study_v2/chapter7.html http://www.apelearn.com/study_v2/chapter8.html http://www.apelearn.com/study_v2/chapter9.html http://www.apelearn.com/study_v2/chapter10.html http://www.apelearn.com/study_v2/chapter11.html http://www.apelearn.com/study_v2/chapter12.html http://www.apelearn.com/study_v2/chapter13.html http://www.apelearn.com/study_v2/chapter14.html http://www.apelearn.com/study_v2/chapter15.html http://www.apelearn.com/study_v2/chapter16.html http://www.apelearn.com/study_v2/chapter17.html http://www.apelearn.com/study_v2/chapter18.html http://www.apelearn.com/study_v2/chapter19.html http://www.apelearn.com/study_v2/chapter20.html http://www.apelearn.com/study_v2/chapter21.html http://www.apelearn.com/study_v2/chapter22.html http://www.apelearn.com/study_v2/chapter23.html http://www.apelearn.com/study_v2/chapter24.html http://www.apelearn.com/study_v2/chapter25.html http://www.apelearn.com/study_v2/chapter26.html
生成一个空白的PDF,再把各章节的PDF按文件名中的序号依次合并进去
import codecs
import contextlib
import os

import PyPDF2

# Collect only the chapter files named "<number>.pdf". This also skips the
# merged "aminglinux.pdf" left behind by an earlier run, whose non-numeric
# name would otherwise make the numeric sort below raise ValueError.
files = list()
for fileName in os.listdir("aming"):
    stem, ext = os.path.splitext(fileName)
    if ext == ".pdf" and stem.isdecimal():
        files.append(fileName)

# Sort by the numeric value so that "10.pdf" comes after "9.pdf".
newFiles = sorted(files, key=lambda d: int(os.path.splitext(d)[0]))
print(newFiles)


os.chdir("aming")
pdfWriter = PyPDF2.PdfFileWriter()  # start from an empty PDF

# Every source file is closed when this block exits. They are kept open
# until the merged file has been written because the writer may still read
# page data from the source streams at write time.
with contextlib.ExitStack() as stack:
    for item in newFiles:
        pdfFile = stack.enter_context(open(item, "rb"))
        pdfReader = PyPDF2.PdfFileReader(pdfFile)
        for page in range(pdfReader.numPages):
            pdfWriter.addPage(pdfReader.getPage(page))

    with codecs.open("aminglinux.pdf", "wb") as f:
        pdfWriter.write(f)
[]
- python3使用模块PIL来处理图片
from PIL import Image

# Load the sample picture, report its basic properties and preview it.
image = Image.open("test.jpg")
print(image.format, image.size, image.mode)
image.show()

# Box of the region to work on, as (left, upper, right, lower).
position = (320, 65, 460, 220)

# Cut the region out and preview it on its own.
cutjpg = image.crop(position)
cutjpg.show()

# Turn the same region upside down and paste it back in place.
region = image.crop(position)
cutjpg = region.transpose(Image.ROTATE_180)
image.paste(cutjpg, position)
image.show()

# Shrink to a width of 30 pixels, scaling the height by the same ratio.
x, y = image.size
newx = 30
newy = int(y * newx / x)
newimage = image.resize((newx, newy))
newimage.show()

# Frequently used calls:
#   image.show()               preview the image
#   Image.open(file)           load an image from a file
#   image.save(outputfile)     write the image to a file
#   image.crop((left, upper, right, lower))    cut out a region
#
# Geometric operations:
#   out = im.resize((128, 128))                 change the image size
#   out = im.rotate(45)                         rotate 45 degrees counter-clockwise
#   out = im.transpose(Image.FLIP_LEFT_RIGHT)   mirror left/right
#   out = im.transpose(Image.FLIP_TOP_BOTTOM)   mirror top/bottom
#   out = im.transpose(Image.ROTATE_90)         rotate by 90 degrees
#   out = im.transpose(Image.ROTATE_180)        rotate by 180 degrees
#   out = im.transpose(Image.ROTATE_270)        rotate by 270 degrees
import random
import string

from PIL import Image, ImageFont, ImageDraw, ImageFilter

font_path = "msyh.ttf"
number = 4
size = (100, 30)
bgcolor = (255, 255, 255)
fontcolor = (0, 0, 255)
linecolor = (255, 0, 0)
draw_line = True
# Number of interference lines drawn over the text
line_number = 30


def getNumber():
    """Return a random string of ``number`` distinct ASCII letters/digits."""
    source = list(string.ascii_letters) + list(string.digits)
    # random.sample draws without replacement, so no character repeats.
    return "".join(random.sample(source, number))


def getLine(draw, width, height):
    """Draw one interference line between two random points."""
    begin = random.randint(0, width), random.randint(0, height)
    end = random.randint(0, width), random.randint(0, height)
    draw.line([begin, end], fill=linecolor)


def _text_size(font, text):
    """Return the (width, height) of ``text`` for old and new Pillow."""
    if hasattr(font, "getbbox"):
        # Pillow 10 removed getsize(); the right/bottom edges of the
        # bounding box are measured from the drawing origin, which keeps
        # the same centring the getsize()-based layout produced.
        _, _, right, bottom = font.getbbox(text)
        return right, bottom
    return font.getsize(text)


def getCode(output_path='idencode.png'):
    """Render a captcha image, save it and return the text it shows.

    Args:
        output_path: Where the PNG is written; defaults to the original
            hard-coded file name.

    Returns:
        The random string drawn on the image, so the caller can compare it
        with what the user types.
    """
    width, height = size
    image = Image.new("RGBA", size, bgcolor)
    font = ImageFont.truetype(font_path, 25)
    draw = ImageDraw.Draw(image)
    text = getNumber()
    font_width, font_height = _text_size(font, text)
    # Centre the text inside the image.
    draw.text(((width - font_width) / 2, (height - font_height) / 2),
              text, font=font, fill=fontcolor)
    if draw_line:
        for _ in range(line_number):
            getLine(draw, width, height)

    # Edge-enhance filter applied after the text and lines are drawn.
    image = image.filter(ImageFilter.EDGE_ENHANCE_MORE)
    image.save(output_path)
    return text


if __name__ == '__main__':
    getCode()