zoukankan      html  css  js  c++  java
  • 2018年6月10日笔记

    • html转pdf文件

    抓取aming的linux教程,抓取每个网页。

     import re
     import requests

     # Fetch the tutorial index page and collect the absolute URL of every
     # chapter linked from its "目录列表" (table of contents) list.

     # Captures everything from the first entry of the <ul> that follows the
     # table-of-contents heading up to and including its closing </ul>.
     # [\s\S] is used so the capture can span line breaks.
     reg = re.compile(r"<h3>目录列表</h3>\s+<ul>\s+([\s\S]*?</ul>)")
     # Captures the target of each href="..." attribute inside that list.
     regurl = re.compile(r'''href="(.*?)"''')

     url = "http://www.apelearn.com/study_v2/"
     session = requests.session()
     r = session.get(url, timeout=10)
     # Fail loudly on an HTTP error instead of parsing an error page.
     r.raise_for_status()
     # Decode the body as UTF-8 regardless of the encoding requests guessed.
     r.encoding = "utf-8"
     html = r.text

     htmlli = reg.findall(html)
     # No match means the page layout changed; fall back to an empty result
     # rather than failing on htmlli[0] or leaving `result` undefined.
     result = regurl.findall(htmlli[0]) if htmlli else []

     urls = list()
     for i in result:
         url = "http://www.apelearn.com/study_v2/{0}".format(i)
         print(url)
         # Store the chapter URL itself (not the list it is being added to).
         urls.append(url)
    http://www.apelearn.com/study_v2/chapter1.html
    http://www.apelearn.com/study_v2/chapter2.html
    http://www.apelearn.com/study_v2/chapter3.html
    http://www.apelearn.com/study_v2/chapter4.html
    http://www.apelearn.com/study_v2/chapter5.html
    http://www.apelearn.com/study_v2/chapter6.html
    http://www.apelearn.com/study_v2/chapter7.html
    http://www.apelearn.com/study_v2/chapter8.html
    http://www.apelearn.com/study_v2/chapter9.html
    http://www.apelearn.com/study_v2/chapter10.html
    http://www.apelearn.com/study_v2/chapter11.html
    http://www.apelearn.com/study_v2/chapter12.html
    http://www.apelearn.com/study_v2/chapter13.html
    http://www.apelearn.com/study_v2/chapter14.html
    http://www.apelearn.com/study_v2/chapter15.html
    http://www.apelearn.com/study_v2/chapter16.html
    http://www.apelearn.com/study_v2/chapter17.html
    http://www.apelearn.com/study_v2/chapter18.html
    http://www.apelearn.com/study_v2/chapter19.html
    http://www.apelearn.com/study_v2/chapter20.html
    http://www.apelearn.com/study_v2/chapter21.html
    http://www.apelearn.com/study_v2/chapter22.html
    http://www.apelearn.com/study_v2/chapter23.html
    http://www.apelearn.com/study_v2/chapter24.html
    http://www.apelearn.com/study_v2/chapter25.html
    http://www.apelearn.com/study_v2/chapter26.html

    合并PDF:先生成一个空白的PDF,再把各章节PDF的页面逐页写入

     import codecs
     import PyPDF2
     import os

     # Merge the numbered chapter PDFs in the "aming" directory ("1.pdf",
     # "2.pdf", ...) into a single file, "aming/aminglinux.pdf".

     files = list()
     for fileName in os.listdir("aming"):
         # Keep only PDFs whose name is a plain number. This also skips the
         # merged output "aminglinux.pdf" from an earlier run, which would
         # otherwise make the int() sort key below raise ValueError.
         if fileName.endswith(".pdf") and fileName.split(".pdf")[0].isdecimal():
             files.append(fileName)

     # Sort numerically so that "10.pdf" comes after "9.pdf".
     newFiles = sorted(files, key=lambda d: int(d.split(".pdf")[0]))
     print(newFiles)


     os.chdir("aming")
     pdfWriter = PyPDF2.PdfFileWriter()  # start from an empty PDF
     openedFiles = list()
     try:
         for item in newFiles:
             # The source files stay open until the merged file has been
             # written, because the writer still refers to the readers' pages.
             source = open(item, "rb")
             openedFiles.append(source)
             pdfReader = PyPDF2.PdfFileReader(source)
             for page in range(pdfReader.numPages):
                 pdfWriter.addPage(pdfReader.getPage(page))

         with open("aminglinux.pdf", "wb") as f:
             pdfWriter.write(f)
     finally:
         # Close every chapter file, even if reading or writing failed.
         for source in openedFiles:
             source.close()
    []
    • python3使用模块PIL来处理图片
    from PIL import Image

    # Load the sample picture, print its format, size and mode, then display it.
    image = Image.open("test.jpg")
    print(image.format, image.size, image.mode)
    image.show()

    # Cut out the (left, upper, right, lower) region and display the piece.
    position = (320, 65, 460, 220)
    cutjpg = image.crop(position)
    cutjpg.show()

    # Rotate the same region by 180 degrees and paste it back in place.
    cutjpg = image.crop(position)
    cutjpg = cutjpg.transpose(Image.ROTATE_180)
    image.paste(cutjpg, position)
    image.show()

    # Shrink the picture to a width of 30 pixels, scaling the height to match.
    x, y = image.size
    newx = 30
    newy = int(y * newx / x)
    newimage = image.resize((newx, newy))
    newimage.show()


    # Frequently used calls:
    # Image.open(file)
    # image.show()
    # image.save(outputfile)
    # image.crop((left, upper, right, lower))         # cut out a region

    # Geometric operations on an image:
    # out = im.resize((128, 128))                     # change the picture size
    # out = im.rotate(45)                             # rotate 45 degrees counter-clockwise
    # out = im.transpose(Image.FLIP_LEFT_RIGHT)       # mirror left to right
    # out = im.transpose(Image.FLIP_TOP_BOTTOM)       # mirror top to bottom
    # out = im.transpose(Image.ROTATE_90)             # rotate by 90 degrees
    # out = im.transpose(Image.ROTATE_180)            # rotate by 180 degrees
    # out = im.transpose(Image.ROTATE_270)            # rotate by 270 degrees
     1 import random
     2 import string
     3 
     4 from PIL import Image, ImageFont, ImageDraw, ImageFilter
     5 
     # Settings for the verification-code (captcha) image.
     font_path = "msyh.ttf"       # TrueType font file used to render the text
     number = 4                   # how many characters the code contains
     size = (100, 30)             # image size as (width, height) in pixels
     bgcolor = (255, 255, 255)    # background colour (RGB): white
     fontcolor = (0, 0, 255)      # text colour (RGB): blue
     linecolor = (255, 0, 0)      # interference-line colour (RGB): red
     draw_line = True             # whether interference lines are drawn at all
     # Number of interference lines to draw (a fixed count, not a range).
     line_number = 30
    15 
    16 
    def getNumber(length=None):
        """Return a random string of ASCII letters and digits.

        Args:
            length: Number of characters to generate. Defaults to the
                module-level ``number`` setting when omitted.

        Returns:
            A string of exactly ``length`` characters.

        Raises:
            ValueError: If ``length`` is negative.
        """
        if length is None:
            length = number
        if length < 0:
            raise ValueError("length must be non-negative")
        source = string.ascii_letters + string.digits
        # Pick every character independently. Sampling without replacement
        # could never repeat a character and failed for lengths above 62.
        return "".join(random.choice(source) for _ in range(length))
    22 
    def getLine(draw, width, height, color=None):
        """Draw one interference line between two random points.

        Args:
            draw: Drawing object providing ``line(points, fill=...)``.
            width: Canvas width in pixels.
            height: Canvas height in pixels.
            color: Line colour. Defaults to the module-level ``linecolor``
                setting when omitted.
        """
        if color is None:
            color = linecolor
        # randint() includes both ends, so the largest valid coordinate is
        # size - 1; clamp at 0 so an empty canvas does not raise.
        max_x = max(width - 1, 0)
        max_y = max(height - 1, 0)
        begin = random.randint(0, max_x), random.randint(0, max_y)
        end = random.randint(0, max_x), random.randint(0, max_y)
        draw.line([begin, end], fill=color)
    28 
    def getCode():
        """Render a verification-code image and save it as 'idencode.png'.

        The image uses the module-level settings ``size``, ``bgcolor``,
        ``font_path``, ``fontcolor``, ``draw_line`` and ``line_number``.

        Returns:
            The text drawn on the image, so the caller can check a user's
            answer against it.
        """
        width, height = size
        image = Image.new("RGBA", size, bgcolor)
        font = ImageFont.truetype(font_path, 25)
        draw = ImageDraw.Draw(image)
        text = getNumber()

        # font.getsize() no longer exists in Pillow 10+, so prefer getbbox()
        # and fall back to getsize() on old releases. Width is the box width;
        # height is measured from the drawing origin to the bottom edge, which
        # is intended to match what getsize() reported.
        if hasattr(font, "getbbox"):
            left, _top, right, bottom = font.getbbox(text)
            font_width, font_height = right - left, bottom
        else:
            font_width, font_height = font.getsize(text)

        # Centre the text on the canvas.
        draw.text(((width - font_width) / 2, (height - font_height) / 2), text, font=font, fill=fontcolor)
        if draw_line:
            for _ in range(line_number):
                getLine(draw, width, height)

        # Optional distortion step, left disabled:
        # image = image.transform((width + 20, height + 10), Image.AFFINE, (1, -0.3, 0, -0.1, 1, 0), Image.BILINEAR)
        image = image.filter(ImageFilter.EDGE_ENHANCE_MORE)  # strong edge-enhancement filter
        image.save('idencode.png')  # write the verification-code picture
        return text


    if __name__ == '__main__':
        getCode()
  • 相关阅读:
    timestamp的两个属性:CURRENT_TIMESTAMP 和ON UPDATE CURRENT_TIMESTAMP
    python 典型文件结构
    PHP接口开发加密技术实例原理与例子
    一个高效的敏感词过滤方法(PHP)
    Thinkphp自动验证规则
    PHP解析xml文件时报错:I/O warning : failed to load external entity
    访问php网站报500错误时显示错误显示
    15个最受欢迎的Python开源框架
    分布式监控系统开发【day38】:报警策略设计(二)
    分布式监控系统开发【day38】:报警阈值程序逻辑解析(三)
  • 原文地址:https://www.cnblogs.com/karl-python/p/9170522.html
Copyright © 2011-2022 走看看