原创
[python]
#!/usr/bin/env python2.7
#coding: utf-8
import smtplib, sys, os, re, urllib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.image import MIMEImage
from email.Header import Header
# Envelope addresses handed to smtplib's sendmail() by the script entry point.
sender = 'from@126.com'
receiver = 'to@126.com'
# SMTP host and login used by the script entry point.
# NOTE(review): 'username' / 'password' look like placeholders -- replace
# them with real credentials before running.
smtpserver = 'smtp.126.com'
username = 'username'
password = 'password'
# Directory where readhtml() saves downloaded images and buildmail() reads
# them back from.
savepath = './img'
# Python 2 only: sys.setdefaultencoding is not reachable on the sys module
# after interpreter start-up, so the module is reloaded to get it back.
# The call switches implicit str <-> unicode conversions to UTF-8; the order
# of these two statements matters.
reload(sys)
sys.setdefaultencoding('utf-8')
# Page marker emitted by the site: <span class="current-comment-page">[N]</span>
_PAGE_RE = re.compile(
    r'"current-comment-page">\[(?P<page>.*?)\]</span>', re.I)

# One comment entry. The fragments the pattern keys on, in document order:
#   <li id="comment-FLOOR">
#   @</a>TIME</span>
#   <a href="LINK">#
#   <p>TEXT<img
#   <img src="IMAGE" />
# NOTE(review): the lazy [\s\S]*? gaps are not bounded by </li>, so an entry
# that has no <img> can be paired with the image of a later entry. Confirm
# against real pages before relying on the floor/image pairing.
_COMMENT_RE = re.compile(
    r'<li id="comment-(?P<floor>.*?)">[\s\S]*?@</a>(?P<time>.*?)</span>'
    r'[\s\S]*?<a href="(?P<link>.*?)">#[\s\S]*?<p>(?P<text>[\s\S]*?)'
    r'<img src="(?P<image>.*?)"[\s\S]*?</li>', re.I)


def readhtml(url):
    """Fetch a comment page, download its images, and return entry metadata.

    The page at ``url`` is read and decoded as UTF-8. For every comment
    entry matched by ``_COMMENT_RE`` the image is downloaded to
    ``os.path.join(savepath, floor)`` (no file extension) and a dict with
    the keys ``'floor'``, ``'time'``, ``'link'`` and ``'text'`` is collected.

    Args:
        url: Address of the page to scrape.

    Returns:
        A list of dicts, one per matched entry, in page order. ``'text'``
        is the stripped description prefixed with ``'<p>'``.

    Errors raised by urllib (fetching the page or an image) and by the
    UTF-8 decode are not caught here and propagate to the caller.
    """
    response = urllib.urlopen(url)
    try:
        html = response.read().decode('utf-8')
    finally:
        # Release the connection even when read/decode fails.
        response.close()

    # The page number is diagnostic output only, so a missing marker must
    # not abort the scrape.
    match_page = _PAGE_RE.search(html)
    if match_page is not None:
        # A parenthesised single argument prints identically under the
        # Python 2 print statement.
        print('page=' + match_page.group('page'))

    result = []
    for match in _COMMENT_RE.finditer(html):
        floor = match.group('floor')
        image = match.group('image').strip()
        # Store the image under the floor id; buildmail() looks it up by
        # the same name.
        urllib.urlretrieve(image, os.path.join(savepath, floor))
        result.append({
            'floor': floor,
            'time': match.group('time'),
            'link': match.group('link'),
            'text': '<p>' + match.group('text').strip(),
        })
        print(floor)
    return result
def buildmail(infos):
    """Build a multipart/related HTML mail with one inline image per entry.

    Args:
        infos: Iterable of dicts carrying the keys ``'link'``, ``'floor'``,
            ``'time'`` and ``'text'``. For each entry the image file
            ``os.path.join(savepath, info['floor'])`` must already exist.

    Returns:
        A ``MIMEMultipart('related')`` message. Its first part is the
        UTF-8 HTML body, followed by one ``MIMEImage`` per entry whose
        Content-ID is ``<floor>`` so the body can reference it as
        ``cid:floor``.

    Errors from opening an image file or from ``MIMEImage`` are not caught
    here and propagate to the caller.

    NOTE(review): only the Subject header is set here; From/To headers are
    left to the caller.
    """
    msgRoot = MIMEMultipart('related')
    msgRoot['Subject'] = Header(u'煎蛋-妹子图', 'utf-8')

    fragments = []
    images = []
    for info in infos:
        # A unicode template keeps the formatting independent of the
        # process-wide default encoding.
        fragments.append(
            u'<a href="{0}">{1}</a>{2}{3}<br><img src="cid:{4}"><hr>'.format(
                info['link'], info['floor'], info['time'], info['text'],
                info['floor']))
        # Inline image for this entry; the with-block closes the file even
        # if MIMEImage raises.
        with open(os.path.join(savepath, info['floor']), 'rb') as fp:
            msgImage = MIMEImage(fp.read())
        msgImage.add_header('Content-ID', '<{0}>'.format(info['floor']))
        images.append(msgImage)

    # In multipart/related the root is the first body part, so the HTML
    # body goes in before the images it references.
    html = u''.join(fragments)
    msgRoot.attach(
        MIMEText(html.encode('utf-8'), _subtype='html', _charset='utf-8'))
    for msgImage in images:
        msgRoot.attach(msgImage)
    return msgRoot
if __name__ == '__main__':
    # Script entry point: scrape the page, build the mail, send it.
    # Make sure the image directory exists before readhtml() writes to it.
    if not os.path.exists(savepath):
        os.mkdir(savepath)

    result = readhtml('http://jandan.net/ooxx')
    mailbody = buildmail(result)

    smtp = smtplib.SMTP()
    try:
        smtp.connect(smtpserver)
        smtp.login(username, password)
        smtp.sendmail(sender, receiver, mailbody.as_string())
        # Polite shutdown on the success path.
        smtp.quit()
    finally:
        # Always release the socket. close() does nothing after quit() or
        # on a connection that was never opened, and unlike quit() it does
        # not raise and mask an error from the block above.
        smtp.close()
    print('OK')
[/python]