zoukankan      html  css  js  c++  java
  • 黄聪:Python实现Discuz论坛的自动POST登录发贴回帖(转)

    #-*-coding:utf-8-*-
    import urllib2, urllib, cookielib
    import re
    import getpass
    import sqlite3
    import random
    import time

    class Discuz:
        """Small Discuz! forum client that logs in and posts replies.

        Every thread that was replied to is recorded in the SQLite file
        ``data.db`` (table ``post``) so that a later call for the same
        fid/tid pair is skipped instead of posting a second reply.
        """

        def __init__(self, user, pwd, args):
            """Store the settings, log in, then open the bookkeeping database.

            user -- forum user name sent in the login form
            pwd  -- forum password sent in the login form
            args -- dict of URLs / URL templates; the keys read by this class
                    are 'loginurl', 'loginsubmiturl', 'referer', 'fidurl',
                    'tidurl' and 'replysubmiturl'
            """
            self.username = user
            self.password = pwd
            self.args = args
            # Raw strings keep the exact same patterns while avoiding
            # invalid-escape warnings on newer interpreters.
            self.regex = {
                'loginreg': r'<input\s*type="hidden"\s*name="formhash"\s*value="([\w\W]+?)"\s*\/>',
                'replyreg': r'<input\s*type="hidden"\s*name="formhash"\s*value="([\w\W]+?)"\s*\/>',
                'tidreg': r'<tbody\s*id="normalthread_\d+">[\s\S]+?<span\s*id="thread_(\d+)">',
            }
            self.conn = None
            self.cur = None
            self.islogin = False
            self.login()
            self.InitDB()

        def _extract_formhash(self, regex_key, page):
            """Return the hidden ``formhash`` value captured from *page*.

            regex_key -- key into ``self.regex`` selecting the pattern to use
            page      -- HTML text to search

            Raises ValueError when the pattern does not match, so callers
            report a meaningful message instead of an AttributeError on None.
            """
            match = re.search(self.regex[regex_key], page)
            if match is None:
                raise ValueError('formhash not found in page')
            return match.group(1)

        def login(self):
            """Fetch the login form, then POST the credentials.

            Installs a cookie-aware opener globally (``urllib2.install_opener``)
            so later ``urllib2.urlopen`` calls reuse the session cookies.
            Any failure is printed and leaves ``self.islogin`` False.
            """
            try:
                loginPage = urllib2.urlopen(self.args['loginurl']).read()
                formhash = self._extract_formhash('loginreg', loginPage)
                print('start login...')
                cj = cookielib.CookieJar()
                opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
                user_agent = (
                    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; Mozilla/4.0 '
                    '(compatible; MSIE 6.0; Windows NT 5.1; SV1) ; .NET CLR 2.0.507'
                )
                opener.addheaders = [('User-agent', user_agent)]
                urllib2.install_opener(opener)
                logindata = urllib.urlencode({
                    'cookietime': 2592000,
                    'formhash': formhash,
                    'loginfield': 'username',
                    'username': self.username,
                    'password': self.password,
                    'questionid': 0,
                    'referer': self.args['referer'],
                })
                request = urllib2.Request(self.args['loginsubmiturl'], logindata)
                urllib2.urlopen(request)
                # NOTE(review): the response body is not inspected; the flag only
                # means the POST completed without raising.
                self.islogin = True
                print('login success...')
            except Exception as e:
                print('login error: %s' % e)

        def PostReply(self, fid, tid, content):
            """Reply to thread *tid* of board *fid* unless already recorded.

            fid     -- board id, substituted into args['replysubmiturl']
            tid     -- thread id, substituted into args['tidurl'] and
                       args['replysubmiturl']
            content -- message body sent as the 'message' form field

            On success a row (fid, tid, 1) is inserted into ``post``.
            Any failure is printed; nothing is raised to the caller.
            """
            try:
                # cursor.rowcount is always -1 for SELECT in sqlite3, so the
                # presence of a row has to be checked by fetching it.
                self.cur.execute(
                    "select 1 from post where fid=? and tid=?", (fid, tid))
                if self.cur.fetchone() is not None:
                    print('Skip! Thread:%s is already replied...' % tid)
                    return

                tidurl = self.args['tidurl'] % tid
                replysubmiturl = self.args['replysubmiturl'] % (fid, tid)
                tidPage = urllib2.urlopen(tidurl).read()
                formhash = self._extract_formhash('replyreg', tidPage)
                print('start reply...')
                replydata = urllib.urlencode({
                    'formhash': formhash,
                    'message': content,
                    'subject': '',
                    'usesig': '1',
                })
                request = urllib2.Request(replysubmiturl, replydata)
                urllib2.urlopen(request)
                # Placeholders instead of string formatting: ids come from
                # scraped HTML and must not be spliced into the SQL text.
                self.cur.execute(
                    "insert into post values (?, ?, ?)", (fid, tid, 1))
                self.conn.commit()
                print('reply success for [%s]' % tidurl)
            except Exception as e:
                print('reply error: %s' % e)

        def GetTids(self, fid):
            """Return the list of thread ids found on the board page of *fid*.

            Returns an empty list (after printing a notice) when not logged
            in, so callers can always iterate over the result.
            """
            if not self.islogin:
                print('Error Please Login...')
                return []
            fidurl = self.args['fidurl'] % fid
            content = urllib2.urlopen(fidurl).read()
            return re.findall(self.regex['tidreg'], content)

        def InitDB(self):
            """Open ``data.db`` and create the ``post`` table if it is missing."""
            self.conn = sqlite3.connect('data.db')
            self.cur = self.conn.cursor()
            sql = '''create table if not exists post (
                fid text,
                tid text,
                replied integer)
            '''
            self.cur.execute(sql)
            self.conn.commit()

    if __name__ == '__main__':
        # Interactive entry point: ask for credentials, log in, then reply to
        # every thread listed on one board with a randomly chosen canned message.
        username = raw_input('username:').strip()
        password = getpass.getpass('password:').strip()
        args = {
            'loginurl': 'http://www.xxx.com/logging.php?action=login',
            'loginsubmiturl': 'http://www.xxx.com/logging.php?action=login&loginsubmit=yes',
            'fidurl': 'http://www.xxx.com/forum-%s-1.html',
            'tidurl': 'http://www.xxx.com/thread-%s-1-1.html',
            'replysubmiturl': 'http://www.xxx.com/post.php?action=reply&replysubmit=yes&infloat=yes&handlekey=fastpost&fid=%s&tid=%s',
            'referer': 'http://www.xxx.com/index.php',
        }
        dz = Discuz(username, password, args)
        fid = '45'
        # GetTids may yield nothing usable when the login failed; fall back to
        # an empty list so the loop below is simply skipped.
        tids = dz.GetTids(fid) or []
        replylist = [
            u'不错,支持一下,呵呵',
            u'已阅,顶一下',
            u'看看,顶你,呵呵',
            u'多谢分享,顶一下',
            u'说的不错,支持一下',
            u'提着水桶到处转,哪里缺水哪里灌! ',
            u'你太油菜了!',
        ]
        for tid in tids:
            # The message is sent GBK-encoded.
            content = random.choice(replylist).encode('gbk')
            dz.PostReply(fid, tid, content)
            # Pause between replies.
            time.sleep(20)

    下面简单说下过程:
    首先是得到了login的post地址:http://www.xxx.com/logging.php?action=login&loginsubmit=yes
    几个关键的parameter是

    formhash
    cookietime
    formhash
    loginfield
    password
    questionid
    referer
    username

    • cookietime 浏览器自动给的是 2592000
    • loginfield 默认的username
    • password 密码
    • questionid 这个貌似是登录时的回答问题,这个论坛没有强制回答所以用默认的0
    • referer 这个则是引用地址 http://www.xxx.com/index.php
    • username 用户名
    • formhash 这个值貌似是随机的,不固定,但也是个关键参数,所以直接用正则从登录页面中查找
    # URL settings passed to the client. Query strings use a plain '&';
    # the HTML-escaped form '&amp;' would be sent literally and break the request.
    args = {
        'loginurl': 'http://www.xxx.com/logging.php?action=login',
        'loginsubmiturl': 'http://www.xxx.com/logging.php?action=login&loginsubmit=yes',
        'fidurl': 'http://www.xxx.com/forum-%s-1.html',
        'tidurl': 'http://www.xxx.com/thread-%s-1-1.html',
        'replysubmiturl': 'http://www.xxx.com/post.php?action=reply&replysubmit=yes&infloat=yes&handlekey=fastpost&fid=%s&tid=%s',
        'referer': 'http://www.xxx.com/index.php'
    }
    • loginurl为登录页面,用于获得formhash的值
    • loginsubmiturl为post登录参数的地址
    • fidurl是版块页面的url,url中%s那里即为版块的ID(fid),例如 http://www.xxx.com/forum-45-1.html 中fid即为45
    • tidurl是帖子页面的url,url中%s那里即为帖子的id(tid),查找方法同上
    • replysubmiturl这个是回复帖子post参数的url,要定位一个帖子前提得知道fid和tid
    • referer这个是引用地址,用网站的首页即可

    原创文章,转载请注明: 转载自LazyHack.Net

  • 相关阅读:
    SOA概念误解实施要点
    Visual Studio 2008 和 .NET Framework 3.5 Service Pack 1 Beta 发布
    【翻译】使用LINQ来简化编程的7个技巧
    我对SOA的认识以及心得
    《SQL Server 2005范例代码查询辞典》出版
    Security Tutorials系列文章以及AJAX系列文章
    代朋友发招聘信息,C++程序员
    二叉树相关算法
    最近项目的一些心得(纯贴代码)
    大型互联网网站架构心得之一:分
  • 原文地址:https://www.cnblogs.com/huangcong/p/2165841.html
Copyright © 2011-2022 走看看