zoukankan      html  css  js  c++  java
  • 03爬虫 爬取hfutxc成绩

     1 #-*- coding:utf-8 -*-
     2 # -*- coding: utf-8 -*-
     3 #encoding:utf-8
     4 import urllib
     5 import urllib2
     6 import cookielib
     7 import re
     8 
     9 
    10 class SDU:
    11 
    12     def __init__(self):
    13         self.loginUrl = 'http://222.195.8.201/pass.asp'
    14         self.gradeUrl = 'http://222.195.8.201/student/asp/Select_Success.asp'
    15         self.cookies = cookielib.CookieJar()
    16         self.postdata = urllib.urlencode({
    17             'UserStyle':'student',
    18             'user':'2013217314',
    19             'password':'#######'
    20          })
    21         self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookies))
    22 
    23     def getPage(self):
    24         request  = urllib2.Request(
    25             url = self.loginUrl,
    26             data = self.postdata)
    27         result = self.opener.open(request)
    28         result = self.opener.open(self.gradeUrl)
    29         return result.read().decode('gbk')
    30         #打印登录内容
    31         #print result.read().decode('gbk')
    32 
    33     def getGrades(self):
    34           #获得本学期成绩页面
    35           page = self.getPage()
    36           #正则匹配
    37           myItems = re.findall('<TR bgcolor.*?<TD>.*?</TD>.*?<TD>(.*?)</TD>.*?<TD align="center">.*?</TD>.*?<TD align="center">(.*?)</TD>.*?</TR>',page,re.S)
    38           for item in myItems:
    39               print item[0]+'  '+item[1].strip()+' '
    40               #self.credit.append(item[0].encode('gbk'))
    41               #self.grades.append(item[1].encode('gbk'))
    42           #self.getGrade()
    43 
    44 
    45 sdu = SDU()
    46 sdu.getPage()
    47 sdu.getGrades()
  • 相关阅读:
    mybatis+sql语句
    坐标转换
    sql语句含中文JDBC查询不到
    架构阅读笔记16
    架构阅读笔记15
    架构阅读笔记14
    windows安装imgaug包报错中Shapely
    java除法
    Java输入输出问题复习
    java自学,基础,入门
  • 原文地址:https://www.cnblogs.com/cnblogs321114287/p/6984581.html
Copyright © 2011-2022 走看看