zoukankan      html  css  js  c++  java
  • 【Java爬虫】爬取南通大学教务系统成绩计算绩点

      以前写过一个python版的,但是想做一个jsp网页版的,就又用Java写了一下。

      具体地址的分析过程在这里,这里简单说一下HttpClient的Get,Post方法的使用

               1.Get请求方法

    //创建一个浏览器客户端
    CloseableHttpClient httpClient = HttpClients.createDefault();
    //要Get的地址
    String url1="http://www.baidu.com";				
    //创建一个Get请求
    HttpGet baidu=new HttpGet(url1);
    //用上面创建的浏览器客户端执行该请求
    CloseableHttpResponse res=httpClient.execute(baidu);
    //用响应创建一个http实体并获得输入流
    HttpEntity he=res.getEntity();
    InputStream in=he.getContent();
    //将获得的流写到本地磁盘
    FileOutputStream out=new FileOutputStream("baidu.html'");
    byte[] buffer=new byte[1024];
    int count=-1;
    while((count=in.read(buffer))!=-1)
    {
    	out.write(buffer, 0, count);
    }
    in.close();
    out.close();
    

       2.Post请求方法

    CloseableHttpClient httpClient = HttpClients.createDefault();
    String url="http://××××.××××.com?#";	
    //要提交的参数username,password				
    List<NameValuePair> list = new ArrayList<NameValuePair>();
    list.add(new BasicNameValuePair("Username","Name"));
    list.add(new BasicNameValuePair("Password","××××××"));
    //转换编码
    UrlEncodedFormEntity entity = new UrlEncodedFormEntity(list,"utf-8"); 
    //创建Post请求
    HttpPost httpPost=new HttpPost(url);
    //为请求设置参数
    httpPost.setEntity(entity);
    //获得响应,输入流并写入本地磁盘
    CloseableHttpResponse res=httpClient.execute(httpPost);
    HttpEntity he=res.getEntity();
    InputStream in=he.getContent();
    FileOutputStream out=new FileOutputStream("××××.×××");
    byte[] buffer=new byte[1024];
    int count=-1;
    while((count=in.read(buffer))!=-1)
    {
    	out.write(buffer, 0, count);
    }
    in.close();
    out.close();

    爬虫的完整代码:

    import java.io.ByteArrayOutputStream;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.UnsupportedEncodingException;
    import java.nio.charset.Charset;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.Scanner;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    import org.apache.http.HttpEntity;
    import org.apache.http.NameValuePair;
    import org.apache.http.client.ClientProtocolException;
    import org.apache.http.client.entity.UrlEncodedFormEntity;
    import org.apache.http.client.methods.*;
    import org.apache.http.impl.client.CloseableHttpClient;
    import org.apache.http.impl.client.HttpClients;
    import org.apache.http.message.BasicNameValuePair;
    
    
    /**
     * Console front end for the grade crawler.
     *
     * <p>On start it downloads the captcha image through {@code doon.getyzm()} and
     * reads one line from standard input. It then reads commands line by line,
     * echoing each one:
     * <ul>
     *   <li>{@code n} - print every match of the personal-information pattern;</li>
     *   <li>{@code s} - print each counted course followed by the credit total,
     *       the grade-point total and the average grade point;</li>
     *   <li>{@code #} - quit.</li>
     * </ul>
     * The loop also ends when standard input is exhausted.
     */
    public class spider02 {
    	public static void main(String[] args) throws ClientProtocolException, IOException
    	{
    		// The scanner wraps System.in and is deliberately left open.
    		@SuppressWarnings("resource")
    		Scanner cin=new Scanner(System.in);
    		doon asd=new doon();
    		asd.getyzm();
    		if(!cin.hasNextLine())
    			return;
    		// First line typed by the user (the original marks it as a test value).
    		// NOTE(review): it is read but never used, and login() is never called
    		// from here - confirm whether the queries below work without logging in.
    		cin.nextLine();
    		// hasNextLine() guards against end of input, which would otherwise make
    		// nextLine() throw NoSuchElementException.
    		while(cin.hasNextLine())
    		{
    			String command=cin.nextLine();
    			System.out.println(command);
    			if(command.equals("#"))
    				break;
    			if(command.equals("n"))
    			{
    				Matcher name=asd.patternname(asd.getname());
    				while(name.find())
    					System.out.println(name.group(1));
    			}
    			if(command.equals("s"))
    			{
    				Matcher score=asd.patternscore(asd.getscore());
    				printReport(asd.workjidian(score));
    			}
    		}
    	}

    	/**
    	 * Prints one line per course (name, total grade, credits, credit grade
    	 * points) and then the three summary lines.
    	 *
    	 * @param les courses to report, as returned by {@code doon.workjidian}
    	 */
    	private static void printReport(List<lession> les)
    	{
    		double jdsum=0,xfsum=0;
    		for(lession course:les)
    		{
    			jdsum+=course.getKcxfjd();
    			xfsum+=Double.valueOf(course.getXf()).doubleValue();
    			System.out.println(course.getKcmc()+"	"+course.getZpcj()+"	"+course.getXf()+"	"+course.getKcxfjd());
    		}
    		System.out.println("所修课程学分:"+xfsum);
    		System.out.println("所修课程学分绩点:"+jdsum);
    		// With no credits counted the division would print NaN; report 0.0 instead.
    		double average=xfsum==0?0.0:jdsum/xfsum;
    		System.out.println("平均学分绩点:"+average);
    	}
    }
    class doon{
    	private CloseableHttpClient httpClient = HttpClients.createDefault();
    	public  void done(String xh,String sfzh,String kl,String yzm) 
    	{
    		try {
    			login(xh, sfzh, kl, yzm);	//尝试登陆
    			getscore();											//获取分数
    		} catch (ClientProtocolException e) {
    			e.printStackTrace();
    		} catch (IOException e) {
    			e.printStackTrace();
    		}
    	}
    	public String getname()
    	{
    		String url="http://jwgl.ntu.edu.cn/cjcx/QueryAll.aspx";			//获取个人信息位置
    		String information="";
    		//Post请求
    		List<NameValuePair> list=new ArrayList<NameValuePair>();
    		list.add(new BasicNameValuePair("xq","2013-2014-1"));
    		try {
    			UrlEncodedFormEntity entity = new UrlEncodedFormEntity(list,"utf-8");
    			HttpPost post=new HttpPost(url);
    			post.setEntity(entity);
    			CloseableHttpResponse res= httpClient.execute(post);
    			HttpEntity he=res.getEntity();
    			InputStream in=he.getContent();
    			//FileOutputStream out=new FileOutputStream("");
    			byte[] buffer=new byte[1024];
    			int count=-1;
    			while((count=in.read(buffer))!=-1)
    			{
    				String inf=new String(buffer,0,count);
    				information+=inf;
    			}
    			in.close();
    		} catch (IOException e) {
    			// TODO Auto-generated catch block
    			e.printStackTrace();
    		}
    		return information;
    	}
    	public  void getyzm() throws IOException
    	{
    		//获得验证码并写到本地,Get请求
    		String url1="http://jwgl.ntu.edu.cn/cjcx/checkImage.aspx";					//验证码页面
    		HttpGet yzm=new HttpGet(url1);
    		CloseableHttpResponse res=httpClient.execute(yzm);
    		HttpEntity he=res.getEntity();
    		InputStream in=he.getContent();
    		FileOutputStream out=new FileOutputStream("yzm.gif");
    		byte[] buffer=new byte[1024];
    		int count=-1;
    		while((count=in.read(buffer))!=-1)
    		{
    			out.write(buffer, 0, count);
    		}
    		in.close();
    		out.close();
    	}
    	public  void login(String xh,String sfzh,String kl,String yzm) throws ClientProtocolException, IOException
    	{
    		//Post请求
    		String url="http://jwgl.ntu.edu.cn/cjcx/Default.aspx";								//登录页面
    		List<NameValuePair> list = new ArrayList<NameValuePair>();
    		list.add(new BasicNameValuePair("__VIEWSTATE","/wEPDwUJODExMDE5NzY5ZGRgtUdRucUbXsT8g55XmVsTwV6PMw=="));
    		list.add(new BasicNameValuePair("__VIEWSTATEGENERATOR","6C0FF253"));
    		list.add(new BasicNameValuePair("xh",xh));
    		list.add(new BasicNameValuePair("sfzh",sfzh));
    		list.add(new BasicNameValuePair("kl",kl));
    		list.add(new BasicNameValuePair("yzm",yzm));
    		UrlEncodedFormEntity entity = new UrlEncodedFormEntity(list,"utf-8"); 
    		HttpPost httpPost=new HttpPost(url);
    		httpPost.setEntity(entity);
    		CloseableHttpResponse res=httpClient.execute(httpPost);
    		HttpEntity he=res.getEntity();
    		InputStream in=he.getContent();
    		FileOutputStream out=new FileOutputStream("ans.html");
    		byte[] buffer=new byte[1024];
    		int count=-1;
    		while((count=in.read(buffer))!=-1)
    		{
    			out.write(buffer, 0, count);
    		}
    		in.close();
    		out.close();
    	}
    	public  String getscore() throws ClientProtocolException, IOException
    	{
    		//Post请求
    		String url="http://jwgl.ntu.edu.cn/cjcx/Data/ScoreAllData.aspx";  //获取分数
    		List<NameValuePair> list = new ArrayList<NameValuePair>();
    		list.add(new BasicNameValuePair("start","0"));
    		list.add(new BasicNameValuePair("pageSize","80"));
    		UrlEncodedFormEntity entity = new UrlEncodedFormEntity(list,"utf-8"); 
    		HttpPost httpPost=new HttpPost(url);
    		httpPost.setEntity(entity);
    		CloseableHttpResponse res=httpClient.execute(httpPost);
    		HttpEntity he=res.getEntity();
    		InputStream in=he.getContent();
    		FileOutputStream out=new FileOutputStream("score.html");
    		byte[] buffer=new byte[1024];
    		int count=-1;
    		String save="";
    		while((count=in.read(buffer))!=-1)
    		{
    			out.write(buffer, 0, count);
    			String sav=new String(buffer,0,count);
    			save+=sav;
    		}
    		in.close();
    		out.close();
    		return save;
    	}
    	public Matcher patternscore(String score)
    	{
    		//用正则表达式匹配成绩
    		String reg=""kcmc":"(.*?)","jsxm":"(.*?)","xq":"(.*?)","xs":"(.*?)","xf":"(.*?)","zpcj":"(.*?)","pscj":"(.*?)","qmcj":"(.*?)","kcsx":"(.*?)","cjid":"(.*?)","ksfsm":"(.*?)","pxcj":"(.*?)"}";
    		Pattern p=Pattern.compile(reg);
    		Matcher m=p.matcher(score);
    		return m;
    	}
    	public Matcher patternname(String name)
    	{
    		//匹配个人信息
    		String reg="<b>(.*?)</b>";
    		Pattern p=Pattern.compile(reg);
    		Matcher  m=p.matcher(name);
    		return m;
    	}
    	public List<lession> workjidian(Matcher score)
    	{
    		//计算绩点
    		List<lession> les=new ArrayList<lession>();
    		while(score.find())
    		{
    			double	xf=0.0;
    			if(score.group(6).equals("优"))				//五级计分
    				xf=Double.valueOf(score.group(5)).doubleValue()*4.5; 				
    			else if(score.group(6).equals("良"))
    				xf=Double.valueOf(score.group(5)).doubleValue()*3.5; 	
    			else if(score.group(6).equals("中"))
    				xf=Double.valueOf(score.group(5)).doubleValue()*2.5; 	
    			else if(score.group(6).equals("及格"))
    				xf=Double.valueOf(score.group(5)).doubleValue()*1.5; 	
    			else if(score.group(6).equals("缓考")||score.group(6).equals("不及格"))
    				continue;
    			else if(Double.valueOf(score.group(6)).doubleValue()>=90)					//百分计分
    				xf=((Double.valueOf(score.group(6)).doubleValue()-90)/10+4.0)*Double.valueOf(score.group(5)).doubleValue();
    			else if(Double.valueOf(score.group(6)).doubleValue()>=80&&Double.valueOf(score.group(6)).doubleValue()<=89)	
    				xf=((Double.valueOf(score.group(6)).doubleValue()-80)/10+3.0)*Double.valueOf(score.group(5)).doubleValue();
    			else if(Double.valueOf(score.group(6)).doubleValue()>=70&&Double.valueOf(score.group(6)).doubleValue()<=79)	
    				xf=((Double.valueOf(score.group(6)).doubleValue()-70)/10+2.0)*Double.valueOf(score.group(5)).doubleValue();
    			else if(Double.valueOf(score.group(6)).doubleValue()>=60&&Double.valueOf(score.group(6)).doubleValue()<=69)	
    				xf=((Double.valueOf(score.group(6)).doubleValue()-60)/10+1.0)*Double.valueOf(score.group(5)).doubleValue();
    			else if(Double.valueOf(score.group(6)).doubleValue()<60)
    				continue;
    			les.add(new lession(score.group(1),score.group(2),score.group(3),score.group(4),score.group(5),score.group(6),score.group(7),score.group(8),score.group(9),score.group(10),score.group(11),score.group(12),xf));
    
    		//	System.out.println(score.group(1)+"						"+score.group(2)+"	"
    		//			+score.group(5)+"	"+score.group(6)+"学分"+Double.toString(xf));
    		}
    		return les;
    	}
    }



  • 相关阅读:
    框架_爬虫如何应用(爬虫架构设计与实现)
    架构师_设计模式_结构型_代理模式
    7语法基础_CLR核心解析
    架构师_设计模式_结构型_享元模式
    架构师_设计模式_结构型_组合模式
    同源策略、跨域、Same-origin、Cross-origin
    ES(elasticsearch) query DSL 查询语法
    日志框架
    powerdesigner 名称、注释互转
    maven 可运行 jar 包的打包,包含依赖 jar 包
  • 原文地址:https://www.cnblogs.com/A-yes/p/9894226.html
Copyright © 2011-2022 走看看