zoukankan      html  css  js  c++  java
  • 网络爬虫Java实现抓取网页内容

    package 抓取网页;

    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStream;

    import org.apache.commons.httpclient.HttpClient;
    import org.apache.commons.httpclient.HttpException;
    import org.apache.commons.httpclient.HttpStatus;
    import org.apache.commons.httpclient.NameValuePair;
    import org.apache.commons.httpclient.methods.PostMethod;

    /**
     * Minimal crawler example: sends an HTTP POST to a URL with Apache Commons
     * HttpClient 3.x and, when the server answers 200 OK, saves the response
     * body to a file in the current working directory.
     */
    public class RetrivePage {

        /** Shared client used for every request issued by this class. */
        private static final HttpClient httpClient = new HttpClient();

        /** File name used when the URL ends with '/' and so has no last path segment. */
        private static final String DEFAULT_FILE_NAME = "index.html";

        /** Size of the buffer used to copy the response body to disk. */
        private static final int BUFFER_SIZE = 8192;

        /**
         * Fetches the Lietu home page and writes it to disk.
         *
         * @param args ignored
         */
        public static void main(String[] args) {
            try {
                RetrivePage.downloadPage("http://www.lietu.com");
            } catch (HttpException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        /**
         * POSTs the fixed {@code name}/{@code password} form fields to {@code path}
         * and, if the status code is 200, stores the response body in a file named
         * after the text following the last '/' of {@code path}
         * (or {@value #DEFAULT_FILE_NAME} when that text is empty).
         * Nothing is written for any other status code or when the response has no body.
         *
         * @param path absolute URL to request
         * @throws HttpException if executing the request reports a protocol-level failure
         * @throws IOException   if the request, reading the body, or writing the file fails
         */
        private static void downloadPage(String path) throws HttpException, IOException {
            PostMethod postMethod = new PostMethod(path);

            // Form fields of the POST; PostMethod sends them in the request body,
            // not in the URL.
            NameValuePair[] postData = new NameValuePair[] {
                new NameValuePair("name", "lietu"),
                new NameValuePair("password", "*****")
            };
            postMethod.addParameters(postData);

            try {
                // Execute the request; the return value is the HTTP status code.
                int statusCode = httpClient.executeMethod(postMethod);
                System.out.println(statusCode);
                if (statusCode != HttpStatus.SC_OK) {
                    return;
                }

                // Derive the output file name from the last path segment of the URL.
                String fileName = path.substring(path.lastIndexOf('/') + 1);
                if (fileName.isEmpty()) {
                    // A URL ending in '/' would otherwise yield an unusable empty file name.
                    fileName = DEFAULT_FILE_NAME;
                }
                System.out.println(fileName);

                InputStream input = postMethod.getResponseBodyAsStream();
                if (input == null) {
                    // No response body available: nothing to save.
                    return;
                }
                try {
                    OutputStream output = new FileOutputStream(fileName);
                    try {
                        byte[] buffer = new byte[BUFFER_SIZE];
                        int bytesRead;
                        // read() signals end of stream with -1; a zero byte is
                        // ordinary data and must not terminate the copy.
                        while ((bytesRead = input.read(buffer)) != -1) {
                            output.write(buffer, 0, bytesRead);
                        }
                    } finally {
                        output.close();
                    }
                } finally {
                    input.close();
                }
            } finally {
                // Always hand the connection back to the client's connection manager,
                // including on error paths.
                postMethod.releaseConnection();
            }
        }
    }

  • 相关阅读:
    Qt class加载头文件
    Qt 中KeyPressEvent获取不到Key_Space等事件
    如何选择开源许可证?
    C语言实现库函数汇总
    简单背包问题-递归非递归实现
    中点优先顺序遍历数组-递归非递归实现
    稀疏矩阵十字链表表示
    稀疏矩阵线性表示
    KMP模式匹配
    双向链表
  • 原文地址:https://www.cnblogs.com/airycode/p/5561015.html
Copyright © 2011-2022 走看看