zoukankan      html  css  js  c++  java
  • 下载网页的基本方法

    一、java.net.URL

     1 import java.io.BufferedReader;
     2 import java.io.InputStreamReader;
     3 import java.net.URL;
     4 
     /**
      * Downloads a page by opening a stream directly from a {@link URL}.
      */
     public class RetrivePage {
         /**
          * Fetches the resource at {@code path} and returns its content as text.
          *
          * <p>The content is decoded as UTF-8 and copied character by character, so line
          * terminators are preserved exactly as they were sent. The stream is closed before
          * the method returns, whether it succeeds or fails.
          *
          * @param path absolute URL of the resource to download
          * @return the decoded content; an empty string when the resource is empty
          * @throws Exception if {@code path} is not a valid URL or the resource cannot be read
          */
         public static String downloadPage(String path) throws Exception {
             URL pageURL = new URL(path);
             StringBuilder pageBuffer = new StringBuilder();
             // try-with-resources closes the reader (and the connection stream under it).
             try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                     pageURL.openStream(), java.nio.charset.StandardCharsets.UTF_8))) {
                 // Copy raw characters instead of readLine(), which would strip line terminators.
                 char[] chunk = new char[8192];
                 int count;
                 while ((count = reader.read(chunk)) != -1) {
                     pageBuffer.append(chunk, 0, count);
                 }
             }
             return pageBuffer.toString();
         }

         /**
          * Prints the content of a sample page to standard output.
          *
          * @param args ignored
          * @throws Exception if the download fails
          */
         public static void main(String args[]) throws Exception {
             System.out.println(RetrivePage.downloadPage("http://www.sina.com"));
         }
     }

    二、Scanner对象

     1 import java.io.InputStreamReader;
     2 import java.net.URL;
     3 import java.util.Scanner;
     4 
     /**
      * Downloads a page by reading a {@link URL} stream through a {@link Scanner}.
      */
     public class RetrivePage {
         /**
          * Fetches the resource at {@code path} and returns its content as text.
          *
          * <p>The content is decoded as utf-8. The scanner's delimiter is the regex
          * {@code \z} (end of input), so a token runs up to the end of the stream rather
          * than stopping at whitespace.
          *
          * @param path absolute URL of the resource to download
          * @return the decoded content; an empty string when the resource is empty
          * @throws Exception if {@code path} is not a valid URL or the resource cannot be read
          */
         public static String downloadPage(String path) throws Exception {
             URL pageURL = new URL(path);
             StringBuilder pageBuffer = new StringBuilder();
             // Closing the scanner also closes the reader it wraps.
             try (Scanner scanner = new Scanner(new InputStreamReader(pageURL.openStream(), "utf-8"))) {
                 // The backslash must be doubled: "\z" is not a legal Java string escape.
                 scanner.useDelimiter("\\z");
                 // Loop kept in case the scanner hands the content back in more than one token.
                 while (scanner.hasNext()) {
                     pageBuffer.append(scanner.next());
                 }
             }
             return pageBuffer.toString();
         }

         /**
          * Prints the content of a sample page to standard output.
          *
          * @param args ignored
          * @throws Exception if the download fails
          */
         public static void main(String args[]) throws Exception {
             System.out.println(RetrivePage.downloadPage("http://www.sina.com"));
         }
     }

    三、套接字

     1 import java.io.*;
     2 import java.net.Socket;
     3 
     /**
      * Downloads a page by speaking HTTP/1.0 by hand over a plain TCP {@link Socket}.
      */
     public class RetrivePage {
         /**
          * Sends a GET request for a fixed host and path and prints the raw response
          * (status line, headers and body) to standard output, one line at a time.
          *
          * @param args ignored
          * @throws Exception if the connection cannot be opened or an I/O error occurs
          */
         public static void main(String args[]) throws Exception {
             String host = "blog.csdn.net";
             String file = "/column.html";
             int port = 80;
             // try-with-resources closes the socket, and with it both of its streams.
             try (Socket s = new Socket(host, port)) {
                 OutputStream out = s.getOutputStream();
                 PrintWriter outw = new PrintWriter(out, false);
                 // Request line is "METHOD SP target SP version CRLF"; the space after GET is required.
                 outw.print("GET " + file + " HTTP/1.0\r\n");
                 // Host lets a server that hosts several sites on one address pick the right one.
                 outw.print("Host: " + host + "\r\n");
                 outw.print("Accept:text/plain,text/html,text/*\r\n");
                 // An empty line terminates the header section.
                 outw.print("\r\n");
                 // Auto-flush is off, so the request is only sent here.
                 outw.flush();
                 InputStream in = s.getInputStream();
                 InputStreamReader inr = new InputStreamReader(in);
                 BufferedReader bufferedReader = new BufferedReader(inr);
                 String line;
                 while ((line = bufferedReader.readLine()) != null) {
                     System.out.println(line);
                 }
             }
         }
     }

    四、HttpClient

     1 import org.apache.http.HttpEntity;
     2 import org.apache.http.HttpResponse;
     3 import org.apache.http.client.HttpClient;
     4 import org.apache.http.client.methods.HttpGet;
     5 import org.apache.http.impl.client.DefaultHttpClient;
     6 import org.apache.http.util.EntityUtils;
     7 public class RetrivePage {
     8     public static void main(String args[]) throws Exception {
     9         HttpClient httpClient=new DefaultHttpClient();
    10         HttpGet httpGet=new HttpGet("http://www.sina.com");
    11         HttpResponse response=httpClient.execute(httpGet);
    12         HttpEntity entity=response.getEntity();
    13         if(entity!=null){
    14             System.out.println(EntityUtils.toString(entity,"utf-8"));
    15             EntityUtils.consume(entity);
    16         }
    17         httpClient.getConnectionManager().shutdown();
    18     }
    19 }
  • 相关阅读:
    jQuery及javascript DOM创建节点(三)
    jQueryEasyUI Window的基本使用
    3.1、值类型
    手动依赖注入(二)
    3.1.2、字符类型
    不错不错
    我们应该讨论什么? 就面向对象的讨论所引发的一些思考
    保存个地址, 顺便问个问题~
    嗯嗯, 今天很高兴
    方法级AOP: 又一个补丁
  • 原文地址:https://www.cnblogs.com/w1570631036/p/5857962.html
Copyright © 2011-2022 走看看