zoukankan      html  css  js  c++  java
  • 下载网页的基本方法

    一、java.net.URL

     1 import java.io.BufferedReader;
     2 import java.io.InputStreamReader;
     3 import java.net.URL;
     4 
     /**
      * Downloads the content behind a URL as text using {@link URL#openStream()}.
      */
     public class RetrivePage {
         /**
          * Reads everything the given URL serves and returns it as a single string.
          *
          * <p>The bytes are decoded as UTF-8 and the text is returned unchanged,
          * including its line terminators. The underlying stream is always closed,
          * even when reading fails.
          *
          * @param path the URL to fetch, in any form accepted by {@link URL#URL(String)}
          * @return the decoded content; empty if the resource has no content
          * @throws Exception if the URL is malformed or the content cannot be read
          */
         public static String downloadPage(String path) throws Exception {
             URL pageURL = new URL(path);
             StringBuilder pageBuffer = new StringBuilder();
             // try-with-resources closes the reader (and the URL stream it wraps) on every path.
             try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                     pageURL.openStream(), java.nio.charset.StandardCharsets.UTF_8))) {
                 // Copy raw character chunks instead of using readLine(), which would
                 // strip the line terminators and glue adjacent lines together.
                 char[] chunk = new char[8192];
                 int count;
                 while ((count = reader.read(chunk)) != -1) {
                     pageBuffer.append(chunk, 0, count);
                 }
             }
             return pageBuffer.toString();
         }

         /**
          * Fetches http://www.sina.com and prints the downloaded text to standard output.
          *
          * @param args ignored
          * @throws Exception if the download fails
          */
         public static void main(String args[]) throws Exception {
             System.out.println(RetrivePage.downloadPage("http://www.sina.com"));
         }
     }

    二、Scanner对象

     1 import java.io.InputStreamReader;
     2 import java.net.URL;
     3 import java.util.Scanner;
     4 
     /**
      * Downloads the content behind a URL as text using a {@link Scanner}.
      */
     public class RetrivePage {
         /**
          * Reads everything the given URL serves and returns it as a single string.
          *
          * <p>The bytes are decoded as UTF-8. The scanner's delimiter is the
          * end-of-input anchor, so the content is not split on whitespace. The
          * scanner and the stream it wraps are always closed.
          *
          * @param path the URL to fetch, in any form accepted by {@link URL#URL(String)}
          * @return the decoded content; empty if the resource has no content
          * @throws Exception if the URL is malformed or the content cannot be read
          */
         public static String downloadPage(String path) throws Exception {
             URL pageURL = new URL(path);
             StringBuilder pageBuffer = new StringBuilder();
             try (Scanner scanner = new Scanner(new InputStreamReader(pageURL.openStream(), "utf-8"))) {
                 // The regex \z (end of input) must be written "\\z" in Java source;
                 // a bare "\z" is an illegal escape sequence and does not compile.
                 scanner.useDelimiter("\\z");
                 while (scanner.hasNext()) {
                     pageBuffer.append(scanner.next());
                 }
                 // Scanner swallows read errors and just reports "no more input";
                 // surface them so a failed download is not mistaken for a short page.
                 java.io.IOException failure = scanner.ioException();
                 if (failure != null) {
                     throw failure;
                 }
             }
             return pageBuffer.toString();
         }

         /**
          * Fetches http://www.sina.com and prints the downloaded text to standard output.
          *
          * @param args ignored
          * @throws Exception if the download fails
          */
         public static void main(String args[]) throws Exception {
             System.out.println(RetrivePage.downloadPage("http://www.sina.com"));
         }
     }

    三、套接字

     1 import java.io.*;
     2 import java.net.Socket;
     3 
     /**
      * Fetches a page by writing a hand-built HTTP/1.0 GET request to a plain
      * TCP socket and printing everything the server sends back.
      */
     public class RetrivePage {
         /**
          * Connects to blog.csdn.net on port 80, requests /column.html and prints
          * the raw response (status line, headers and body) to standard output.
          *
          * @param args ignored
          * @throws Exception if the connection cannot be opened or the exchange fails
          */
         public static void main(String args[]) throws Exception {
             String host = "blog.csdn.net";
             String file = "/column.html";
             int port = 80;
             // try-with-resources closes the socket, and with it both of its streams.
             try (Socket s = new Socket(host, port)) {
                 OutputStream out = s.getOutputStream();
                 // No auto-flush: the whole request is sent by the explicit flush() below.
                 PrintWriter outw = new PrintWriter(new OutputStreamWriter(out, "US-ASCII"), false);
                 // Request line is "METHOD SP request-target SP version"; the space after
                 // GET is required. Each line ends with CRLF, not a platform line break.
                 outw.print("GET " + file + " HTTP/1.0\r\n");
                 // Lets a server that hosts several sites on one address pick the right one.
                 outw.print("Host: " + host + "\r\n");
                 outw.print("Accept:text/plain,text/html,text/*\r\n");
                 // An empty line terminates the header section.
                 outw.print("\r\n");
                 outw.flush();
                 InputStream in = s.getInputStream();
                 // NOTE(review): assumes the response is UTF-8; the Content-Type header is not inspected.
                 InputStreamReader inr = new InputStreamReader(in, "UTF-8");
                 BufferedReader bufferedReader = new BufferedReader(inr);
                 String line;
                 while ((line = bufferedReader.readLine()) != null) {
                     System.out.println(line);
                 }
             }
         }
     }

    四、HttpClient

     1 import org.apache.http.HttpEntity;
     2 import org.apache.http.HttpResponse;
     3 import org.apache.http.client.HttpClient;
     4 import org.apache.http.client.methods.HttpGet;
     5 import org.apache.http.impl.client.DefaultHttpClient;
     6 import org.apache.http.util.EntityUtils;
     7 public class RetrivePage {
     8     public static void main(String args[]) throws Exception {
     9         HttpClient httpClient=new DefaultHttpClient();
    10         HttpGet httpGet=new HttpGet("http://www.sina.com");
    11         HttpResponse response=httpClient.execute(httpGet);
    12         HttpEntity entity=response.getEntity();
    13         if(entity!=null){
    14             System.out.println(EntityUtils.toString(entity,"utf-8"));
    15             EntityUtils.consume(entity);
    16         }
    17         httpClient.getConnectionManager().shutdown();
    18     }
    19 }
  • 相关阅读:
    wget(转)
    852. Peak Index in a Mountain Array
    617. Merge Two Binary Trees
    814. Binary Tree Pruning
    657. Judge Route Circle
    861. Score After Flipping Matrix
    832. Flipping an Image
    461. Hamming Distance
    654. Maximum Binary Tree
    804. Unique Morse Code Words
  • 原文地址:https://www.cnblogs.com/w1570631036/p/5857962.html
Copyright © 2011-2022 走看看