zoukankan      html  css  js  c++  java
  • 下载网页的基本方法

    一、java.net.URL

     1 import java.io.BufferedReader;
     2 import java.io.InputStreamReader;
     3 import java.net.URL;
     4 
     /**
      * Downloads a web page with {@link URL#openStream()}.
      */
     public class RetrivePage {
         /**
          * Fetches the resource at {@code path} and returns its content as text.
          *
          * <p>The content is returned exactly as received, line terminators included,
          * and is decoded as UTF-8.
          *
          * @param path absolute URL of the resource, e.g. {@code "http://www.sina.com"}
          * @return the decoded content; an empty string when the resource is empty
          * @throws Exception if the URL is malformed or the stream cannot be opened or read
          */
         public static String downloadPage(String path) throws Exception {
             URL pageURL = new URL(path);
             StringBuilder pageBuffer = new StringBuilder();
             // try-with-resources closes the reader (and the underlying connection stream)
             // even when reading fails part-way through.
             // NOTE(review): UTF-8 is assumed; pages served in another encoding would need
             // the charset taken from the response headers instead.
             try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                     pageURL.openStream(), java.nio.charset.StandardCharsets.UTF_8))) {
                 // Copy raw character chunks rather than readLine(): readLine() strips the
                 // line terminators, which would glue adjacent lines of the page together.
                 char[] chunk = new char[8192];
                 int count;
                 while ((count = reader.read(chunk)) != -1) {
                     pageBuffer.append(chunk, 0, count);
                 }
             }
             return pageBuffer.toString();
         }

         /** Prints the downloaded home page of www.sina.com to standard output. */
         public static void main(String args[]) throws Exception {
             System.out.println(RetrivePage.downloadPage("http://www.sina.com"));
         }
     }

    二、Scanner对象

     1 import java.io.InputStreamReader;
     2 import java.net.URL;
     3 import java.util.Scanner;
     4 
     /**
      * Downloads a web page by reading the URL stream through a {@link Scanner}.
      */
     public class RetrivePage {
         /**
          * Fetches the resource at {@code path} and returns its content as text,
          * decoded as UTF-8.
          *
          * @param path absolute URL of the resource, e.g. {@code "http://www.sina.com"}
          * @return the decoded content; an empty string when the resource is empty
          * @throws Exception if the URL is malformed or the stream cannot be opened or read
          */
         public static String downloadPage(String path) throws Exception {
             URL pageURL = new URL(path);
             StringBuilder pageBuffer = new StringBuilder();
             // try-with-resources closes the scanner, which in turn closes the URL stream.
             try (Scanner scanner = new Scanner(new InputStreamReader(pageURL.openStream(), "utf-8"))) {
                 // "\\z" is the regex for end-of-input, so each token is everything that is
                 // left in the stream. The backslash must be doubled: "\z" is not a legal
                 // Java string escape and does not compile.
                 scanner.useDelimiter("\\z");
                 while (scanner.hasNext()) {
                     pageBuffer.append(scanner.next());
                 }
                 // Scanner swallows read errors and simply stops; surface them so that a
                 // truncated page is not mistaken for a complete one.
                 java.io.IOException readFailure = scanner.ioException();
                 if (readFailure != null) {
                     throw readFailure;
                 }
             }
             return pageBuffer.toString();
         }

         /** Prints the downloaded home page of www.sina.com to standard output. */
         public static void main(String args[]) throws Exception {
             System.out.println(RetrivePage.downloadPage("http://www.sina.com"));
         }
     }

    三、套接字

     1 import java.io.*;
     2 import java.net.Socket;
     3 
     /**
      * Fetches a page by writing a raw HTTP/1.0 request to a plain TCP socket and
      * printing everything the server sends back (status line, headers and body).
      */
     public class RetrivePage {
         /**
          * Requests {@code /column.html} from {@code blog.csdn.net:80} and echoes the
          * response to standard output line by line.
          *
          * @throws Exception if the connection cannot be established or I/O fails
          */
         public static void main(String args[]) throws Exception {
             String host = "blog.csdn.net";
             String file = "/column.html";
             int port = 80;
             // try-with-resources closes the socket, and with it both of its streams,
             // even when the exchange fails half-way.
             try (Socket s = new Socket(host, port)) {
                 OutputStream out = s.getOutputStream();
                 PrintWriter outw = new PrintWriter(out, false);
                 // Request line is "METHOD SP Request-URI SP HTTP-Version CRLF"; the space
                 // after GET is mandatory, and HTTP lines end with CRLF.
                 outw.print("GET " + file + " HTTP/1.0\r\n");
                 // Servers that host several sites on one address use the Host header
                 // to pick the site.
                 outw.print("Host: " + host + "\r\n");
                 outw.print("Accept:text/plain,text/html,text/*\r\n");
                 // An empty line ends the header section and completes the request.
                 outw.print("\r\n");
                 // Auto-flush is off, so nothing is sent until this explicit flush.
                 outw.flush();
                 InputStream in = s.getInputStream();
                 InputStreamReader inr = new InputStreamReader(in);
                 BufferedReader bufferedReader = new BufferedReader(inr);
                 String line;
                 // Read until the server closes the connection.
                 while ((line = bufferedReader.readLine()) != null) {
                     System.out.println(line);
                 }
             }
         }
     }

    四、HttpClient

     1 import org.apache.http.HttpEntity;
     2 import org.apache.http.HttpResponse;
     3 import org.apache.http.client.HttpClient;
     4 import org.apache.http.client.methods.HttpGet;
     5 import org.apache.http.impl.client.DefaultHttpClient;
     6 import org.apache.http.util.EntityUtils;
     7 public class RetrivePage {
     8     public static void main(String args[]) throws Exception {
     9         HttpClient httpClient=new DefaultHttpClient();
    10         HttpGet httpGet=new HttpGet("http://www.sina.com");
    11         HttpResponse response=httpClient.execute(httpGet);
    12         HttpEntity entity=response.getEntity();
    13         if(entity!=null){
    14             System.out.println(EntityUtils.toString(entity,"utf-8"));
    15             EntityUtils.consume(entity);
    16         }
    17         httpClient.getConnectionManager().shutdown();
    18     }
    19 }
  • 相关阅读:
    28完全背包+扩展欧几里得(包子凑数)
    HDU 3527 SPY
    POJ 3615 Cow Hurdles
    POJ 3620 Avoid The Lakes
    POJ 3036 Honeycomb Walk
    HDU 2352 Verdis Quo
    HDU 2368 Alfredo's Pizza Restaurant
    HDU 2700 Parity
    HDU 3763 CDs
    POJ 3279 Fliptile
  • 原文地址:https://www.cnblogs.com/w1570631036/p/5857962.html
Copyright © 2011-2022 走看看