zoukankan      html  css  js  c++  java
  • Java jsoup获取网页中的图片

    获取图片

    package com.vfsd.net;
    
    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStream;
    import java.net.HttpURLConnection;
    import java.net.URL;
    import java.text.DateFormat;
    import java.text.SimpleDateFormat;
    import java.util.ArrayList;
    import java.util.Date;
    import java.util.List;
    
    import javax.servlet.ServletException;
    import javax.servlet.annotation.WebServlet;
    import javax.servlet.http.HttpServlet;
    import javax.servlet.http.HttpServletRequest;
    import javax.servlet.http.HttpServletResponse;
    
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.select.Elements;
    
    /**
     * Servlet implementation class GetImgFromHtml
     */
    @WebServlet("/GetImgFromHtml")
    public class GetImgFromHtml extends HttpServlet {
        private static final long serialVersionUID = 1L;
           
        /**
         * @see HttpServlet#HttpServlet()
         */
        public GetImgFromHtml() {
            super();
            // TODO Auto-generated constructor stub
        }
    
        /**
         * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
         */
        protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
            // TODO Auto-generated method stub
            //response.getWriter().append("Served at: ").append(request.getContextPath());
            getDataHTML1();
        }
    
        /**
         * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
         */
        protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
            // TODO Auto-generated method stub
            doGet(request, response);
        }
        
        public static void getDataHTML1() throws IOException{
            String aHrefStr = "http://www.xxx.com/youji/123.html";
            System.out.println(aHrefStr);
            
            String dirNameIndex = "1";
            System.out.println(dirNameIndex);
            int dataSize1 = getImg(aHrefStr,dirNameIndex);
            //int dataSize2 = getImg1(aHrefStr,dirNameIndex);
            
        }
        
        public static int getImg(String stringUrl,String dirNameParam) throws IOException {
            String agent1 = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36";
            
            Document documentRoot = Jsoup.connect(stringUrl).userAgent(agent1).get();
            
            //System.out.println(documentRoot);
            
            //Elements imgElements = documentRoot.getElementsByClass("lazy");
            Elements imgElements = documentRoot.getElementsByTag("img");
            List<String> list = new ArrayList<String>();
            
            System.out.println("==="+imgElements.size());
            int i = 0;
            for (Element imgElement : imgElements) {
                //imgElement.getElementsByClass("");
                String imgSrcStr = imgElement.attr("data-original");//获取到src的值
                String imgUrl="";
                
                String imgName = getImgNameByTime(dirNameParam);
           System.out.println(imgName);
        } 
      }

    }
  • 相关阅读:
    Macbook pro从购买服务器到搭建服务器环境(1)
    单元测试踩到的坑
    pycharm中连接公网IP方法
    C++混合编程之idlcpp教程Lua篇(5)
    C++混合编程之idlcpp教程Python篇(4)
    C++混合编程之idlcpp教程Lua篇(4)
    C++混合编程之idlcpp教程Python篇(3)
    C++混合编程之idlcpp教程Lua篇(3)
    C++混合编程之idlcpp教程Python篇(2)
    C++混合编程之idlcpp教程Lua篇(2)
  • 原文地址:https://www.cnblogs.com/herd/p/11991752.html
Copyright © 2011-2022 走看看