zoukankan      html  css  js  c++  java
  • Java 通过URL获取页面快照十种方式解读

    需求:通过URL超链接获取超链接的页面快照

    查阅了各种资料后,找到了十种方式,但这十种方式各有特点和局限,以下是我对它们逐一测试的结果。

    方法一:java本身的API

    不需要引用任何jar包;但不支持写在style标签里的CSS和外部CSS文件,样式只能直接写在标签上;同时也不支持JS,只能处理HTML文件的URL地址。

    首先对一本地静态页面测试

    原图结果图(结果图有裁剪)对比:

    对应案例:

    package cn.cosmos.page;
    
    import javax.imageio.ImageIO;
    import javax.swing.*;
    import java.awt.image.BufferedImage;
    import java.awt.image.RenderedImage;
    import java.io.File;
    import java.net.URL;
    
    /**
     * Renders a local HTML page with Swing's {@link JEditorPane} and saves the result as a PNG.
     *
     * <p>Only JDK classes are used; no third-party jar is required.
     *
     * @author CosmosRay
     * @date 2019/5/21
     */
    public class JavaApi {
        /**
         * Loads {@code E:/test/index.html} into a 1000x1300 {@link JEditorPane}, paints it into
         * an ARGB image and writes that image to {@code E:/test/javacore.png}.
         *
         * @throws Exception if the page cannot be opened, the wait is interrupted, or the
         *                   image cannot be written
         */
        public static void generateOutput() throws Exception {
            String url = new File("E:/test/index.html").toURI().toString();
            JEditorPane ed = new JEditorPane(new URL(url));
            ed.setSize(1000, 1300);
            // Fixed pause before painting.
            // NOTE(review): presumably gives the pane time to finish loading the page - confirm.
            Thread.sleep(5000);

            // Off-screen image with the same size as the editor pane.
            BufferedImage image = new BufferedImage(ed.getWidth(), ed.getHeight(),
                    BufferedImage.TYPE_INT_ARGB);
            java.awt.Graphics2D graphics = image.createGraphics();
            try {
                SwingUtilities.paintComponent(graphics,
                        ed,
                        new JPanel(),
                        0, 0, image.getWidth(), image.getHeight());
            } finally {
                // Release the graphics context even if painting fails.
                graphics.dispose();
            }

            // ImageIO.write returns false (instead of throwing) when no writer is found.
            File target = new File("E:/test/javacore.png");
            if (!ImageIO.write(image, "png", target)) {
                throw new java.io.IOException("No ImageIO writer found for format png: " + target);
            }
        }

        /**
         * Runs {@link #generateOutput()} and prints the stack trace of any failure.
         *
         * @param args unused
         */
        public static void main(String[] args) {
            try {
                generateOutput();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    

    方法二:Cobra

    Cobra 是一个纯 Java 的HTML 解析和生成器,支持 HTML 4 、JavaScript、CSS 2。

    效果图:

    源码:

    方法三:DjNativeSwing

    只有这种方式同时支持JS和CSS,但是网页超过一定高度时,只能截取到一部分;另外,下面的示例代码在截图完成后会调用System.exit(0)退出JVM,所以如果作为接口调用,只能运行一次,之后进程就自动关闭了;直接使用main方法运行则没有问题(多线程问题)。

    效果图:

     所需Maven文件

    <dependency>
          <groupId>com.hynnet</groupId>
          <artifactId>DJNativeSwing</artifactId>
          <version>1.0.0</version>
        </dependency>
    
        <dependency>
          <groupId>com.hynnet</groupId>
          <artifactId>DJNativeSwing-SWT</artifactId>
          <version>1.0.0</version>
        </dependency>
    
        <dependency>
          <groupId>org.eclipse.swt.org.eclipse.swt.win32.win32.x86_64.4.3.swt</groupId>
          <artifactId>org.eclipse.swt.win32.win32.x86_64</artifactId>
          <version>4.3</version>
        </dependency>

    源码: 

    package cn.cosmos.page;
    
    import chrriis.dj.nativeswing.swtimpl.NativeComponent;
    import chrriis.dj.nativeswing.swtimpl.NativeInterface;
    import chrriis.dj.nativeswing.swtimpl.components.JWebBrowser;
    import chrriis.dj.nativeswing.swtimpl.components.WebBrowserAdapter;
    import chrriis.dj.nativeswing.swtimpl.components.WebBrowserEvent;
    import sun.misc.BASE64Encoder;
    
    import javax.imageio.ImageIO;
    import javax.swing.*;
    import java.awt.*;
    import java.awt.image.BufferedImage;
    import java.io.ByteArrayOutputStream;
    import java.io.File;
    import java.io.IOException;
    
    /**
     * Created with CosmosRay
     *
     * @author CosmosRay
     * @date 2019/5/22
     * Function:
     */
    public class JFrame extends JPanel {
        // 行分隔符
        final static public String LS = System.getProperty("line.separator", "
    ");
        // 文件分割符
        final static public String FS = System.getProperty("file.separator", "\");
        // 以javascript脚本获得网页全屏后大小
        private static final long serialVersionUID = 1L;
        private static final StringBuffer jsDimension;
    
        static {
            jsDimension = new StringBuffer();
            jsDimension.append("var width = 0;").append(LS);
            jsDimension.append("var height = 0;").append(LS);
            jsDimension.append("if(document.documentElement) {").append(LS);
            jsDimension.append("  width = Math.max(width, document.documentElement.scrollWidth);").append(LS);
            jsDimension.append("  height = Math.max(height, document.documentElement.scrollHeight);").append(LS);
            jsDimension.append("}").append(LS);
            jsDimension.append("if(self.innerWidth) {").append(LS);
            jsDimension.append("  width = Math.max(width, self.innerWidth);").append(LS);
            jsDimension.append("  height = Math.max(height, self.innerHeight);").append(LS);
            jsDimension.append("}").append(LS);
            jsDimension.append("if(document.body.scrollWidth) {").append(LS);
            jsDimension.append("  width = Math.max(width, document.body.scrollWidth);").append(LS);
            jsDimension.append("  height = Math.max(height, document.body.scrollHeight);").append(LS);
            jsDimension.append("}").append(LS);
            jsDimension.append("return width + ':' + height;");
        }
    
        public JFrame(final String url, final int maxWidth, final int maxHeight) {
            super(new BorderLayout());
            JPanel webBrowserPanel = new JPanel(new BorderLayout());
            final String fileName = System.currentTimeMillis() + ".jpg";
            final JWebBrowser webBrowser = new JWebBrowser(null);
            webBrowser.setBarsVisible(false);
            webBrowser.navigate(url);
            webBrowserPanel.add(webBrowser, BorderLayout.CENTER);
            add(webBrowserPanel, BorderLayout.CENTER);
    
            JPanel panel = new JPanel(new FlowLayout(FlowLayout.CENTER, 4, 4));
    
            webBrowser.addWebBrowserListener(new WebBrowserAdapter() {
                                                 // 监听加载进度
                                                 @Override
                                                 public void loadingProgressChanged(WebBrowserEvent e) {
                                                     // 当加载完毕时
                                                     if (e.getWebBrowser().getLoadingProgress() == 100) {
                                                         /*睡眠3秒钟,等待页面请求完毕再截取图片信息
                                                          * 如果不延时,则图片等可能没有时间下载显示
                                                          * 具体的秒数需要根据网速等调整
                                                          * */
                                                         try {
                                                             Thread.sleep(3000);
                                                         } catch (InterruptedException e1) {
                                                             e1.printStackTrace();
                                                         }
                                                         String result = (String) webBrowser.executeJavascriptWithResult(jsDimension.toString());
                                                         int index = result == null ? -1 : result.indexOf(":");
                                                         NativeComponent nativeComponent = webBrowser.getNativeComponent();
                                                         Dimension originalSize = nativeComponent.getSize();
                                                         Dimension imageSize = new Dimension(Integer.parseInt(result.substring(0, index)), Integer.parseInt(result
                                                                 .substring(index + 1)));
                                                         imageSize.width = Math.max(originalSize.width, imageSize.width + 50);
                                                         imageSize.height = Math.max(originalSize.height, imageSize.height + 50);
                                                         nativeComponent.setSize(imageSize);
                                                         BufferedImage image = new BufferedImage(imageSize.width,
                                                                 imageSize.height, BufferedImage.TYPE_INT_RGB);
                                                         nativeComponent.paintComponent(image);
                                                         nativeComponent.setSize(originalSize);
    
                                                         try {
                                                             /*将图片转为base64二进制字节 */
                                                             //io流
                                                             ByteArrayOutputStream baos = new ByteArrayOutputStream();
                                                             //写入流中
                                                             ImageIO.write(image, "jpg", baos);
                                                             //转换成字节
                                                             byte[] bytes = baos.toByteArray();
                                                             BASE64Encoder encoder = new BASE64Encoder();
                                                             //转换成base64串
                                                             String png_base64 = encoder.encodeBuffer(bytes).trim();
                                                             //删除 
    
                                                             png_base64 = png_base64.replaceAll("
    ", "").replaceAll("
    ", "");
                                                             //data:image/png;base64,
                                                             System.out.println(png_base64);
                                                         } catch (IOException e1) {
                                                             e1.printStackTrace();
                                                         }
    
                                                         try {
                                                             // 输出图像
                                                             System.out.println(fileName);
                                                             ImageIO.write(image, "jpg", new File(fileName));
                                                         } catch (IOException ex) {
                                                             ex.printStackTrace();
                                                         }
                                                         // 退出操作
                                                         System.exit(0);
                                                     }
                                                 }
                                             }
            );
            add(panel, BorderLayout.SOUTH);
        }
    
        public static void main(String[] args) {
            NativeInterface.open();
            SwingUtilities.invokeLater(new Runnable() {
                @Override
                public void run() {
                    // SWT组件转Swing组件,不初始化父窗体将无法启动webBrowser
                    javax.swing.JFrame frame = new javax.swing.JFrame("以DJ组件保存指定网页截图");
                    // 加载google,最大保存为640x480的截图
                    //实际项目中传入URL参数,根据不同参数截取不同网页快照,保存地址也可以在构造器中多设置一个参数,保存到指定目录
                    frame.getContentPane().add(new JFrame("http://www.baidu.com", 640, 480), BorderLayout.CENTER);
                    frame.setSize(1400, 900);
    
                    // 仅初始化,但不显示
                    frame.invalidate();
                    frame.pack();
                    frame.setVisible(false);
                }
            });
            NativeInterface.runEventPump();
        }
    }
    

    方法四:Robot

    运行成功后,只会把显示器当前显示的内容截取为jpg格式的图片,而不会专门截取我们需要的网页页面。

    效果图:

    源码:

    package cn.cosmos.page;
    
    import javax.imageio.ImageIO;
    import java.awt.*;
    import java.awt.event.KeyEvent;
    import java.awt.image.BufferedImage;
    import java.io.File;
    import java.io.IOException;
    import java.net.URISyntaxException;
    import java.net.URL;
    
    /**
     * Opens a URL in the desktop's default browser and saves a screenshot of the whole screen
     * as a JPEG file, using {@link java.awt.Robot}.
     *
     * <p>NOTE: this class is named {@code Robot}; the AWT robot is therefore referenced by its
     * fully qualified name.
     *
     * @author CosmosRay
     * @date 2019/5/22
     */
    public class Robot {
        /**
         * Opens {@code http://www.baidu.com}, waits ten seconds, sends an F11 key stroke, waits
         * two more seconds and writes a full-screen capture to
         * {@code d:/google<currentTimeMillis>.jpg}.
         *
         * @param args unused
         * @throws AWTException       if the AWT robot cannot be created
         * @throws IOException        if the browser cannot be launched or the image cannot be written
         * @throws URISyntaxException if the URL cannot be converted to a URI
         */
        public static void main(String[] args) throws AWTException, IOException, URISyntaxException {
            // Only available on JDK 1.6 and later.
            Desktop.getDesktop().browse(
                    new URL("http://www.baidu.com").toURI());
            java.awt.Robot robot = new java.awt.Robot();
            robot.delay(10000);

            Dimension screen = Toolkit.getDefaultToolkit().getScreenSize();
            int width = screen.width;
            int height = screen.height;

            // A key stroke needs a press followed by a release; a release on its own does not
            // generate one. NOTE(review): F11 is assumed to switch the browser to full screen.
            robot.keyPress(KeyEvent.VK_F11);
            robot.keyRelease(KeyEvent.VK_F11);
            robot.delay(2000);

            Image capture = robot.createScreenCapture(new Rectangle(0, 0, width,
                    height));
            BufferedImage bi = new BufferedImage(width, height,
                    BufferedImage.TYPE_INT_RGB);
            Graphics g = bi.createGraphics();
            try {
                g.drawImage(capture, 0, 0, width, height, null);
            } finally {
                // Release the graphics context even if drawing fails.
                g.dispose();
            }

            // ImageIO.write returns false (instead of throwing) when no writer is found.
            File target = new File("d:/google" + System.currentTimeMillis() + ".jpg");
            if (!ImageIO.write(bi, "jpg", target)) {
                throw new IOException("No ImageIO writer found for format jpg: " + target);
            }
        }
    }
    

    方法五:xhtmlrenderer

    不支持css,js

    方法六:Html2Image

    可以支持css,但是js不支持,重写HtmlImageGenerator方法

    方法七:Cssbox

    这个方法,不支持引用的外部js、css;其次,自定义设置的宽度有时候不起作用,代码内部有一个默认的宽度是2400,暂时没找到解决方法。

    方法八:HtmlImageGenerator

    对样式识别不好,颜色有差异

    方法九:JFrame

    截取整个屏幕窗体

    方法十:phantomJs

    需要在项目的服务器端引用exe文件进行截图,并发性能有待考证。

  • 相关阅读:
    231. Power of Two
    204. Count Primes
    205. Isomorphic Strings
    203. Remove Linked List Elements
    179. Largest Number
    922. Sort Array By Parity II
    350. Intersection of Two Arrays II
    242. Valid Anagram
    164. Maximum Gap
    147. Insertion Sort List
  • 原文地址:https://www.cnblogs.com/cosmosray/p/13268797.html
Copyright © 2011-2022 走看看